Keeby-smilyai committed · Commit 8ecdc1c · verified · 1 Parent(s): ccb7e01

Update models/loader.py

Files changed (1):
  models/loader.py +6 -3
models/loader.py CHANGED
@@ -1,9 +1,9 @@
 # models/loader.py
 import torch
-import os
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from backend.agents import ROLE_PROMPTS
 
+# The following configs are no longer used for CPU, but kept for future GPU use.
 QUANTIZATION_CONFIG = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_quant_type="nf4",
@@ -30,12 +30,15 @@ def get_model_and_tokenizer(model_name="Qwen/Qwen3-0.6B"):
     _MODEL_CACHE[model_name] = {
         "model": AutoModelForCausalLM.from_pretrained(
             model_name,
-            device_map="auto",
-            quantization_config=QUANTIZATION_CONFIG,
+            device_map=None,
+            quantization_config=None,
             trust_remote_code=True,
         ),
         "tokenizer": AutoTokenizer.from_pretrained(model_name)
     }
+    # Explicitly move the model to the CPU after loading
+    _MODEL_CACHE[model_name]["model"].to("cpu")
+
     return _MODEL_CACHE[model_name]["model"], _MODEL_CACHE[model_name]["tokenizer"]
 
 def generate_with_model(agent_role, prompt):
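
For context, a minimal usage sketch of the loader after this change, assuming the CPU-only host the commit targets. Only get_model_and_tokenizer and its default model name appear in the diff above; the import path, prompt text, and generation settings below are illustrative, not taken from the repository.

    # Sketch: consuming the CPU-pinned loader (assumes torch and
    # transformers are installed; prompt and settings are illustrative).
    import torch
    from models.loader import get_model_and_tokenizer

    model, tokenizer = get_model_and_tokenizer("Qwen/Qwen3-0.6B")

    # Tokenizer output defaults to CPU tensors, matching the model's device.
    inputs = tokenizer("Hello, world", return_tensors="pt")

    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=32)

    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

Since from_pretrained with device_map=None and no quantization config loads full-precision weights on the CPU by default, the explicit .to("cpu") mainly guards against a stray accelerator placement; re-enabling the GPU path later would amount to passing device_map="auto" and quantization_config=QUANTIZATION_CONFIG back in, mirroring the removed lines.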