Update models/loader.py
models/loader.py (CHANGED: +6 -3)
@@ -1,9 +1,9 @@
 # models/loader.py
 import torch
-import os
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from backend.agents import ROLE_PROMPTS
 
+# The following configs are no longer used for CPU, but kept for future GPU use.
 QUANTIZATION_CONFIG = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_quant_type="nf4",
@@ -30,12 +30,15 @@ def get_model_and_tokenizer(model_name="Qwen/Qwen3-0.6B"):
     _MODEL_CACHE[model_name] = {
         "model": AutoModelForCausalLM.from_pretrained(
             model_name,
-            device_map=
-            quantization_config=
+            device_map=None,
+            quantization_config=None,
             trust_remote_code=True,
         ),
         "tokenizer": AutoTokenizer.from_pretrained(model_name)
     }
+    # Explicitly move the model to the CPU after loading
+    _MODEL_CACHE[model_name]["model"].to("cpu")
+
     return _MODEL_CACHE[model_name]["model"], _MODEL_CACHE[model_name]["tokenizer"]
 
 def generate_with_model(agent_role, prompt):
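For context, here is a minimal usage sketch of the loader after this change. It assumes torch and transformers are installed, that the Qwen/Qwen3-0.6B checkpoint can be downloaded, and that models.loader is importable from the project root; the call pattern is inferred from the diff above rather than taken from the rest of the repository.

    import torch
    from models.loader import get_model_and_tokenizer

    model, tokenizer = get_model_and_tokenizer("Qwen/Qwen3-0.6B")

    # With device_map=None and quantization_config=None, the model loads in full
    # precision and is then moved to the CPU, so input tensors must stay on the CPU too.
    inputs = tokenizer("Hello, world!", return_tensors="pt")  # tensors default to CPU
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=32)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

Since the added comment keeps QUANTIZATION_CONFIG around for future GPU use, re-enabling it would roughly look like the snippet below. This is a hypothetical sketch, not part of this commit: device_map="auto" is an assumption (the original value is truncated in the diff view), and it requires a CUDA device plus the bitsandbytes package.

    # Hypothetical GPU path: pass the kept QUANTIZATION_CONFIG back in instead of None.
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        quantization_config=QUANTIZATION_CONFIG,
        trust_remote_code=True,
    )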