Spaces:
Paused
Paused
Update src/model_loader.py
Browse files - src/model_loader.py +7 -7
src/model_loader.py
CHANGED
@@ -9,12 +9,12 @@ cached_tokenizer = None
|
|
9 |
def load_model():
|
10 |
global cached_model, cached_tokenizer
|
11 |
if cached_model is None or cached_tokenizer is None:
|
12 |
-
bnb_config = BitsAndBytesConfig(
|
13 |
-
load_in_4bit=True,
|
14 |
-
bnb_4bit_use_double_quant=True,
|
15 |
-
bnb_4bit_quant_type="nf4",
|
16 |
-
bnb_4bit_compute_dtype=torch.bfloat16
|
17 |
-
)
|
18 |
-
cached_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, quantization_config=bnb_config)
|
19 |
cached_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
20 |
return cached_model, cached_tokenizer
|
|
|
9 |
def load_model():
|
10 |
global cached_model, cached_tokenizer
|
11 |
if cached_model is None or cached_tokenizer is None:
|
12 |
+
# bnb_config = BitsAndBytesConfig(
|
13 |
+
# load_in_4bit=True,
|
14 |
+
# bnb_4bit_use_double_quant=True,
|
15 |
+
# bnb_4bit_quant_type="nf4",
|
16 |
+
# bnb_4bit_compute_dtype=torch.bfloat16
|
17 |
+
# )
|
18 |
+
cached_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME) #, quantization_config=bnb_config
|
19 |
cached_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
|
20 |
return cached_model, cached_tokenizer
|