Spaces:
Runtime error
Runtime error
Upload folder using huggingface_hub
Browse files- README.md +18 -0
- app.py +14 -0
- requirements.txt +6 -0
- run_transformers_training.py +69 -24
README.md
CHANGED
@@ -14,6 +14,24 @@ license: mit
|
|
14 |
|
15 |
This space is dedicated to training Microsoft's Phi-4 model using Unsloth optimizations for enhanced performance and efficiency. The training process utilizes 4-bit quantization and advanced memory optimizations.
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
## Features
|
18 |
|
19 |
- 4-bit quantization using Unsloth
|
|
|
14 |
|
15 |
This space is dedicated to training Microsoft's Phi-4 model using Unsloth optimizations for enhanced performance and efficiency. The training process utilizes 4-bit quantization and advanced memory optimizations.
|
16 |
|
17 |
+
## Installation
|
18 |
+
|
19 |
+
Before running the training, ensure you have all required dependencies installed:
|
20 |
+
|
21 |
+
```bash
|
22 |
+
pip install -r requirements.txt
|
23 |
+
```
|
24 |
+
|
25 |
+
### Critical Dependencies
|
26 |
+
|
27 |
+
- **unsloth** (>=2024.3): Required for optimized 4-bit training
|
28 |
+
- **peft** (>=0.9.0): Required for parameter-efficient fine-tuning
|
29 |
+
- **transformers** (>=4.36.0): Required for model architecture and tokenization
|
30 |
+
|
31 |
+
### Optional but Recommended
|
32 |
+
|
33 |
+
- **flash-attn** (>=2.5.0): Significantly speeds up attention computations
|
34 |
+
|
35 |
## Features
|
36 |
|
37 |
- 4-bit quantization using Unsloth
|
app.py
CHANGED
@@ -196,6 +196,19 @@ with gr.Blocks(title="Phi-4 Unsloth Training", theme=gr.themes.Soft(primary_hue=
|
|
196 |
|
197 |
This interface allows you to manage training of the Phi-4 model with Unsloth 4-bit optimizations.
|
198 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
199 |
### Quick Start
|
200 |
|
201 |
1. Review the configuration in the Configuration tab
|
@@ -214,6 +227,7 @@ with gr.Blocks(title="Phi-4 Unsloth Training", theme=gr.themes.Soft(primary_hue=
|
|
214 |
- Check the logs for out-of-memory errors
|
215 |
- Verify the VRAM usage on each GPU
|
216 |
- Check for CUDA version compatibility
|
|
|
217 |
""")
|
218 |
|
219 |
# Launch the app
|
|
|
196 |
|
197 |
This interface allows you to manage training of the Phi-4 model with Unsloth 4-bit optimizations.
|
198 |
|
199 |
+
### Installation
|
200 |
+
|
201 |
+
Before starting training, ensure all dependencies are installed:
|
202 |
+
|
203 |
+
```bash
|
204 |
+
pip install -r requirements.txt
|
205 |
+
```
|
206 |
+
|
207 |
+
Critical packages:
|
208 |
+
- unsloth (>=2024.3)
|
209 |
+
- peft (>=0.9.0)
|
210 |
+
- transformers (>=4.36.0)
|
211 |
+
|
212 |
### Quick Start
|
213 |
|
214 |
1. Review the configuration in the Configuration tab
|
|
|
227 |
- Check the logs for out-of-memory errors
|
228 |
- Verify the VRAM usage on each GPU
|
229 |
- Check for CUDA version compatibility
|
230 |
+
- If you see an "Unsloth not available" error, run: `pip install "unsloth>=2024.3" "peft>=0.9.0"` (the quotes stop the shell from treating `>` as an output redirect)
|
231 |
""")
|
232 |
|
233 |
# Launch the app
|
requirements.txt
CHANGED
@@ -1,20 +1,26 @@
|
|
1 |
accelerate>=0.27.0
|
2 |
bitsandbytes>=0.41.0
|
3 |
datasets>=2.15.0
|
|
|
4 |
filelock>=3.13.1
|
|
|
5 |
gradio>=5.17.0
|
6 |
huggingface-hub>=0.19.0
|
7 |
matplotlib>=3.7.0
|
8 |
numpy>=1.24.0
|
9 |
packaging>=23.0
|
|
|
|
|
10 |
psutil>=5.9.0
|
11 |
python-dotenv>=1.0.0
|
12 |
pyyaml>=6.0.1
|
13 |
regex>=2023.0.0
|
14 |
requests>=2.31.0
|
15 |
safetensors>=0.4.1
|
|
|
16 |
tensorboard>=2.15.0
|
17 |
torch>=2.0.0
|
18 |
tqdm>=4.65.0
|
19 |
transformers>=4.36.0
|
20 |
typing-extensions>=4.8.0
|
|
|
|
1 |
accelerate>=0.27.0
|
2 |
bitsandbytes>=0.41.0
|
3 |
datasets>=2.15.0
|
4 |
+
einops>=0.7.0
|
5 |
filelock>=3.13.1
|
6 |
+
flash-attn>=2.5.0
|
7 |
gradio>=5.17.0
|
8 |
huggingface-hub>=0.19.0
|
9 |
matplotlib>=3.7.0
|
10 |
numpy>=1.24.0
|
11 |
packaging>=23.0
|
12 |
+
peft>=0.9.0
|
13 |
+
protobuf>=4.23.4
|
14 |
psutil>=5.9.0
|
15 |
python-dotenv>=1.0.0
|
16 |
pyyaml>=6.0.1
|
17 |
regex>=2023.0.0
|
18 |
requests>=2.31.0
|
19 |
safetensors>=0.4.1
|
20 |
+
sentencepiece>=0.1.99
|
21 |
tensorboard>=2.15.0
|
22 |
torch>=2.0.0
|
23 |
tqdm>=4.65.0
|
24 |
transformers>=4.36.0
|
25 |
typing-extensions>=4.8.0
|
26 |
+
unsloth>=2024.3
|
run_transformers_training.py
CHANGED
@@ -127,31 +127,33 @@ def parse_args():
|
|
127 |
def load_model_and_tokenizer(config):
|
128 |
"""Load model and tokenizer with proper error handling and optimizations."""
|
129 |
try:
|
130 |
-
if unsloth_available:
|
131 |
-
logger.info("Using Unsloth optimizations with pre-quantized model")
|
132 |
-
model, tokenizer = FastLanguageModel.from_pretrained(
|
133 |
-
model_name=config.get("model_name"),
|
134 |
-
max_seq_length=config.get("max_seq_length", 2048),
|
135 |
-
dtype=None, # Let Unsloth choose optimal dtype
|
136 |
-
device_map="auto",
|
137 |
-
)
|
138 |
-
|
139 |
-
# Apply Unsloth's training optimizations with config parameters
|
140 |
-
model = FastLanguageModel.get_peft_model(
|
141 |
-
model,
|
142 |
-
r=config.get("unsloth_r", 32),
|
143 |
-
target_modules=config.get("unsloth_target_modules",
|
144 |
-
["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]),
|
145 |
-
lora_alpha=config.get("unsloth_alpha", 16),
|
146 |
-
lora_dropout=config.get("unsloth_dropout", 0.05),
|
147 |
-
bias="none",
|
148 |
-
use_gradient_checkpointing=config.get("gradient_checkpointing", True),
|
149 |
-
random_state=config.get("seed", 42),
|
150 |
-
)
|
151 |
-
logger.info("Unsloth optimizations applied successfully")
|
152 |
-
else:
|
153 |
logger.error("Unsloth is required for training with pre-quantized model")
|
154 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
|
156 |
# Set up tokenizer settings
|
157 |
if config.get("chat_template"):
|
@@ -168,6 +170,7 @@ def load_model_and_tokenizer(config):
|
|
168 |
|
169 |
except Exception as e:
|
170 |
logger.error(f"Error in model/tokenizer loading: {str(e)}")
|
|
|
171 |
raise
|
172 |
|
173 |
def load_dataset_with_mapping(dataset_config):
|
@@ -359,6 +362,43 @@ class SimpleDataCollator:
|
|
359 |
|
360 |
return batch
|
361 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
362 |
def main():
|
363 |
# Set up logging
|
364 |
logger.info("Starting training process")
|
@@ -366,6 +406,11 @@ def main():
|
|
366 |
# Parse arguments
|
367 |
args = parse_args()
|
368 |
|
|
|
|
|
|
|
|
|
|
|
369 |
# Load environment variables
|
370 |
load_env_variables()
|
371 |
|
|
|
127 |
def load_model_and_tokenizer(config):
|
128 |
"""Load model and tokenizer with proper error handling and optimizations."""
|
129 |
try:
|
130 |
+
if not unsloth_available:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
logger.error("Unsloth is required for training with pre-quantized model")
|
132 |
+
logger.error("Please install required packages with: pip install -r requirements.txt")
|
133 |
+
logger.error("Or directly install with: pip install unsloth>=2024.3 peft>=0.9.0")
|
134 |
+
raise ImportError("Required packages missing. See log for installation instructions.")
|
135 |
+
|
136 |
+
logger.info("Using Unsloth optimizations with pre-quantized model")
|
137 |
+
model, tokenizer = FastLanguageModel.from_pretrained(
|
138 |
+
model_name=config.get("model_name"),
|
139 |
+
max_seq_length=config.get("max_seq_length", 2048),
|
140 |
+
dtype=None, # Let Unsloth choose optimal dtype
|
141 |
+
device_map="auto",
|
142 |
+
)
|
143 |
+
|
144 |
+
# Apply Unsloth's training optimizations with config parameters
|
145 |
+
model = FastLanguageModel.get_peft_model(
|
146 |
+
model,
|
147 |
+
r=config.get("unsloth_r", 32),
|
148 |
+
target_modules=config.get("unsloth_target_modules",
|
149 |
+
["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]),
|
150 |
+
lora_alpha=config.get("unsloth_alpha", 16),
|
151 |
+
lora_dropout=config.get("unsloth_dropout", 0.05),
|
152 |
+
bias="none",
|
153 |
+
use_gradient_checkpointing=config.get("gradient_checkpointing", True),
|
154 |
+
random_state=config.get("seed", 42),
|
155 |
+
)
|
156 |
+
logger.info("Unsloth optimizations applied successfully")
|
157 |
|
158 |
# Set up tokenizer settings
|
159 |
if config.get("chat_template"):
|
|
|
170 |
|
171 |
except Exception as e:
|
172 |
logger.error(f"Error in model/tokenizer loading: {str(e)}")
|
173 |
+
logger.error("If missing dependencies, install with: pip install -r requirements.txt")
|
174 |
raise
|
175 |
|
176 |
def load_dataset_with_mapping(dataset_config):
|
|
|
362 |
|
363 |
return batch
|
364 |
|
365 |
+
def check_dependencies():
    """Check whether the packages needed for training are available.

    Critical packages (unsloth, peft) are judged from the module-level
    ``unsloth_available`` / ``peft_available`` flags. flash-attn is probed
    with an import and treated as optional: its absence only produces
    warnings.

    Returns:
        bool: False if any critical package is missing (install instructions
        are logged first), True otherwise.
    """
    missing_packages = []

    # Critical packages
    if not unsloth_available:
        missing_packages.append("unsloth>=2024.3")

    if not peft_available:
        missing_packages.append("peft>=0.9.0")

    # Optional but recommended packages
    try:
        import flash_attn  # noqa: F401 -- imported only to probe availability
    except ImportError:
        logger.warning("flash-attn not found. Flash attention will not be used.")
        missing_packages.append("flash-attn>=2.5.0 (optional)")

    # If critical packages are missing, report them and tell the caller to abort
    critical_missing = [pkg for pkg in missing_packages if "optional" not in pkg]
    if critical_missing:
        logger.error("Critical dependencies missing:")
        for pkg in critical_missing:
            logger.error(f"  - {pkg}")
        logger.error("Please install required packages with: pip install -r requirements.txt")
        # Quote every specifier: an unquoted ">=" is parsed by the shell as an
        # output redirect, so the pasted command would ignore the version pin
        # and write pip's output to a file named "=<version>".
        quoted_specs = " ".join(f'"{pkg}"' for pkg in critical_missing)
        logger.error(f"Or directly with: pip install {quoted_specs}")
        return False

    # If optional packages are missing, just warn
    optional_missing = [pkg for pkg in missing_packages if "optional" in pkg]
    if optional_missing:
        logger.warning("Optional dependencies missing (recommended for best performance):")
        for pkg in optional_missing:
            # Drop the trailing " (optional)" marker, keeping only the specifier
            logger.warning(f"  - {pkg.split(' ')[0]}")

    return True
|
401 |
+
|
402 |
def main():
|
403 |
# Set up logging
|
404 |
logger.info("Starting training process")
|
|
|
406 |
# Parse arguments
|
407 |
args = parse_args()
|
408 |
|
409 |
+
# Check dependencies
|
410 |
+
if not check_dependencies():
|
411 |
+
logger.error("Aborting due to missing critical dependencies")
|
412 |
+
return 1
|
413 |
+
|
414 |
# Load environment variables
|
415 |
load_env_variables()
|
416 |
|