|
import copy
import os
import subprocess

import yaml
|
|
|
|
|
# Base AutoTrain configuration shared by every fine-tuning job.
# Two fields are filled in per run further down: "project_name" and
# data["column_mapping"]["text_column"].
config_template = {
    "task": "llm-sft",  # supervised fine-tuning
    "base_model": "mistralai/Mistral-7B-Instruct-v0.3",
    "project_name": "",  # set per run: "mistral-v03-poe-<suffix>"
    "log": "tensorboard",
    "backend": "spaces-l4x1",  # run on a Hugging Face Space with one L4 GPU
    "data": {
        "path": "derek-thomas/labeled-multiple-choice-explained-mistral-tokenized",
        "train_split": "train",
        "valid_split": None,  # no held-out validation split
        "chat_template": "none",  # dataset is already chat-formatted/tokenized
        "column_mapping": {
            "text_column": ""  # set per run: one conversation_* column per experiment
        },
    },
    "params": {
        "block_size": 1024,
        "model_max_length": 1024,
        "epochs": 2,
        "batch_size": 1,
        "lr": 3e-5,
        "peft": True,  # LoRA fine-tuning rather than full-parameter
        "quantization": "int4",  # QLoRA-style 4-bit base model
        "target_modules": "all-linear",
        "padding": "left",
        "optimizer": "adamw_torch",
        "scheduler": "linear",
        "gradient_accumulation": 8,  # effective batch size = 1 * 8
        "mixed_precision": "bf16",
    },
    "hub": {
        "username": "derek-thomas",
        "token": os.getenv('HF_TOKEN'),  # read from env; None if unset
        "push_to_hub": True,
    },
}
|
|
|
|
|
# One experiment per prompt-ordering variant. Each project suffix maps to the
# dataset column holding that variant's conversations: the column name is just
# "conversation_" plus the suffix with dashes turned into underscores
# (e.g. "RFA-gpt3-5" -> "conversation_RFA_gpt3_5").
project_suffixes = ["RFA-gpt3-5", "RFA-mistral", "FAR-gpt3-5", "FAR-mistral", "FA"]
text_columns = [f"conversation_{suffix.replace('-', '_')}" for suffix in project_suffixes]

# Directory that receives one generated YAML config per experiment.
output_dir = "./autotrain_configs"
os.makedirs(output_dir, exist_ok=True)
|
|
|
|
|
# For each experiment: specialize the template, write it to YAML, and launch
# an AutoTrain run with that config.
for project_suffix, text_column in zip(project_suffixes, text_columns):
    # deepcopy, not dict.copy(): a shallow copy shares the nested "data" /
    # "column_mapping" dicts with the template, so assigning text_column below
    # would mutate config_template itself and leak state across iterations.
    config = copy.deepcopy(config_template)
    config["project_name"] = f"mistral-v03-poe-{project_suffix}"
    config["data"]["column_mapping"]["text_column"] = text_column

    # One YAML file per conversation column, e.g. conversation_FA.yml.
    config_path = os.path.join(output_dir, f"{text_column}.yml")
    with open(config_path, "w") as f:
        yaml.dump(config, f)

    print(f"Running autotrain with config: {config_path}")
    # check=True: abort the script if a run fails instead of silently
    # continuing to launch the remaining experiments.
    subprocess.run(["autotrain", "--config", config_path], check=True)
|
|