|
import copy
import os
import subprocess

import yaml
|
|
|
|
|
# Base AutoTrain configuration shared by every fine-tuning job.
# Two fields are filled in per run further down: "project_name" and
# data["column_mapping"]["text_column"].
config_template = {
    "task": "llm-sft",  # supervised fine-tuning
    "base_model": "mistralai/Mistral-7B-Instruct-v0.3",
    "project_name": "",  # set per run: "mistral-v03-poe-<suffix>"
    "log": "tensorboard",
    "backend": "spaces-l4x1",  # run on a Hugging Face Space with one L4 GPU
    "data": {
        "path": "derek-thomas/labeled-multiple-choice-explained-mistral-tokenized",
        "train_split": "train",
        "valid_split": None,  # no held-out validation split
        "chat_template": "none",  # dataset is already chat-formatted/tokenized
        "column_mapping": {
            "text_column": ""  # set per run: one conversation_* column per experiment
        },
    },
    "params": {
        "block_size": 1024,
        "model_max_length": 1024,
        "epochs": 2,
        "batch_size": 1,
        "lr": 3e-5,
        "peft": True,  # LoRA fine-tuning rather than full-parameter
        "quantization": "int4",  # QLoRA-style 4-bit base model
        "target_modules": "all-linear",
        "padding": "left",
        "optimizer": "adamw_torch",
        "scheduler": "linear",
        "gradient_accumulation": 8,  # effective batch size = 1 * 8
        "mixed_precision": "bf16",
    },
    "hub": {
        "username": "derek-thomas",
        "token": os.getenv('HF_TOKEN'),  # read from env; None if unset
        "push_to_hub": True,
    },
}
|
|
|
|
|
# One experiment per prompt-ordering variant. Each project suffix maps to the
# dataset column holding that variant's conversations: the column name is just
# "conversation_" plus the suffix with dashes turned into underscores
# (e.g. "RFA-gpt3-5" -> "conversation_RFA_gpt3_5").
project_suffixes = ["RFA-gpt3-5", "RFA-mistral", "FAR-gpt3-5", "FAR-mistral", "FA"]
text_columns = [f"conversation_{suffix.replace('-', '_')}" for suffix in project_suffixes]

# Directory that receives one generated YAML config per experiment.
output_dir = "./autotrain_configs"
os.makedirs(output_dir, exist_ok=True)
|
|
|
|
|
# For each experiment: specialize the template, write it to YAML, and launch
# an AutoTrain run with that config.
for project_suffix, text_column in zip(project_suffixes, text_columns):
    # deepcopy, not dict.copy(): a shallow copy shares the nested "data" /
    # "column_mapping" dicts with the template, so assigning text_column below
    # would mutate config_template itself and leak state across iterations.
    config = copy.deepcopy(config_template)
    config["project_name"] = f"mistral-v03-poe-{project_suffix}"
    config["data"]["column_mapping"]["text_column"] = text_column

    # One YAML file per conversation column, e.g. conversation_FA.yml.
    config_path = os.path.join(output_dir, f"{text_column}.yml")
    with open(config_path, "w") as f:
        yaml.dump(config, f)

    print(f"Running autotrain with config: {config_path}")
    # check=True: abort the script if a run fails instead of silently
    # continuing to launch the remaining experiments.
    subprocess.run(["autotrain", "--config", config_path], check=True)
|
|