derek-thomas
committed on
Commit
·
dcfeee1
1
Parent(s):
5613cc4
Updating for falcon
Browse files- 02-autotrain.ipynb +38 -38
02-autotrain.ipynb
CHANGED
@@ -43,14 +43,14 @@
|
|
43 |
},
|
44 |
{
|
45 |
"cell_type": "code",
|
46 |
-
"execution_count":
|
47 |
"id": "6992324b-173c-4335-b557-cf78fbb2dd93",
|
48 |
"metadata": {},
|
49 |
"outputs": [
|
50 |
{
|
51 |
"data": {
|
52 |
"application/vnd.jupyter.widget-view+json": {
|
53 |
-
"model_id": "
|
54 |
"version_major": 2,
|
55 |
"version_minor": 0
|
56 |
},
|
@@ -88,7 +88,7 @@
|
|
88 |
},
|
89 |
{
|
90 |
"cell_type": "code",
|
91 |
-
"execution_count":
|
92 |
"id": "dc2a8514-51c1-404b-8cfa-6637cc810668",
|
93 |
"metadata": {},
|
94 |
"outputs": [],
|
@@ -143,14 +143,14 @@
|
|
143 |
},
|
144 |
{
|
145 |
"cell_type": "code",
|
146 |
-
"execution_count":
|
147 |
"id": "957eb2b7-feec-422f-ba46-b293d9a77c1b",
|
148 |
"metadata": {},
|
149 |
"outputs": [],
|
150 |
"source": [
|
151 |
-
"project_suffixes = [\"RFA-gpt3-5\", \"RFA-
|
152 |
-
"text_columns = [\"conversation_RFA_gpt3_5\", \"
|
153 |
-
" \"
|
154 |
]
|
155 |
},
|
156 |
{
|
@@ -163,7 +163,7 @@
|
|
163 |
},
|
164 |
{
|
165 |
"cell_type": "code",
|
166 |
-
"execution_count":
|
167 |
"id": "b86702bf-f494-4951-863e-be5b8462fbd1",
|
168 |
"metadata": {},
|
169 |
"outputs": [],
|
@@ -182,7 +182,7 @@
|
|
182 |
},
|
183 |
{
|
184 |
"cell_type": "code",
|
185 |
-
"execution_count":
|
186 |
"id": "025ccd2f-de54-4ac2-9f36-f606876dcd3c",
|
187 |
"metadata": {},
|
188 |
"outputs": [
|
@@ -191,35 +191,35 @@
|
|
191 |
"output_type": "stream",
|
192 |
"text": [
|
193 |
"Running autotrain with config: ./autotrain_configs/conversation_RFA_gpt3_5.yml\n",
|
194 |
-
"INFO |
|
195 |
-
"INFO |
|
196 |
-
"INFO |
|
197 |
-
"INFO |
|
198 |
-
"INFO |
|
199 |
-
"Running autotrain with config: ./autotrain_configs/
|
200 |
-
"INFO |
|
201 |
-
"INFO |
|
202 |
-
"INFO |
|
203 |
-
"INFO |
|
204 |
-
"INFO |
|
205 |
"Running autotrain with config: ./autotrain_configs/conversation_FAR_gpt3_5.yml\n",
|
206 |
-
"INFO |
|
207 |
-
"INFO |
|
208 |
-
"INFO |
|
209 |
-
"INFO |
|
210 |
-
"INFO |
|
211 |
-
"Running autotrain with config: ./autotrain_configs/
|
212 |
-
"INFO |
|
213 |
-
"INFO |
|
214 |
-
"INFO |
|
215 |
-
"INFO |
|
216 |
-
"INFO |
|
217 |
"Running autotrain with config: ./autotrain_configs/conversation_FA.yml\n",
|
218 |
-
"INFO |
|
219 |
-
"INFO |
|
220 |
-
"INFO |
|
221 |
-
"INFO |
|
222 |
-
"INFO |
|
223 |
]
|
224 |
}
|
225 |
],
|
@@ -228,7 +228,7 @@
|
|
228 |
"for project_suffix, text_column in zip(project_suffixes, text_columns):\n",
|
229 |
" # Modify the config\n",
|
230 |
" config = config_template.copy()\n",
|
231 |
-
" config[\"project_name\"] = f\"
|
232 |
" config[\"data\"][\"column_mapping\"][\"text_column\"] = text_column\n",
|
233 |
"\n",
|
234 |
" # Save the config to a YAML file\n",
|
@@ -266,7 +266,7 @@
|
|
266 |
"name": "python",
|
267 |
"nbconvert_exporter": "python",
|
268 |
"pygments_lexer": "ipython3",
|
269 |
-
"version": "3.11.
|
270 |
}
|
271 |
},
|
272 |
"nbformat": 4,
|
|
|
43 |
},
|
44 |
{
|
45 |
"cell_type": "code",
|
46 |
+
"execution_count": 2,
|
47 |
"id": "6992324b-173c-4335-b557-cf78fbb2dd93",
|
48 |
"metadata": {},
|
49 |
"outputs": [
|
50 |
{
|
51 |
"data": {
|
52 |
"application/vnd.jupyter.widget-view+json": {
|
53 |
+
"model_id": "b5441f4018234a25a299775d77f880b3",
|
54 |
"version_major": 2,
|
55 |
"version_minor": 0
|
56 |
},
|
|
|
88 |
},
|
89 |
{
|
90 |
"cell_type": "code",
|
91 |
+
"execution_count": 3,
|
92 |
"id": "dc2a8514-51c1-404b-8cfa-6637cc810668",
|
93 |
"metadata": {},
|
94 |
"outputs": [],
|
|
|
143 |
},
|
144 |
{
|
145 |
"cell_type": "code",
|
146 |
+
"execution_count": 4,
|
147 |
"id": "957eb2b7-feec-422f-ba46-b293d9a77c1b",
|
148 |
"metadata": {},
|
149 |
"outputs": [],
|
150 |
"source": [
|
151 |
+
"project_suffixes = [\"RFA-gpt3-5\", \"RFA-falcon\", \"FAR-gpt3-5\", \"FAR-falcon\", \"FA\"]\n",
|
152 |
+
"text_columns = [\"conversation_RFA_gpt3_5\", \"conversation_RFA_falcon\", \"conversation_FAR_gpt3_5\",\n",
|
153 |
+
" \"conversation_FAR_falcon\", \"conversation_FA\"]"
|
154 |
]
|
155 |
},
|
156 |
{
|
|
|
163 |
},
|
164 |
{
|
165 |
"cell_type": "code",
|
166 |
+
"execution_count": 5,
|
167 |
"id": "b86702bf-f494-4951-863e-be5b8462fbd1",
|
168 |
"metadata": {},
|
169 |
"outputs": [],
|
|
|
182 |
},
|
183 |
{
|
184 |
"cell_type": "code",
|
185 |
+
"execution_count": 6,
|
186 |
"id": "025ccd2f-de54-4ac2-9f36-f606876dcd3c",
|
187 |
"metadata": {},
|
188 |
"outputs": [
|
|
|
191 |
"output_type": "stream",
|
192 |
"text": [
|
193 |
"Running autotrain with config: ./autotrain_configs/conversation_RFA_gpt3_5.yml\n",
|
194 |
+
"INFO | 2025-01-08 10:20:38 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_RFA_gpt3_5.yml\n",
|
195 |
+
"INFO | 2025-01-08 10:20:38 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
|
196 |
+
"INFO | 2025-01-08 10:20:38 | autotrain.parser:__post_init__:166 - Using backend: spaces-l4x1\n",
|
197 |
+
"INFO | 2025-01-08 10:20:38 | autotrain.parser:run:224 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'falcon-v03-poe-RFA-gpt3-5', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_RFA_gpt3_5', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
|
198 |
+
"INFO | 2025-01-08 10:20:43 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-RFA-gpt3-5\n",
|
199 |
+
"Running autotrain with config: ./autotrain_configs/conversation_RFA_falcon.yml\n",
|
200 |
+
"INFO | 2025-01-08 10:20:46 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_RFA_falcon.yml\n",
|
201 |
+
"INFO | 2025-01-08 10:20:46 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
|
202 |
+
"INFO | 2025-01-08 10:20:46 | autotrain.parser:__post_init__:166 - Using backend: spaces-l4x1\n",
|
203 |
+
"INFO | 2025-01-08 10:20:46 | autotrain.parser:run:224 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'falcon-v03-poe-RFA-falcon', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_RFA_falcon', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
|
204 |
+
"INFO | 2025-01-08 10:20:53 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-RFA-falcon\n",
|
205 |
"Running autotrain with config: ./autotrain_configs/conversation_FAR_gpt3_5.yml\n",
|
206 |
+
"INFO | 2025-01-08 10:20:56 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_FAR_gpt3_5.yml\n",
|
207 |
+
"INFO | 2025-01-08 10:20:56 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
|
208 |
+
"INFO | 2025-01-08 10:20:56 | autotrain.parser:__post_init__:166 - Using backend: spaces-l4x1\n",
|
209 |
+
"INFO | 2025-01-08 10:20:56 | autotrain.parser:run:224 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'falcon-v03-poe-FAR-gpt3-5', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FAR_gpt3_5', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
|
210 |
+
"INFO | 2025-01-08 10:21:02 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-FAR-gpt3-5\n",
|
211 |
+
"Running autotrain with config: ./autotrain_configs/conversation_FAR_falcon.yml\n",
|
212 |
+
"INFO | 2025-01-08 10:21:05 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_FAR_falcon.yml\n",
|
213 |
+
"INFO | 2025-01-08 10:21:05 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
|
214 |
+
"INFO | 2025-01-08 10:21:05 | autotrain.parser:__post_init__:166 - Using backend: spaces-l4x1\n",
|
215 |
+
"INFO | 2025-01-08 10:21:05 | autotrain.parser:run:224 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'falcon-v03-poe-FAR-falcon', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FAR_falcon', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
|
216 |
+
"INFO | 2025-01-08 10:21:12 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-FAR-falcon\n",
|
217 |
"Running autotrain with config: ./autotrain_configs/conversation_FA.yml\n",
|
218 |
+
"INFO | 2025-01-08 10:21:15 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_FA.yml\n",
|
219 |
+
"INFO | 2025-01-08 10:21:15 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
|
220 |
+
"INFO | 2025-01-08 10:21:15 | autotrain.parser:__post_init__:166 - Using backend: spaces-l4x1\n",
|
221 |
+
"INFO | 2025-01-08 10:21:15 | autotrain.parser:run:224 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'falcon-v03-poe-FA', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FA', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
|
222 |
+
"INFO | 2025-01-08 10:21:22 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-FA\n"
|
223 |
]
|
224 |
}
|
225 |
],
|
|
|
228 |
"for project_suffix, text_column in zip(project_suffixes, text_columns):\n",
|
229 |
" # Modify the config\n",
|
230 |
" config = config_template.copy()\n",
|
231 |
+
" config[\"project_name\"] = f\"falcon-v03-poe-{project_suffix}\"\n",
|
232 |
" config[\"data\"][\"column_mapping\"][\"text_column\"] = text_column\n",
|
233 |
"\n",
|
234 |
" # Save the config to a YAML file\n",
|
|
|
266 |
"name": "python",
|
267 |
"nbconvert_exporter": "python",
|
268 |
"pygments_lexer": "ipython3",
|
269 |
+
"version": "3.11.11"
|
270 |
}
|
271 |
},
|
272 |
"nbformat": 4,
|