derek-thomas committed
Commit dcfeee1 · 1 Parent(s): 5613cc4

Updating for falcon

Files changed (1)
  1. 02-autotrain.ipynb +38 -38
02-autotrain.ipynb CHANGED
@@ -43,14 +43,14 @@
   },
   {
   "cell_type": "code",
- "execution_count": 3,
+ "execution_count": 2,
   "id": "6992324b-173c-4335-b557-cf78fbb2dd93",
   "metadata": {},
   "outputs": [
   {
   "data": {
   "application/vnd.jupyter.widget-view+json": {
- "model_id": "24ea5bd118ed4632a6ad859c4c976e66",
+ "model_id": "b5441f4018234a25a299775d77f880b3",
   "version_major": 2,
   "version_minor": 0
   },
@@ -88,7 +88,7 @@
   },
   {
   "cell_type": "code",
- "execution_count": 4,
+ "execution_count": 3,
   "id": "dc2a8514-51c1-404b-8cfa-6637cc810668",
   "metadata": {},
   "outputs": [],
@@ -143,14 +143,14 @@
   },
   {
   "cell_type": "code",
- "execution_count": 5,
+ "execution_count": 4,
   "id": "957eb2b7-feec-422f-ba46-b293d9a77c1b",
   "metadata": {},
   "outputs": [],
   "source": [
- "project_suffixes = [\"RFA-gpt3-5\", \"RFA-mistral\", \"FAR-gpt3-5\", \"FAR-mistral\", \"FA\"]\n",
- "text_columns = [\"conversation_RFA_gpt3_5\", \"conversation_RFA_mistral\", \"conversation_FAR_gpt3_5\",\n",
- " \"conversation_FAR_mistral\", \"conversation_FA\"]"
+ "project_suffixes = [\"RFA-gpt3-5\", \"RFA-falcon\", \"FAR-gpt3-5\", \"FAR-falcon\", \"FA\"]\n",
+ "text_columns = [\"conversation_RFA_gpt3_5\", \"conversation_RFA_falcon\", \"conversation_FAR_gpt3_5\",\n",
+ " \"conversation_FAR_falcon\", \"conversation_FA\"]"
   ]
   },
   {
@@ -163,7 +163,7 @@
   },
   {
   "cell_type": "code",
- "execution_count": 6,
+ "execution_count": 5,
   "id": "b86702bf-f494-4951-863e-be5b8462fbd1",
   "metadata": {},
   "outputs": [],
@@ -182,7 +182,7 @@
   },
   {
   "cell_type": "code",
- "execution_count": 9,
+ "execution_count": 6,
   "id": "025ccd2f-de54-4ac2-9f36-f606876dcd3c",
   "metadata": {},
   "outputs": [
@@ -191,35 +191,35 @@
   "output_type": "stream",
   "text": [
   "Running autotrain with config: ./autotrain_configs/conversation_RFA_gpt3_5.yml\n",
- "INFO | 2024-12-12 20:45:45 | autotrain.cli.autotrain:main:60 - Using AutoTrain configuration: ./autotrain_configs/conversation_RFA_gpt3_5.yml\n",
- "INFO | 2024-12-12 20:45:45 | autotrain.parser:__post_init__:170 - Running task: lm_training\n",
- "INFO | 2024-12-12 20:45:45 | autotrain.parser:__post_init__:171 - Using backend: spaces-l4x1\n",
- "INFO | 2024-12-12 20:45:45 | autotrain.parser:run:234 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'mistral-v03-poe-RFA-gpt3-5', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_RFA_gpt3_5', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
- "INFO | 2024-12-12 20:45:52 | autotrain.parser:run:239 - Job ID: derek-thomas/autotrain-mistral-v03-poe-RFA-gpt3-5\n",
- "Running autotrain with config: ./autotrain_configs/conversation_RFA_mistral.yml\n",
- "INFO | 2024-12-12 20:45:56 | autotrain.cli.autotrain:main:60 - Using AutoTrain configuration: ./autotrain_configs/conversation_RFA_mistral.yml\n",
- "INFO | 2024-12-12 20:45:56 | autotrain.parser:__post_init__:170 - Running task: lm_training\n",
- "INFO | 2024-12-12 20:45:56 | autotrain.parser:__post_init__:171 - Using backend: spaces-l4x1\n",
- "INFO | 2024-12-12 20:45:56 | autotrain.parser:run:234 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'mistral-v03-poe-RFA-mistral', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_RFA_mistral', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
- "INFO | 2024-12-12 20:46:01 | autotrain.parser:run:239 - Job ID: derek-thomas/autotrain-mistral-v03-poe-RFA-mistral\n",
+ "INFO | 2025-01-08 10:20:38 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_RFA_gpt3_5.yml\n",
+ "INFO | 2025-01-08 10:20:38 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
+ "INFO | 2025-01-08 10:20:38 | autotrain.parser:__post_init__:166 - Using backend: spaces-l4x1\n",
+ "INFO | 2025-01-08 10:20:38 | autotrain.parser:run:224 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'falcon-v03-poe-RFA-gpt3-5', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_RFA_gpt3_5', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
+ "INFO | 2025-01-08 10:20:43 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-RFA-gpt3-5\n",
+ "Running autotrain with config: ./autotrain_configs/conversation_RFA_falcon.yml\n",
+ "INFO | 2025-01-08 10:20:46 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_RFA_falcon.yml\n",
+ "INFO | 2025-01-08 10:20:46 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
+ "INFO | 2025-01-08 10:20:46 | autotrain.parser:__post_init__:166 - Using backend: spaces-l4x1\n",
+ "INFO | 2025-01-08 10:20:46 | autotrain.parser:run:224 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'falcon-v03-poe-RFA-falcon', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_RFA_falcon', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
+ "INFO | 2025-01-08 10:20:53 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-RFA-falcon\n",
   "Running autotrain with config: ./autotrain_configs/conversation_FAR_gpt3_5.yml\n",
- "INFO | 2024-12-12 20:46:05 | autotrain.cli.autotrain:main:60 - Using AutoTrain configuration: ./autotrain_configs/conversation_FAR_gpt3_5.yml\n",
- "INFO | 2024-12-12 20:46:05 | autotrain.parser:__post_init__:170 - Running task: lm_training\n",
- "INFO | 2024-12-12 20:46:05 | autotrain.parser:__post_init__:171 - Using backend: spaces-l4x1\n",
- "INFO | 2024-12-12 20:46:05 | autotrain.parser:run:234 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'mistral-v03-poe-FAR-gpt3-5', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FAR_gpt3_5', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
- "INFO | 2024-12-12 20:46:12 | autotrain.parser:run:239 - Job ID: derek-thomas/autotrain-mistral-v03-poe-FAR-gpt3-5\n",
- "Running autotrain with config: ./autotrain_configs/conversation_FAR_mistral.yml\n",
- "INFO | 2024-12-12 20:46:16 | autotrain.cli.autotrain:main:60 - Using AutoTrain configuration: ./autotrain_configs/conversation_FAR_mistral.yml\n",
- "INFO | 2024-12-12 20:46:16 | autotrain.parser:__post_init__:170 - Running task: lm_training\n",
- "INFO | 2024-12-12 20:46:16 | autotrain.parser:__post_init__:171 - Using backend: spaces-l4x1\n",
- "INFO | 2024-12-12 20:46:16 | autotrain.parser:run:234 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'mistral-v03-poe-FAR-mistral', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FAR_mistral', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
- "INFO | 2024-12-12 20:46:22 | autotrain.parser:run:239 - Job ID: derek-thomas/autotrain-mistral-v03-poe-FAR-mistral\n",
+ "INFO | 2025-01-08 10:20:56 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_FAR_gpt3_5.yml\n",
+ "INFO | 2025-01-08 10:20:56 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
+ "INFO | 2025-01-08 10:20:56 | autotrain.parser:__post_init__:166 - Using backend: spaces-l4x1\n",
+ "INFO | 2025-01-08 10:20:56 | autotrain.parser:run:224 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'falcon-v03-poe-FAR-gpt3-5', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FAR_gpt3_5', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
+ "INFO | 2025-01-08 10:21:02 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-FAR-gpt3-5\n",
+ "Running autotrain with config: ./autotrain_configs/conversation_FAR_falcon.yml\n",
+ "INFO | 2025-01-08 10:21:05 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_FAR_falcon.yml\n",
+ "INFO | 2025-01-08 10:21:05 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
+ "INFO | 2025-01-08 10:21:05 | autotrain.parser:__post_init__:166 - Using backend: spaces-l4x1\n",
+ "INFO | 2025-01-08 10:21:05 | autotrain.parser:run:224 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'falcon-v03-poe-FAR-falcon', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FAR_falcon', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
+ "INFO | 2025-01-08 10:21:12 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-FAR-falcon\n",
   "Running autotrain with config: ./autotrain_configs/conversation_FA.yml\n",
- "INFO | 2024-12-12 20:46:25 | autotrain.cli.autotrain:main:60 - Using AutoTrain configuration: ./autotrain_configs/conversation_FA.yml\n",
- "INFO | 2024-12-12 20:46:25 | autotrain.parser:__post_init__:170 - Running task: lm_training\n",
- "INFO | 2024-12-12 20:46:25 | autotrain.parser:__post_init__:171 - Using backend: spaces-l4x1\n",
- "INFO | 2024-12-12 20:46:25 | autotrain.parser:run:234 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'mistral-v03-poe-FA', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FA', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
- "INFO | 2024-12-12 20:46:31 | autotrain.parser:run:239 - Job ID: derek-thomas/autotrain-mistral-v03-poe-FA\n"
+ "INFO | 2025-01-08 10:21:15 | autotrain.cli.autotrain:main:58 - Using AutoTrain configuration: ./autotrain_configs/conversation_FA.yml\n",
+ "INFO | 2025-01-08 10:21:15 | autotrain.parser:__post_init__:165 - Running task: lm_training\n",
+ "INFO | 2025-01-08 10:21:15 | autotrain.parser:__post_init__:166 - Using backend: spaces-l4x1\n",
+ "INFO | 2025-01-08 10:21:15 | autotrain.parser:run:224 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'falcon-v03-poe-FA', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FA', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
+ "INFO | 2025-01-08 10:21:22 | autotrain.parser:run:229 - Job ID: derek-thomas/autotrain-falcon-v03-poe-FA\n"
   ]
   }
   ],
@@ -228,7 +228,7 @@
   "for project_suffix, text_column in zip(project_suffixes, text_columns):\n",
   " # Modify the config\n",
   " config = config_template.copy()\n",
- " config[\"project_name\"] = f\"mistral-v03-poe-{project_suffix}\"\n",
+ " config[\"project_name\"] = f\"falcon-v03-poe-{project_suffix}\"\n",
   " config[\"data\"][\"column_mapping\"][\"text_column\"] = text_column\n",
   "\n",
   " # Save the config to a YAML file\n",
@@ -266,7 +266,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
- "version": "3.11.10"
+ "version": "3.11.11"
   }
   },
   "nbformat": 4,