George-API committed on
Commit
bf7bd7e
·
verified ·
1 Parent(s): 2b5da3a

Upload folder using huggingface_hub

Browse files
run_transformers_training.py CHANGED
@@ -1,5 +1,5 @@
1
- #!/usr/bin/env python
2
- # coding=utf-8
3
 
4
  # Basic Python imports
5
  import os
@@ -12,6 +12,10 @@ import time
12
  import warnings
13
  from importlib.util import find_spec
14
  import multiprocessing
 
 
 
 
15
 
16
  # Check hardware capabilities first
17
  CUDA_AVAILABLE = "CUDA_VISIBLE_DEVICES" in os.environ or os.environ.get("NVIDIA_VISIBLE_DEVICES") != ""
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
 
4
  # Basic Python imports
5
  import os
 
12
  import warnings
13
  from importlib.util import find_spec
14
  import multiprocessing
15
+ import torch
16
+ import random
17
+ import numpy as np
18
+ from tqdm import tqdm
19
 
20
  # Check hardware capabilities first
21
  CUDA_AVAILABLE = "CUDA_VISIBLE_DEVICES" in os.environ or os.environ.get("NVIDIA_VISIBLE_DEVICES") != ""
transformers_config.json CHANGED
@@ -67,7 +67,14 @@
67
  "offload_params": false
68
  },
69
  "ddp_find_unused_parameters": false,
70
- "dataloader_num_workers": 2
 
 
 
 
 
 
 
71
  },
72
 
73
  "logging": {
@@ -112,11 +119,11 @@
112
  "use_gradient_checkpointing": true,
113
  "use_flash_attention": true
114
  },
115
- "multi_gpu_strategy": "data_parallel"
116
  },
117
  "system_settings": {
118
- "cuda_memory_fraction": 0.85,
119
- "dataloader_num_workers": 2,
120
  "dataloader_pin_memory": true
121
  },
122
  "memory_breakdown": {
 
67
  "offload_params": false
68
  },
69
  "ddp_find_unused_parameters": false,
70
+ "dataloader_num_workers": 2,
71
+ "ddp_config": {
72
+ "enabled": true,
73
+ "backend": "nccl",
74
+ "find_unused_parameters": false,
75
+ "broadcast_buffers": false,
76
+ "gradient_as_bucket_view": true
77
+ }
78
  },
79
 
80
  "logging": {
 
119
  "use_gradient_checkpointing": true,
120
  "use_flash_attention": true
121
  },
122
+ "multi_gpu_strategy": "ddp"
123
  },
124
  "system_settings": {
125
+ "cuda_memory_fraction": 0.75,
126
+ "dataloader_num_workers": 4,
127
  "dataloader_pin_memory": true
128
  },
129
  "memory_breakdown": {