Grafted experiment: PyTorch model trained with tfds imagenet2012 data
Browse files- .gitattributes +1 -0
- grafted/big_vision_metrics.txt +0 -0
- grafted/checkpoint.pth.tar +3 -0
- grafted/config.json +1 -0
- grafted/wandb/run-20241127_213238-torch-grafted-redux/files/config.yaml +95 -0
- grafted/wandb/run-20241127_213238-torch-grafted-redux/files/output.log +31 -0
- grafted/wandb/run-20241127_213238-torch-grafted-redux/files/requirements.txt +133 -0
- grafted/wandb/run-20241127_213238-torch-grafted-redux/files/wandb-metadata.json +49 -0
- grafted/wandb/run-20241127_213238-torch-grafted-redux/files/wandb-summary.json +1 -0
- grafted/wandb/run-20241127_213238-torch-grafted-redux/logs/debug-internal.log +18 -0
- grafted/wandb/run-20241127_213238-torch-grafted-redux/logs/debug.log +54 -0
- grafted/wandb/run-20241127_213238-torch-grafted-redux/run-torch-grafted-redux.wandb +0 -0
- grafted/wandb/run-20241127_213407-torch-grafted-redux/files/config.yaml +97 -0
- grafted/wandb/run-20241127_213407-torch-grafted-redux/files/output.log +31 -0
- grafted/wandb/run-20241127_213407-torch-grafted-redux/files/requirements.txt +133 -0
- grafted/wandb/run-20241127_213407-torch-grafted-redux/files/wandb-metadata.json +49 -0
- grafted/wandb/run-20241127_213407-torch-grafted-redux/files/wandb-summary.json +1 -0
- grafted/wandb/run-20241127_213407-torch-grafted-redux/logs/debug-internal.log +18 -0
- grafted/wandb/run-20241127_213407-torch-grafted-redux/logs/debug.log +55 -0
- grafted/wandb/run-20241127_213407-torch-grafted-redux/run-torch-grafted-redux.wandb +0 -0
- grafted/wandb/run-20241127_213838-torch-grafted-redux/files/config.yaml +97 -0
- grafted/wandb/run-20241127_213838-torch-grafted-redux/files/output.log +152 -0
- grafted/wandb/run-20241127_213838-torch-grafted-redux/files/requirements.txt +133 -0
- grafted/wandb/run-20241127_213838-torch-grafted-redux/files/wandb-metadata.json +49 -0
- grafted/wandb/run-20241127_213838-torch-grafted-redux/files/wandb-summary.json +1 -0
- grafted/wandb/run-20241127_213838-torch-grafted-redux/logs/debug-internal.log +18 -0
- grafted/wandb/run-20241127_213838-torch-grafted-redux/logs/debug.log +55 -0
- grafted/wandb/run-20241127_213838-torch-grafted-redux/run-torch-grafted-redux.wandb +0 -0
- grafted/wandb/run-20241127_214327-torch-grafted-redux/files/config.yaml +97 -0
- grafted/wandb/run-20241127_214327-torch-grafted-redux/files/output.log +211 -0
- grafted/wandb/run-20241127_214327-torch-grafted-redux/files/requirements.txt +134 -0
- grafted/wandb/run-20241127_214327-torch-grafted-redux/files/wandb-metadata.json +49 -0
- grafted/wandb/run-20241127_214327-torch-grafted-redux/files/wandb-summary.json +1 -0
- grafted/wandb/run-20241127_214327-torch-grafted-redux/logs/debug-internal.log +18 -0
- grafted/wandb/run-20241127_214327-torch-grafted-redux/logs/debug.log +55 -0
- grafted/wandb/run-20241127_214327-torch-grafted-redux/run-torch-grafted-redux.wandb +0 -0
- grafted/wandb/run-20241127_215015-torch-grafted-redux/files/config.yaml +98 -0
- grafted/wandb/run-20241127_215015-torch-grafted-redux/files/output.log +0 -0
- grafted/wandb/run-20241127_215015-torch-grafted-redux/files/requirements.txt +134 -0
- grafted/wandb/run-20241127_215015-torch-grafted-redux/files/wandb-metadata.json +49 -0
- grafted/wandb/run-20241127_215015-torch-grafted-redux/files/wandb-summary.json +1 -0
- grafted/wandb/run-20241127_215015-torch-grafted-redux/logs/debug-internal.log +247 -0
- grafted/wandb/run-20241127_215015-torch-grafted-redux/logs/debug.log +55 -0
- grafted/wandb/run-20241127_215015-torch-grafted-redux/run-torch-grafted-redux.wandb +3 -0
- grafted/wandb/wandb-resume.json +1 -0
.gitattributes
CHANGED
@@ -35,3 +35,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
05-15_2003/wandb/run-20240515_200355-torch-on-big-vision-bfloat16/logs/debug-internal.log filter=lfs diff=lfs merge=lfs -text
|
37 |
05-15_2003/wandb/run-20240515_200355-torch-on-big-vision-bfloat16/run-torch-on-big-vision-bfloat16.wandb filter=lfs diff=lfs merge=lfs -text
|
|
|
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
05-15_2003/wandb/run-20240515_200355-torch-on-big-vision-bfloat16/logs/debug-internal.log filter=lfs diff=lfs merge=lfs -text
|
37 |
05-15_2003/wandb/run-20240515_200355-torch-on-big-vision-bfloat16/run-torch-on-big-vision-bfloat16.wandb filter=lfs diff=lfs merge=lfs -text
|
38 |
+
grafted/wandb/run-20241127_215015-torch-grafted-redux/run-torch-grafted-redux.wandb filter=lfs diff=lfs merge=lfs -text
|
grafted/big_vision_metrics.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
grafted/checkpoint.pth.tar
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1a8535185a902993c4e798b63171f29063335d6073303d6f357609c25b6aea7
|
3 |
+
size 264187339
|
grafted/config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"seed": 0, "total_epochs": 90, "num_classes": 1000, "loss": "softmax_xent", "input": {"data": {"name": "imagenet2012", "split": "train"}, "batch_size": 1024, "accum_freq": 8, "cache_raw": false, "shuffle_buffer_size": 150000, "pp": "decode_jpeg_and_inception_crop(224)|flip_lr|randaug(2,10)|value_range(-1, 1)|onehot(1000, key=\"label\", key_result=\"labels\")|keep(\"image\", \"labels\")"}, "pp_modules": ["ops_general", "ops_image", "ops_text", "archive.randaug"], "log_training_steps": 50, "ckpt_steps": 1000, "model_name": "vit", "model": {"variant": "S/16", "rep_size": false, "pool_type": "gap", "posemb": "sincos2d"}, "grad_clip_norm": 1.0, "optax_name": "scale_by_adam", "optax": {"mu_dtype": "bfloat16"}, "lr": 0.001, "wd": 0.0001, "schedule": {"warmup_steps": 10000, "decay_type": "cosine"}, "mixup": {"p": 0.2, "fold_in": null}, "evals": {"val": {"type": "classification", "data": {"name": "imagenet2012", "split": "validation"}, "pp_fn": "decode|resize_small(256)|central_crop(224)|value_range(-1, 1)|onehot(1000, key=\"label\", key_result=\"labels\")|keep(\"image\", \"labels\")", "loss_name": "softmax_xent", "log_steps": 2500}}}
|
grafted/wandb/run-20241127_213238-torch-grafted-redux/files/config.yaml
ADDED
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_allow_dotted_keys:
|
2 |
+
value: false
|
3 |
+
_convert_dict:
|
4 |
+
value: true
|
5 |
+
_fields:
|
6 |
+
value:
|
7 |
+
ckpt_steps: 1000
|
8 |
+
evals: |
|
9 |
+
val:
|
10 |
+
data:
|
11 |
+
name: imagenet2012
|
12 |
+
split: validation
|
13 |
+
log_steps: 2500
|
14 |
+
loss_name: softmax_xent
|
15 |
+
pp_fn: decode|resize_small(256)|central_crop(224)|value_range(-1, 1)|onehot(1000,
|
16 |
+
key="label", key_result="labels")|keep("image", "labels")
|
17 |
+
type: classification
|
18 |
+
grad_clip_norm: 1
|
19 |
+
input: |
|
20 |
+
accum_freq: 8
|
21 |
+
batch_size: 1024
|
22 |
+
cache_raw: false
|
23 |
+
data:
|
24 |
+
name: imagenet2012
|
25 |
+
split: train
|
26 |
+
pp: decode_jpeg_and_inception_crop(224)|flip_lr|randaug(2,10)|value_range(-1, 1)|onehot(1000,
|
27 |
+
key="label", key_result="labels")|keep("image", "labels")
|
28 |
+
shuffle_buffer_size: 150000
|
29 |
+
log_training_steps: 50
|
30 |
+
loss: softmax_xent
|
31 |
+
lr: 0.001
|
32 |
+
mixup: |
|
33 |
+
fold_in: null
|
34 |
+
p: 0.2
|
35 |
+
model: |
|
36 |
+
pool_type: gap
|
37 |
+
posemb: sincos2d
|
38 |
+
rep_size: false
|
39 |
+
variant: S/16
|
40 |
+
model_name: vit
|
41 |
+
num_classes: 1000
|
42 |
+
optax: |
|
43 |
+
mu_dtype: bfloat16
|
44 |
+
optax_name: scale_by_adam
|
45 |
+
pp_modules:
|
46 |
+
- ops_general
|
47 |
+
- ops_image
|
48 |
+
- ops_text
|
49 |
+
- archive.randaug
|
50 |
+
schedule: |
|
51 |
+
decay_type: cosine
|
52 |
+
warmup_steps: 10000
|
53 |
+
seed: 0
|
54 |
+
total_epochs: 90
|
55 |
+
wd: 0.0001
|
56 |
+
_locked:
|
57 |
+
value: true
|
58 |
+
_sort_keys:
|
59 |
+
value: true
|
60 |
+
_type_safe:
|
61 |
+
value: true
|
62 |
+
_wandb:
|
63 |
+
value:
|
64 |
+
cli_version: 0.18.7
|
65 |
+
m: []
|
66 |
+
python_version: 3.11.10
|
67 |
+
t:
|
68 |
+
"1":
|
69 |
+
- 1
|
70 |
+
- 2
|
71 |
+
- 3
|
72 |
+
- 12
|
73 |
+
- 41
|
74 |
+
- 45
|
75 |
+
- 55
|
76 |
+
"2":
|
77 |
+
- 1
|
78 |
+
- 2
|
79 |
+
- 3
|
80 |
+
- 12
|
81 |
+
- 41
|
82 |
+
- 45
|
83 |
+
- 55
|
84 |
+
"3":
|
85 |
+
- 13
|
86 |
+
- 14
|
87 |
+
- 16
|
88 |
+
- 23
|
89 |
+
- 55
|
90 |
+
"4": 3.11.10
|
91 |
+
"5": 0.18.7
|
92 |
+
"8":
|
93 |
+
- 5
|
94 |
+
"12": 0.18.7
|
95 |
+
"13": linux-x86_64
|
grafted/wandb/run-20241127_213238-torch-grafted-redux/files/output.log
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
I1127 21:32:38.985260 139930170870656 train.py:125] [33mNOTE[0m: Initializing train dataset...
|
2 |
+
I1127 21:32:38.985419 139930170870656 train.py:125] [33mNOTE[0m: Global batch size 1024 on 1 hosts results in 1024 local batch size. With 1 dev per host (1 dev total), that's a 1024 per-device batch size.
|
3 |
+
I1127 21:32:39.139223 139930170870656 logging_logger.py:49] Constructing tf.data.Dataset imagenet2012 for split _EvenSplit(split='train', index=0, count=1, drop_remainder=False), from /home/jason-chou/tensorflow_datasets/imagenet2012/5.1.0
|
4 |
+
Traceback (most recent call last):
|
5 |
+
File "<frozen runpy>", line 198, in _run_module_as_main
|
6 |
+
File "<frozen runpy>", line 88, in _run_code
|
7 |
+
File "/home/jason-chou/Downloads/big_vision/big_vision/train.py", line 396, in <module>
|
8 |
+
app.run(main)
|
9 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/absl/app.py", line 308, in run
|
10 |
+
_run_main(main, args)
|
11 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/absl/app.py", line 254, in _run_main
|
12 |
+
sys.exit(main(argv))
|
13 |
+
^^^^^^^^^^
|
14 |
+
File "/home/jason-chou/Downloads/big_vision/big_vision/train.py", line 189, in main
|
15 |
+
train_ds, ntrain_img = input_pipeline.training(config.input)
|
16 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
17 |
+
File "/home/jason-chou/Downloads/big_vision/big_vision/input_pipeline.py", line 101, in training
|
18 |
+
data=train_data.get_tfdata(ordered=False),
|
19 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
20 |
+
File "/home/jason-chou/Downloads/big_vision/big_vision/datasets/tfds.py", line 39, in get_tfdata
|
21 |
+
return _get_dataset_from_builder(
|
22 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
23 |
+
File "/home/jason-chou/Downloads/big_vision/big_vision/datasets/tfds.py", line 70, in _get_dataset_from_builder
|
24 |
+
ds = builder.as_dataset(
|
25 |
+
^^^^^^^^^^^^^^^^^^^
|
26 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/tensorflow_datasets/core/logging/__init__.py", line 176, in __call__
|
27 |
+
return function(*args, **kwargs)
|
28 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^
|
29 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/tensorflow_datasets/core/dataset_builder.py", line 1008, in as_dataset
|
30 |
+
raise AssertionError(
|
31 |
+
AssertionError: Dataset imagenet2012: could not find data in /home/jason-chou/tensorflow_datasets. Please make sure to call dataset_builder.download_and_prepare(), or pass download=True to tfds.load() before trying to access the tf.data.Dataset object.
|
grafted/wandb/run-20241127_213238-torch-grafted-redux/files/requirements.txt
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
array_record==0.5.1
|
2 |
+
wandb==0.18.7
|
3 |
+
nvidia-curand-cu12==10.3.2.106
|
4 |
+
requests-oauthlib==2.0.0
|
5 |
+
zipp==3.21.0
|
6 |
+
Werkzeug==3.1.3
|
7 |
+
simple-parsing==0.1.6
|
8 |
+
mdurl==0.1.2
|
9 |
+
keras==2.15.0
|
10 |
+
nvidia-cuda-nvcc-cu12==12.6.85
|
11 |
+
google-auth-oauthlib==1.2.1
|
12 |
+
jaxlib==0.4.34
|
13 |
+
tf_keras==2.15.1
|
14 |
+
oauthlib==3.2.2
|
15 |
+
tensorflow-probability==0.25.0
|
16 |
+
cachetools==5.5.0
|
17 |
+
Jinja2==3.1.3
|
18 |
+
rich==13.9.4
|
19 |
+
filelock==3.13.1
|
20 |
+
google-pasta==0.2.0
|
21 |
+
optax==0.2.4
|
22 |
+
toolz==1.0.0
|
23 |
+
gast==0.6.0
|
24 |
+
tensorboard==2.15.2
|
25 |
+
pyasn1_modules==0.4.1
|
26 |
+
nvidia-cudnn-cu12==9.1.0.70
|
27 |
+
opt_einsum==3.4.0
|
28 |
+
nvidia-nvjitlink-cu12==12.6.85
|
29 |
+
chex==0.1.87
|
30 |
+
namex==0.0.8
|
31 |
+
termcolor==2.5.0
|
32 |
+
flax==0.10.2
|
33 |
+
cloudpickle==3.1.0
|
34 |
+
numpy==1.26.4
|
35 |
+
nvidia-nccl-cu12==2.21.5
|
36 |
+
tensorflow-cpu==2.15.0
|
37 |
+
nvidia-cusolver-cu12==11.4.5.107
|
38 |
+
typing_extensions==4.12.2
|
39 |
+
tensorflow-addons==0.23.0
|
40 |
+
typeguard==2.13.3
|
41 |
+
absl-py==2.1.0
|
42 |
+
flatbuffers==24.3.25
|
43 |
+
dlpack==0.1
|
44 |
+
setuptools==65.5.0
|
45 |
+
protobuf==4.25.5
|
46 |
+
jax-cuda12-plugin==0.4.35
|
47 |
+
tensorflow==2.15.0
|
48 |
+
msgpack==1.1.0
|
49 |
+
networkx==3.2.1
|
50 |
+
docker-pycreds==0.4.0
|
51 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
52 |
+
pillow==11.0.0
|
53 |
+
libclang==18.1.1
|
54 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
55 |
+
distrax==0.1.5
|
56 |
+
orbax-checkpoint==0.10.1
|
57 |
+
PyYAML==6.0.2
|
58 |
+
urllib3==2.2.3
|
59 |
+
aqtp==0.8.2
|
60 |
+
tensorflow-metadata==1.16.1
|
61 |
+
etils==1.11.0
|
62 |
+
smmap==5.0.1
|
63 |
+
pyasn1==0.6.1
|
64 |
+
docstring_parser==0.16
|
65 |
+
google-auth==2.36.0
|
66 |
+
simplejson==3.19.3
|
67 |
+
mpmath==1.3.0
|
68 |
+
h5py==3.12.1
|
69 |
+
jax-cuda12-pjrt==0.4.35
|
70 |
+
tensorflow-io-gcs-filesystem==0.37.1
|
71 |
+
tensorflow-estimator==2.15.0
|
72 |
+
triton==3.1.0
|
73 |
+
rsa==4.9
|
74 |
+
panopticapi==0.1
|
75 |
+
tensorflow-hub==0.16.1
|
76 |
+
requests==2.32.3
|
77 |
+
scipy==1.14.1
|
78 |
+
ml-dtypes==0.2.0
|
79 |
+
markdown-it-py==3.0.0
|
80 |
+
tensorflow-text==2.15.0
|
81 |
+
wrapt==1.14.1
|
82 |
+
immutabledict==4.2.1
|
83 |
+
MarkupSafe==3.0.2
|
84 |
+
jax==0.4.35
|
85 |
+
torch==2.5.1+cu121
|
86 |
+
wheel==0.45.1
|
87 |
+
einops==0.8.0
|
88 |
+
sentry-sdk==2.19.0
|
89 |
+
torchvision==0.20.1+cu121
|
90 |
+
humanize==4.11.0
|
91 |
+
toml==0.10.2
|
92 |
+
tensorstore==0.1.69
|
93 |
+
six==1.16.0
|
94 |
+
promise==2.3
|
95 |
+
certifi==2024.8.30
|
96 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
97 |
+
flaxformer==0.8.8
|
98 |
+
nvidia-cufft-cu12==11.0.2.54
|
99 |
+
psutil==6.1.0
|
100 |
+
GitPython==3.1.43
|
101 |
+
platformdirs==4.3.6
|
102 |
+
importlib_resources==6.4.5
|
103 |
+
tfds-nightly==4.9.7.dev202411280044
|
104 |
+
tensorflow-gan==2.1.0
|
105 |
+
googleapis-common-protos==1.66.0
|
106 |
+
overrides==7.7.0
|
107 |
+
optree==0.13.1
|
108 |
+
Pygments==2.18.0
|
109 |
+
astunparse==1.6.3
|
110 |
+
ml_collections==1.0.0
|
111 |
+
setproctitle==1.3.4
|
112 |
+
tensorboard-data-server==0.7.2
|
113 |
+
sympy==1.13.1
|
114 |
+
packaging==24.2
|
115 |
+
nest-asyncio==1.6.0
|
116 |
+
nvidia-cublas-cu12==12.1.3.1
|
117 |
+
gitdb==4.0.11
|
118 |
+
click==8.1.7
|
119 |
+
idna==3.10
|
120 |
+
tqdm==4.67.1
|
121 |
+
grpcio==1.68.0
|
122 |
+
decorator==5.1.1
|
123 |
+
pyarrow==18.1.0
|
124 |
+
clu==0.0.12
|
125 |
+
charset-normalizer==3.4.0
|
126 |
+
fsspec==2024.10.0
|
127 |
+
dm-tree==0.1.8
|
128 |
+
sentencepiece==0.2.0
|
129 |
+
nvidia-cusparse-cu12==12.1.0.106
|
130 |
+
torchaudio==2.5.1+cu121
|
131 |
+
pip==24.3.1
|
132 |
+
Markdown==3.7
|
133 |
+
nvidia-nvtx-cu12==12.1.105
|
grafted/wandb/run-20241127_213238-torch-grafted-redux/files/wandb-metadata.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-6.8.0-49-generic-x86_64-with-glibc2.39",
|
3 |
+
"python": "3.11.10",
|
4 |
+
"startedAt": "2024-11-28T05:32:38.332723Z",
|
5 |
+
"args": [
|
6 |
+
"--config",
|
7 |
+
"/home/jason-chou/Downloads/big_vision/big_vision/configs/vit_s16_i1k_single_gpu_test.py",
|
8 |
+
"--workdir",
|
9 |
+
"/data/imagenet/grafted",
|
10 |
+
"--name",
|
11 |
+
"torch-grafted-redux"
|
12 |
+
],
|
13 |
+
"program": "-m big_vision.train",
|
14 |
+
"git": {
|
15 |
+
"remote": "https://github.com/EIFY/big_vision.git",
|
16 |
+
"commit": "44649a64ff67e709f55cdb5e3adcf52064b17de5"
|
17 |
+
},
|
18 |
+
"email": "[email protected]",
|
19 |
+
"root": "/home/jason-chou/Downloads/big_vision",
|
20 |
+
"host": "jasonchou-TensorBook-late-2021",
|
21 |
+
"username": "jason-chou",
|
22 |
+
"executable": "/home/jason-chou/.pyenv/versions/3.11.10/bin/python",
|
23 |
+
"cpu_count": 8,
|
24 |
+
"cpu_count_logical": 16,
|
25 |
+
"gpu": "NVIDIA GeForce RTX 3080 Laptop GPU",
|
26 |
+
"gpu_count": 1,
|
27 |
+
"disk": {
|
28 |
+
"/": {
|
29 |
+
"total": "1006450962432",
|
30 |
+
"used": "584312053760"
|
31 |
+
}
|
32 |
+
},
|
33 |
+
"memory": {
|
34 |
+
"total": "67162914816"
|
35 |
+
},
|
36 |
+
"cpu": {
|
37 |
+
"count": 8,
|
38 |
+
"countLogical": 16
|
39 |
+
},
|
40 |
+
"gpu_nvidia": [
|
41 |
+
{
|
42 |
+
"name": "NVIDIA GeForce RTX 3080 Laptop GPU",
|
43 |
+
"memoryTotal": "17179869184",
|
44 |
+
"cudaCores": 6144,
|
45 |
+
"architecture": "Ampere"
|
46 |
+
}
|
47 |
+
],
|
48 |
+
"cudaVersion": "12.2"
|
49 |
+
}
|
grafted/wandb/run-20241127_213238-torch-grafted-redux/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"_wandb":{"runtime":0}}
|
grafted/wandb/run-20241127_213238-torch-grafted-redux/logs/debug-internal.log
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"time":"2024-11-27T21:32:38.333839182-08:00","level":"INFO","msg":"using version","core version":"0.18.7"}
|
2 |
+
{"time":"2024-11-27T21:32:38.333850342-08:00","level":"INFO","msg":"created symlink","path":"/home/jason-chou/Downloads/big_vision/wandb/run-20241127_213238-torch-grafted-redux/logs/debug-core.log"}
|
3 |
+
{"time":"2024-11-27T21:32:38.437006136-08:00","level":"INFO","msg":"created new stream","id":"torch-grafted-redux"}
|
4 |
+
{"time":"2024-11-27T21:32:38.437103435-08:00","level":"INFO","msg":"stream: started","id":"torch-grafted-redux"}
|
5 |
+
{"time":"2024-11-27T21:32:38.437187603-08:00","level":"INFO","msg":"writer: Do: started","stream_id":"torch-grafted-redux"}
|
6 |
+
{"time":"2024-11-27T21:32:38.437273441-08:00","level":"INFO","msg":"handler: started","stream_id":"torch-grafted-redux"}
|
7 |
+
{"time":"2024-11-27T21:32:38.437384909-08:00","level":"INFO","msg":"sender: started","stream_id":"torch-grafted-redux"}
|
8 |
+
{"time":"2024-11-27T21:32:38.935119097-08:00","level":"INFO","msg":"Starting system monitor"}
|
9 |
+
{"time":"2024-11-27T21:32:39.180591766-08:00","level":"INFO","msg":"stream: closing","id":"torch-grafted-redux"}
|
10 |
+
{"time":"2024-11-27T21:32:39.180690557-08:00","level":"INFO","msg":"Stopping system monitor"}
|
11 |
+
{"time":"2024-11-27T21:32:39.181672971-08:00","level":"INFO","msg":"Stopped system monitor"}
|
12 |
+
{"time":"2024-11-27T21:32:39.361912852-08:00","level":"WARN","msg":"No program path found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job"}
|
13 |
+
{"time":"2024-11-27T21:32:39.361944716-08:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
|
14 |
+
{"time":"2024-11-27T21:32:39.739296662-08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
15 |
+
{"time":"2024-11-27T21:32:39.941553056-08:00","level":"INFO","msg":"handler: closed","stream_id":"torch-grafted-redux"}
|
16 |
+
{"time":"2024-11-27T21:32:39.941619024-08:00","level":"INFO","msg":"sender: closed","stream_id":"torch-grafted-redux"}
|
17 |
+
{"time":"2024-11-27T21:32:39.941589779-08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"torch-grafted-redux"}
|
18 |
+
{"time":"2024-11-27T21:32:39.941798781-08:00","level":"INFO","msg":"stream: closed","id":"torch-grafted-redux"}
|
grafted/wandb/run-20241127_213238-torch-grafted-redux/logs/debug.log
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-11-27 21:32:38,330 INFO MainThread:68601 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
|
2 |
+
2024-11-27 21:32:38,330 INFO MainThread:68601 [wandb_setup.py:_flush():79] Configure stats pid to 68601
|
3 |
+
2024-11-27 21:32:38,330 INFO MainThread:68601 [wandb_setup.py:_flush():79] Loading settings from /home/jason-chou/.config/wandb/settings
|
4 |
+
2024-11-27 21:32:38,330 INFO MainThread:68601 [wandb_setup.py:_flush():79] Loading settings from /home/jason-chou/Downloads/big_vision/wandb/settings
|
5 |
+
2024-11-27 21:32:38,330 INFO MainThread:68601 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
|
6 |
+
2024-11-27 21:32:38,330 INFO MainThread:68601 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
|
7 |
+
2024-11-27 21:32:38,330 WARNING MainThread:68601 [wandb_setup.py:_flush():79] Could not find program at -m big_vision.train
|
8 |
+
2024-11-27 21:32:38,330 INFO MainThread:68601 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m big_vision.train'}
|
9 |
+
2024-11-27 21:32:38,330 INFO MainThread:68601 [wandb_setup.py:_flush():79] Applying login settings: {}
|
10 |
+
2024-11-27 21:32:38,330 INFO MainThread:68601 [wandb_init.py:_log_setup():533] Logging user logs to /home/jason-chou/Downloads/big_vision/wandb/run-20241127_213238-torch-grafted-redux/logs/debug.log
|
11 |
+
2024-11-27 21:32:38,330 INFO MainThread:68601 [wandb_init.py:_log_setup():534] Logging internal logs to /home/jason-chou/Downloads/big_vision/wandb/run-20241127_213238-torch-grafted-redux/logs/debug-internal.log
|
12 |
+
2024-11-27 21:32:38,330 INFO MainThread:68601 [wandb_init.py:init():619] calling init triggers
|
13 |
+
2024-11-27 21:32:38,331 INFO MainThread:68601 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
|
14 |
+
config: {'_fields': {'seed': 0, 'total_epochs': 90, 'num_classes': 1000, 'loss': 'softmax_xent', 'input': accum_freq: 8
|
15 |
+
batch_size: 1024
|
16 |
+
cache_raw: false
|
17 |
+
data:
|
18 |
+
name: imagenet2012
|
19 |
+
split: train
|
20 |
+
pp: decode_jpeg_and_inception_crop(224)|flip_lr|randaug(2,10)|value_range(-1, 1)|onehot(1000,
|
21 |
+
key="label", key_result="labels")|keep("image", "labels")
|
22 |
+
shuffle_buffer_size: 150000
|
23 |
+
, 'pp_modules': ['ops_general', 'ops_image', 'ops_text', 'archive.randaug'], 'log_training_steps': 50, 'ckpt_steps': 1000, 'model_name': 'vit', 'model': pool_type: gap
|
24 |
+
posemb: sincos2d
|
25 |
+
rep_size: false
|
26 |
+
variant: S/16
|
27 |
+
, 'grad_clip_norm': 1.0, 'optax_name': 'scale_by_adam', 'optax': mu_dtype: bfloat16
|
28 |
+
, 'lr': 0.001, 'wd': 0.0001, 'schedule': decay_type: cosine
|
29 |
+
warmup_steps: 10000
|
30 |
+
, 'mixup': fold_in: null
|
31 |
+
p: 0.2
|
32 |
+
, 'evals': val:
|
33 |
+
data:
|
34 |
+
name: imagenet2012
|
35 |
+
split: validation
|
36 |
+
log_steps: 2500
|
37 |
+
loss_name: softmax_xent
|
38 |
+
pp_fn: decode|resize_small(256)|central_crop(224)|value_range(-1, 1)|onehot(1000,
|
39 |
+
key="label", key_result="labels")|keep("image", "labels")
|
40 |
+
type: classification
|
41 |
+
}, '_locked': True, '_type_safe': True, '_convert_dict': True, '_allow_dotted_keys': False, '_sort_keys': True}
|
42 |
+
2024-11-27 21:32:38,331 INFO MainThread:68601 [wandb_init.py:init():669] starting backend
|
43 |
+
2024-11-27 21:32:38,331 INFO MainThread:68601 [wandb_init.py:init():673] sending inform_init request
|
44 |
+
2024-11-27 21:32:38,332 INFO MainThread:68601 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
45 |
+
2024-11-27 21:32:38,332 INFO MainThread:68601 [wandb_init.py:init():686] backend started and connected
|
46 |
+
2024-11-27 21:32:38,336 INFO MainThread:68601 [wandb_init.py:init():781] updated telemetry
|
47 |
+
2024-11-27 21:32:38,339 INFO MainThread:68601 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
|
48 |
+
2024-11-27 21:32:38,933 INFO MainThread:68601 [wandb_init.py:init():867] starting run threads in backend
|
49 |
+
2024-11-27 21:32:38,983 INFO MainThread:68601 [wandb_run.py:_console_start():2456] atexit reg
|
50 |
+
2024-11-27 21:32:38,983 INFO MainThread:68601 [wandb_run.py:_redirect():2305] redirect: wrap_raw
|
51 |
+
2024-11-27 21:32:38,984 INFO MainThread:68601 [wandb_run.py:_redirect():2370] Wrapping output streams.
|
52 |
+
2024-11-27 21:32:38,984 INFO MainThread:68601 [wandb_run.py:_redirect():2395] Redirects installed.
|
53 |
+
2024-11-27 21:32:38,984 INFO MainThread:68601 [wandb_init.py:init():911] run started, returning control to user process
|
54 |
+
2024-11-27 21:32:39,180 WARNING MsgRouterThr:68601 [router.py:message_loop():75] message_loop has been closed
|
grafted/wandb/run-20241127_213238-torch-grafted-redux/run-torch-grafted-redux.wandb
ADDED
Binary file (6.53 kB). View file
|
|
grafted/wandb/run-20241127_213407-torch-grafted-redux/files/config.yaml
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_allow_dotted_keys:
|
2 |
+
value: false
|
3 |
+
_convert_dict:
|
4 |
+
value: true
|
5 |
+
_fields:
|
6 |
+
value:
|
7 |
+
ckpt_steps: 1000
|
8 |
+
evals: |
|
9 |
+
val:
|
10 |
+
data:
|
11 |
+
name: imagenet2012
|
12 |
+
split: validation
|
13 |
+
log_steps: 2500
|
14 |
+
loss_name: softmax_xent
|
15 |
+
pp_fn: decode|resize_small(256)|central_crop(224)|value_range(-1, 1)|onehot(1000,
|
16 |
+
key="label", key_result="labels")|keep("image", "labels")
|
17 |
+
type: classification
|
18 |
+
grad_clip_norm: 1
|
19 |
+
input: |
|
20 |
+
accum_freq: 8
|
21 |
+
batch_size: 1024
|
22 |
+
cache_raw: false
|
23 |
+
data:
|
24 |
+
name: imagenet2012
|
25 |
+
split: train
|
26 |
+
pp: decode_jpeg_and_inception_crop(224)|flip_lr|randaug(2,10)|value_range(-1, 1)|onehot(1000,
|
27 |
+
key="label", key_result="labels")|keep("image", "labels")
|
28 |
+
shuffle_buffer_size: 150000
|
29 |
+
log_training_steps: 50
|
30 |
+
loss: softmax_xent
|
31 |
+
lr: 0.001
|
32 |
+
mixup: |
|
33 |
+
fold_in: null
|
34 |
+
p: 0.2
|
35 |
+
model: |
|
36 |
+
pool_type: gap
|
37 |
+
posemb: sincos2d
|
38 |
+
rep_size: false
|
39 |
+
variant: S/16
|
40 |
+
model_name: vit
|
41 |
+
num_classes: 1000
|
42 |
+
optax: |
|
43 |
+
mu_dtype: bfloat16
|
44 |
+
optax_name: scale_by_adam
|
45 |
+
pp_modules:
|
46 |
+
- ops_general
|
47 |
+
- ops_image
|
48 |
+
- ops_text
|
49 |
+
- archive.randaug
|
50 |
+
schedule: |
|
51 |
+
decay_type: cosine
|
52 |
+
warmup_steps: 10000
|
53 |
+
seed: 0
|
54 |
+
total_epochs: 90
|
55 |
+
wd: 0.0001
|
56 |
+
_locked:
|
57 |
+
value: true
|
58 |
+
_sort_keys:
|
59 |
+
value: true
|
60 |
+
_type_safe:
|
61 |
+
value: true
|
62 |
+
_wandb:
|
63 |
+
value:
|
64 |
+
cli_version: 0.18.7
|
65 |
+
m: []
|
66 |
+
python_version: 3.11.10
|
67 |
+
t:
|
68 |
+
"1":
|
69 |
+
- 1
|
70 |
+
- 2
|
71 |
+
- 3
|
72 |
+
- 12
|
73 |
+
- 41
|
74 |
+
- 45
|
75 |
+
- 55
|
76 |
+
"2":
|
77 |
+
- 1
|
78 |
+
- 2
|
79 |
+
- 3
|
80 |
+
- 12
|
81 |
+
- 41
|
82 |
+
- 45
|
83 |
+
- 55
|
84 |
+
"3":
|
85 |
+
- 5
|
86 |
+
- 13
|
87 |
+
- 14
|
88 |
+
- 16
|
89 |
+
- 23
|
90 |
+
- 55
|
91 |
+
- 62
|
92 |
+
"4": 3.11.10
|
93 |
+
"5": 0.18.7
|
94 |
+
"8":
|
95 |
+
- 5
|
96 |
+
"12": 0.18.7
|
97 |
+
"13": linux-x86_64
|
grafted/wandb/run-20241127_213407-torch-grafted-redux/files/output.log
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
I1127 21:34:08.317430 123604392237952 train.py:125] [33mNOTE[0m: Initializing train dataset...
|
2 |
+
I1127 21:34:08.317584 123604392237952 train.py:125] [33mNOTE[0m: Global batch size 1024 on 1 hosts results in 1024 local batch size. With 1 dev per host (1 dev total), that's a 1024 per-device batch size.
|
3 |
+
I1127 21:34:08.468933 123604392237952 logging_logger.py:49] Constructing tf.data.Dataset imagenet2012 for split _EvenSplit(split='train', index=0, count=1, drop_remainder=False), from /home/jason-chou/tensorflow_datasets/imagenet2012/5.1.0
|
4 |
+
Traceback (most recent call last):
|
5 |
+
File "<frozen runpy>", line 198, in _run_module_as_main
|
6 |
+
File "<frozen runpy>", line 88, in _run_code
|
7 |
+
File "/home/jason-chou/Downloads/big_vision/big_vision/train.py", line 396, in <module>
|
8 |
+
app.run(main)
|
9 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/absl/app.py", line 308, in run
|
10 |
+
_run_main(main, args)
|
11 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/absl/app.py", line 254, in _run_main
|
12 |
+
sys.exit(main(argv))
|
13 |
+
^^^^^^^^^^
|
14 |
+
File "/home/jason-chou/Downloads/big_vision/big_vision/train.py", line 189, in main
|
15 |
+
train_ds, ntrain_img = input_pipeline.training(config.input)
|
16 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
17 |
+
File "/home/jason-chou/Downloads/big_vision/big_vision/input_pipeline.py", line 101, in training
|
18 |
+
data=train_data.get_tfdata(ordered=False),
|
19 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
20 |
+
File "/home/jason-chou/Downloads/big_vision/big_vision/datasets/tfds.py", line 39, in get_tfdata
|
21 |
+
return _get_dataset_from_builder(
|
22 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^
|
23 |
+
File "/home/jason-chou/Downloads/big_vision/big_vision/datasets/tfds.py", line 70, in _get_dataset_from_builder
|
24 |
+
ds = builder.as_dataset(
|
25 |
+
^^^^^^^^^^^^^^^^^^^
|
26 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/tensorflow_datasets/core/logging/__init__.py", line 176, in __call__
|
27 |
+
return function(*args, **kwargs)
|
28 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^
|
29 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/tensorflow_datasets/core/dataset_builder.py", line 1008, in as_dataset
|
30 |
+
raise AssertionError(
|
31 |
+
AssertionError: Dataset imagenet2012: could not find data in /home/jason-chou/tensorflow_datasets. Please make sure to call dataset_builder.download_and_prepare(), or pass download=True to tfds.load() before trying to access the tf.data.Dataset object.
|
grafted/wandb/run-20241127_213407-torch-grafted-redux/files/requirements.txt
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
array_record==0.5.1
|
2 |
+
wandb==0.18.7
|
3 |
+
nvidia-curand-cu12==10.3.2.106
|
4 |
+
requests-oauthlib==2.0.0
|
5 |
+
zipp==3.21.0
|
6 |
+
Werkzeug==3.1.3
|
7 |
+
simple-parsing==0.1.6
|
8 |
+
mdurl==0.1.2
|
9 |
+
keras==2.15.0
|
10 |
+
nvidia-cuda-nvcc-cu12==12.6.85
|
11 |
+
google-auth-oauthlib==1.2.1
|
12 |
+
jaxlib==0.4.34
|
13 |
+
tf_keras==2.15.1
|
14 |
+
oauthlib==3.2.2
|
15 |
+
tensorflow-probability==0.25.0
|
16 |
+
cachetools==5.5.0
|
17 |
+
Jinja2==3.1.3
|
18 |
+
rich==13.9.4
|
19 |
+
filelock==3.13.1
|
20 |
+
google-pasta==0.2.0
|
21 |
+
optax==0.2.4
|
22 |
+
toolz==1.0.0
|
23 |
+
gast==0.6.0
|
24 |
+
tensorboard==2.15.2
|
25 |
+
pyasn1_modules==0.4.1
|
26 |
+
nvidia-cudnn-cu12==9.1.0.70
|
27 |
+
opt_einsum==3.4.0
|
28 |
+
nvidia-nvjitlink-cu12==12.6.85
|
29 |
+
chex==0.1.87
|
30 |
+
namex==0.0.8
|
31 |
+
termcolor==2.5.0
|
32 |
+
flax==0.10.2
|
33 |
+
cloudpickle==3.1.0
|
34 |
+
numpy==1.26.4
|
35 |
+
nvidia-nccl-cu12==2.21.5
|
36 |
+
tensorflow-cpu==2.15.0
|
37 |
+
nvidia-cusolver-cu12==11.4.5.107
|
38 |
+
typing_extensions==4.12.2
|
39 |
+
tensorflow-addons==0.23.0
|
40 |
+
typeguard==2.13.3
|
41 |
+
absl-py==2.1.0
|
42 |
+
flatbuffers==24.3.25
|
43 |
+
dlpack==0.1
|
44 |
+
setuptools==65.5.0
|
45 |
+
protobuf==4.25.5
|
46 |
+
jax-cuda12-plugin==0.4.35
|
47 |
+
tensorflow==2.15.0
|
48 |
+
msgpack==1.1.0
|
49 |
+
networkx==3.2.1
|
50 |
+
docker-pycreds==0.4.0
|
51 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
52 |
+
pillow==11.0.0
|
53 |
+
libclang==18.1.1
|
54 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
55 |
+
distrax==0.1.5
|
56 |
+
orbax-checkpoint==0.10.1
|
57 |
+
PyYAML==6.0.2
|
58 |
+
urllib3==2.2.3
|
59 |
+
aqtp==0.8.2
|
60 |
+
tensorflow-metadata==1.16.1
|
61 |
+
etils==1.11.0
|
62 |
+
smmap==5.0.1
|
63 |
+
pyasn1==0.6.1
|
64 |
+
docstring_parser==0.16
|
65 |
+
google-auth==2.36.0
|
66 |
+
simplejson==3.19.3
|
67 |
+
mpmath==1.3.0
|
68 |
+
h5py==3.12.1
|
69 |
+
jax-cuda12-pjrt==0.4.35
|
70 |
+
tensorflow-io-gcs-filesystem==0.37.1
|
71 |
+
tensorflow-estimator==2.15.0
|
72 |
+
triton==3.1.0
|
73 |
+
rsa==4.9
|
74 |
+
panopticapi==0.1
|
75 |
+
tensorflow-hub==0.16.1
|
76 |
+
requests==2.32.3
|
77 |
+
scipy==1.14.1
|
78 |
+
ml-dtypes==0.2.0
|
79 |
+
markdown-it-py==3.0.0
|
80 |
+
tensorflow-text==2.15.0
|
81 |
+
wrapt==1.14.1
|
82 |
+
immutabledict==4.2.1
|
83 |
+
MarkupSafe==3.0.2
|
84 |
+
jax==0.4.35
|
85 |
+
torch==2.5.1+cu121
|
86 |
+
wheel==0.45.1
|
87 |
+
einops==0.8.0
|
88 |
+
sentry-sdk==2.19.0
|
89 |
+
torchvision==0.20.1+cu121
|
90 |
+
humanize==4.11.0
|
91 |
+
toml==0.10.2
|
92 |
+
tensorstore==0.1.69
|
93 |
+
six==1.16.0
|
94 |
+
promise==2.3
|
95 |
+
certifi==2024.8.30
|
96 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
97 |
+
flaxformer==0.8.8
|
98 |
+
nvidia-cufft-cu12==11.0.2.54
|
99 |
+
psutil==6.1.0
|
100 |
+
GitPython==3.1.43
|
101 |
+
platformdirs==4.3.6
|
102 |
+
importlib_resources==6.4.5
|
103 |
+
tfds-nightly==4.9.7.dev202411280044
|
104 |
+
tensorflow-gan==2.1.0
|
105 |
+
googleapis-common-protos==1.66.0
|
106 |
+
overrides==7.7.0
|
107 |
+
optree==0.13.1
|
108 |
+
Pygments==2.18.0
|
109 |
+
astunparse==1.6.3
|
110 |
+
ml_collections==1.0.0
|
111 |
+
setproctitle==1.3.4
|
112 |
+
tensorboard-data-server==0.7.2
|
113 |
+
sympy==1.13.1
|
114 |
+
packaging==24.2
|
115 |
+
nest-asyncio==1.6.0
|
116 |
+
nvidia-cublas-cu12==12.1.3.1
|
117 |
+
gitdb==4.0.11
|
118 |
+
click==8.1.7
|
119 |
+
idna==3.10
|
120 |
+
tqdm==4.67.1
|
121 |
+
grpcio==1.68.0
|
122 |
+
decorator==5.1.1
|
123 |
+
pyarrow==18.1.0
|
124 |
+
clu==0.0.12
|
125 |
+
charset-normalizer==3.4.0
|
126 |
+
fsspec==2024.10.0
|
127 |
+
dm-tree==0.1.8
|
128 |
+
sentencepiece==0.2.0
|
129 |
+
nvidia-cusparse-cu12==12.1.0.106
|
130 |
+
torchaudio==2.5.1+cu121
|
131 |
+
pip==24.3.1
|
132 |
+
Markdown==3.7
|
133 |
+
nvidia-nvtx-cu12==12.1.105
|
grafted/wandb/run-20241127_213407-torch-grafted-redux/files/wandb-metadata.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-6.8.0-49-generic-x86_64-with-glibc2.39",
|
3 |
+
"python": "3.11.10",
|
4 |
+
"startedAt": "2024-11-28T05:34:07.870557Z",
|
5 |
+
"args": [
|
6 |
+
"--config",
|
7 |
+
"/home/jason-chou/Downloads/big_vision/big_vision/configs/vit_s16_i1k_single_gpu_test.py",
|
8 |
+
"--workdir",
|
9 |
+
"/data/imagenet/grafted",
|
10 |
+
"--name",
|
11 |
+
"torch-grafted-redux"
|
12 |
+
],
|
13 |
+
"program": "-m big_vision.train",
|
14 |
+
"git": {
|
15 |
+
"remote": "https://github.com/EIFY/big_vision.git",
|
16 |
+
"commit": "44649a64ff67e709f55cdb5e3adcf52064b17de5"
|
17 |
+
},
|
18 |
+
"email": "[email protected]",
|
19 |
+
"root": "/home/jason-chou/Downloads/big_vision",
|
20 |
+
"host": "jasonchou-TensorBook-late-2021",
|
21 |
+
"username": "jason-chou",
|
22 |
+
"executable": "/home/jason-chou/.pyenv/versions/3.11.10/bin/python",
|
23 |
+
"cpu_count": 8,
|
24 |
+
"cpu_count_logical": 16,
|
25 |
+
"gpu": "NVIDIA GeForce RTX 3080 Laptop GPU",
|
26 |
+
"gpu_count": 1,
|
27 |
+
"disk": {
|
28 |
+
"/": {
|
29 |
+
"total": "1006450962432",
|
30 |
+
"used": "584312168448"
|
31 |
+
}
|
32 |
+
},
|
33 |
+
"memory": {
|
34 |
+
"total": "67162914816"
|
35 |
+
},
|
36 |
+
"cpu": {
|
37 |
+
"count": 8,
|
38 |
+
"countLogical": 16
|
39 |
+
},
|
40 |
+
"gpu_nvidia": [
|
41 |
+
{
|
42 |
+
"name": "NVIDIA GeForce RTX 3080 Laptop GPU",
|
43 |
+
"memoryTotal": "17179869184",
|
44 |
+
"cudaCores": 6144,
|
45 |
+
"architecture": "Ampere"
|
46 |
+
}
|
47 |
+
],
|
48 |
+
"cudaVersion": "12.2"
|
49 |
+
}
|
grafted/wandb/run-20241127_213407-torch-grafted-redux/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"_wandb":{"runtime":0}}
|
grafted/wandb/run-20241127_213407-torch-grafted-redux/logs/debug-internal.log
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"time":"2024-11-27T21:34:07.871305008-08:00","level":"INFO","msg":"using version","core version":"0.18.7"}
|
2 |
+
{"time":"2024-11-27T21:34:07.871312795-08:00","level":"INFO","msg":"created symlink","path":"/home/jason-chou/Downloads/big_vision/wandb/run-20241127_213407-torch-grafted-redux/logs/debug-core.log"}
|
3 |
+
{"time":"2024-11-27T21:34:07.974433557-08:00","level":"INFO","msg":"created new stream","id":"torch-grafted-redux"}
|
4 |
+
{"time":"2024-11-27T21:34:07.974483345-08:00","level":"INFO","msg":"stream: started","id":"torch-grafted-redux"}
|
5 |
+
{"time":"2024-11-27T21:34:07.974637769-08:00","level":"INFO","msg":"writer: Do: started","stream_id":"torch-grafted-redux"}
|
6 |
+
{"time":"2024-11-27T21:34:07.974694546-08:00","level":"INFO","msg":"handler: started","stream_id":"torch-grafted-redux"}
|
7 |
+
{"time":"2024-11-27T21:34:07.974824798-08:00","level":"INFO","msg":"sender: started","stream_id":"torch-grafted-redux"}
|
8 |
+
{"time":"2024-11-27T21:34:08.265740433-08:00","level":"INFO","msg":"Starting system monitor"}
|
9 |
+
{"time":"2024-11-27T21:34:08.510645063-08:00","level":"INFO","msg":"stream: closing","id":"torch-grafted-redux"}
|
10 |
+
{"time":"2024-11-27T21:34:08.510725098-08:00","level":"INFO","msg":"Stopping system monitor"}
|
11 |
+
{"time":"2024-11-27T21:34:08.511601534-08:00","level":"INFO","msg":"Stopped system monitor"}
|
12 |
+
{"time":"2024-11-27T21:34:08.671262357-08:00","level":"WARN","msg":"No program path found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job"}
|
13 |
+
{"time":"2024-11-27T21:34:08.671295778-08:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
|
14 |
+
{"time":"2024-11-27T21:34:09.08563661-08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
15 |
+
{"time":"2024-11-27T21:34:09.270472343-08:00","level":"INFO","msg":"handler: closed","stream_id":"torch-grafted-redux"}
|
16 |
+
{"time":"2024-11-27T21:34:09.270565975-08:00","level":"INFO","msg":"sender: closed","stream_id":"torch-grafted-redux"}
|
17 |
+
{"time":"2024-11-27T21:34:09.270568744-08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"torch-grafted-redux"}
|
18 |
+
{"time":"2024-11-27T21:34:09.270733853-08:00","level":"INFO","msg":"stream: closed","id":"torch-grafted-redux"}
|
grafted/wandb/run-20241127_213407-torch-grafted-redux/logs/debug.log
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-11-27 21:34:07,867 INFO MainThread:68916 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
|
2 |
+
2024-11-27 21:34:07,867 INFO MainThread:68916 [wandb_setup.py:_flush():79] Configure stats pid to 68916
|
3 |
+
2024-11-27 21:34:07,867 INFO MainThread:68916 [wandb_setup.py:_flush():79] Loading settings from /home/jason-chou/.config/wandb/settings
|
4 |
+
2024-11-27 21:34:07,867 INFO MainThread:68916 [wandb_setup.py:_flush():79] Loading settings from /home/jason-chou/Downloads/big_vision/wandb/settings
|
5 |
+
2024-11-27 21:34:07,867 INFO MainThread:68916 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
|
6 |
+
2024-11-27 21:34:07,867 INFO MainThread:68916 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
|
7 |
+
2024-11-27 21:34:07,867 WARNING MainThread:68916 [wandb_setup.py:_flush():79] Could not find program at -m big_vision.train
|
8 |
+
2024-11-27 21:34:07,867 INFO MainThread:68916 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m big_vision.train'}
|
9 |
+
2024-11-27 21:34:07,867 INFO MainThread:68916 [wandb_setup.py:_flush():79] Applying login settings: {}
|
10 |
+
2024-11-27 21:34:07,867 INFO MainThread:68916 [wandb_init.py:_log_setup():533] Logging user logs to /home/jason-chou/Downloads/big_vision/wandb/run-20241127_213407-torch-grafted-redux/logs/debug.log
|
11 |
+
2024-11-27 21:34:07,867 INFO MainThread:68916 [wandb_init.py:_log_setup():534] Logging internal logs to /home/jason-chou/Downloads/big_vision/wandb/run-20241127_213407-torch-grafted-redux/logs/debug-internal.log
|
12 |
+
2024-11-27 21:34:07,867 INFO MainThread:68916 [wandb_init.py:init():619] calling init triggers
|
13 |
+
2024-11-27 21:34:07,869 INFO MainThread:68916 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
|
14 |
+
config: {'_fields': {'seed': 0, 'total_epochs': 90, 'num_classes': 1000, 'loss': 'softmax_xent', 'input': accum_freq: 8
|
15 |
+
batch_size: 1024
|
16 |
+
cache_raw: false
|
17 |
+
data:
|
18 |
+
name: imagenet2012
|
19 |
+
split: train
|
20 |
+
pp: decode_jpeg_and_inception_crop(224)|flip_lr|randaug(2,10)|value_range(-1, 1)|onehot(1000,
|
21 |
+
key="label", key_result="labels")|keep("image", "labels")
|
22 |
+
shuffle_buffer_size: 150000
|
23 |
+
, 'pp_modules': ['ops_general', 'ops_image', 'ops_text', 'archive.randaug'], 'log_training_steps': 50, 'ckpt_steps': 1000, 'model_name': 'vit', 'model': pool_type: gap
|
24 |
+
posemb: sincos2d
|
25 |
+
rep_size: false
|
26 |
+
variant: S/16
|
27 |
+
, 'grad_clip_norm': 1.0, 'optax_name': 'scale_by_adam', 'optax': mu_dtype: bfloat16
|
28 |
+
, 'lr': 0.001, 'wd': 0.0001, 'schedule': decay_type: cosine
|
29 |
+
warmup_steps: 10000
|
30 |
+
, 'mixup': fold_in: null
|
31 |
+
p: 0.2
|
32 |
+
, 'evals': val:
|
33 |
+
data:
|
34 |
+
name: imagenet2012
|
35 |
+
split: validation
|
36 |
+
log_steps: 2500
|
37 |
+
loss_name: softmax_xent
|
38 |
+
pp_fn: decode|resize_small(256)|central_crop(224)|value_range(-1, 1)|onehot(1000,
|
39 |
+
key="label", key_result="labels")|keep("image", "labels")
|
40 |
+
type: classification
|
41 |
+
}, '_locked': True, '_type_safe': True, '_convert_dict': True, '_allow_dotted_keys': False, '_sort_keys': True}
|
42 |
+
2024-11-27 21:34:07,869 INFO MainThread:68916 [wandb_init.py:init():669] starting backend
|
43 |
+
2024-11-27 21:34:07,869 INFO MainThread:68916 [wandb_init.py:init():673] sending inform_init request
|
44 |
+
2024-11-27 21:34:07,870 INFO MainThread:68916 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
45 |
+
2024-11-27 21:34:07,870 INFO MainThread:68916 [wandb_init.py:init():686] backend started and connected
|
46 |
+
2024-11-27 21:34:07,873 INFO MainThread:68916 [wandb_init.py:init():781] updated telemetry
|
47 |
+
2024-11-27 21:34:07,875 INFO MainThread:68916 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
|
48 |
+
2024-11-27 21:34:08,241 INFO MainThread:68916 [wandb_init.py:init():859] run resumed
|
49 |
+
2024-11-27 21:34:08,261 INFO MainThread:68916 [wandb_init.py:init():867] starting run threads in backend
|
50 |
+
2024-11-27 21:34:08,315 INFO MainThread:68916 [wandb_run.py:_console_start():2456] atexit reg
|
51 |
+
2024-11-27 21:34:08,315 INFO MainThread:68916 [wandb_run.py:_redirect():2305] redirect: wrap_raw
|
52 |
+
2024-11-27 21:34:08,316 INFO MainThread:68916 [wandb_run.py:_redirect():2370] Wrapping output streams.
|
53 |
+
2024-11-27 21:34:08,316 INFO MainThread:68916 [wandb_run.py:_redirect():2395] Redirects installed.
|
54 |
+
2024-11-27 21:34:08,317 INFO MainThread:68916 [wandb_init.py:init():911] run started, returning control to user process
|
55 |
+
2024-11-27 21:34:08,511 WARNING MsgRouterThr:68916 [router.py:message_loop():75] message_loop has been closed
|
grafted/wandb/run-20241127_213407-torch-grafted-redux/run-torch-grafted-redux.wandb
ADDED
Binary file (6.64 kB). View file
|
|
grafted/wandb/run-20241127_213838-torch-grafted-redux/files/config.yaml
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_allow_dotted_keys:
|
2 |
+
value: false
|
3 |
+
_convert_dict:
|
4 |
+
value: true
|
5 |
+
_fields:
|
6 |
+
value:
|
7 |
+
ckpt_steps: 1000
|
8 |
+
evals: |
|
9 |
+
val:
|
10 |
+
data:
|
11 |
+
name: imagenet2012
|
12 |
+
split: validation
|
13 |
+
log_steps: 2500
|
14 |
+
loss_name: softmax_xent
|
15 |
+
pp_fn: decode|resize_small(256)|central_crop(224)|value_range(-1, 1)|onehot(1000,
|
16 |
+
key="label", key_result="labels")|keep("image", "labels")
|
17 |
+
type: classification
|
18 |
+
grad_clip_norm: 1
|
19 |
+
input: |
|
20 |
+
accum_freq: 8
|
21 |
+
batch_size: 1024
|
22 |
+
cache_raw: false
|
23 |
+
data:
|
24 |
+
name: imagenet2012
|
25 |
+
split: train
|
26 |
+
pp: decode_jpeg_and_inception_crop(224)|flip_lr|randaug(2,10)|value_range(-1, 1)|onehot(1000,
|
27 |
+
key="label", key_result="labels")|keep("image", "labels")
|
28 |
+
shuffle_buffer_size: 150000
|
29 |
+
log_training_steps: 50
|
30 |
+
loss: softmax_xent
|
31 |
+
lr: 0.001
|
32 |
+
mixup: |
|
33 |
+
fold_in: null
|
34 |
+
p: 0.2
|
35 |
+
model: |
|
36 |
+
pool_type: gap
|
37 |
+
posemb: sincos2d
|
38 |
+
rep_size: false
|
39 |
+
variant: S/16
|
40 |
+
model_name: vit
|
41 |
+
num_classes: 1000
|
42 |
+
optax: |
|
43 |
+
mu_dtype: bfloat16
|
44 |
+
optax_name: scale_by_adam
|
45 |
+
pp_modules:
|
46 |
+
- ops_general
|
47 |
+
- ops_image
|
48 |
+
- ops_text
|
49 |
+
- archive.randaug
|
50 |
+
schedule: |
|
51 |
+
decay_type: cosine
|
52 |
+
warmup_steps: 10000
|
53 |
+
seed: 0
|
54 |
+
total_epochs: 90
|
55 |
+
wd: 0.0001
|
56 |
+
_locked:
|
57 |
+
value: true
|
58 |
+
_sort_keys:
|
59 |
+
value: true
|
60 |
+
_type_safe:
|
61 |
+
value: true
|
62 |
+
_wandb:
|
63 |
+
value:
|
64 |
+
cli_version: 0.18.7
|
65 |
+
m: []
|
66 |
+
python_version: 3.11.10
|
67 |
+
t:
|
68 |
+
"1":
|
69 |
+
- 1
|
70 |
+
- 2
|
71 |
+
- 3
|
72 |
+
- 12
|
73 |
+
- 41
|
74 |
+
- 45
|
75 |
+
- 55
|
76 |
+
"2":
|
77 |
+
- 1
|
78 |
+
- 2
|
79 |
+
- 3
|
80 |
+
- 12
|
81 |
+
- 41
|
82 |
+
- 45
|
83 |
+
- 55
|
84 |
+
"3":
|
85 |
+
- 5
|
86 |
+
- 13
|
87 |
+
- 14
|
88 |
+
- 16
|
89 |
+
- 23
|
90 |
+
- 55
|
91 |
+
- 62
|
92 |
+
"4": 3.11.10
|
93 |
+
"5": 0.18.7
|
94 |
+
"8":
|
95 |
+
- 5
|
96 |
+
"12": 0.18.7
|
97 |
+
"13": linux-x86_64
|
grafted/wandb/run-20241127_213838-torch-grafted-redux/files/output.log
ADDED
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
I1127 21:38:39.183649 134109837835136 train.py:125] [33mNOTE[0m: Initializing train dataset...
|
2 |
+
I1127 21:38:39.183780 134109837835136 train.py:125] [33mNOTE[0m: Global batch size 1024 on 1 hosts results in 1024 local batch size. With 1 dev per host (1 dev total), that's a 1024 per-device batch size.
|
3 |
+
I1127 21:38:39.334601 134109837835136 dataset_info.py:707] Load dataset info from /data/tensorflow_datasets/imagenet2012/5.1.0
|
4 |
+
I1127 21:38:39.359155 134109837835136 reader.py:261] Creating a tf.data.Dataset reading 1024 files located in folders: /data/tensorflow_datasets/imagenet2012/5.1.0.
|
5 |
+
WARNING:tensorflow:From /home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/tensorflow_datasets/core/reader.py:101: CounterV2 (from tensorflow.python.data.experimental.ops.counter) is deprecated and will be removed in a future version.
|
6 |
+
Instructions for updating:
|
7 |
+
Use `tf.data.Dataset.counter(...)` instead.
|
8 |
+
W1127 21:38:39.392815 134109837835136 deprecation.py:50] From /home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/tensorflow_datasets/core/reader.py:101: CounterV2 (from tensorflow.python.data.experimental.ops.counter) is deprecated and will be removed in a future version.
|
9 |
+
Instructions for updating:
|
10 |
+
Use `tf.data.Dataset.counter(...)` instead.
|
11 |
+
I1127 21:38:39.415093 134109837835136 logging_logger.py:49] Constructing tf.data.Dataset imagenet2012 for split _EvenSplit(split='train', index=0, count=1, drop_remainder=False), from /data/tensorflow_datasets/imagenet2012/5.1.0
|
12 |
+
I1127 21:38:39.481757 134109837835136 api.py:460] Data before pre-processing:
|
13 |
+
{'file_name': <tf.Tensor 'args_1:0' shape=() dtype=string>, 'image': <tf.Tensor 'args_2:0' shape=() dtype=string>, 'label': <tf.Tensor 'args_3:0' shape=() dtype=int64>, 'tfds_id': <tf.Tensor 'args_4:0' shape=() dtype=string>, '_id': <tf.Tensor 'args_0:0' shape=() dtype=int32>}
|
14 |
+
INFO:tensorflow:Using RandAug.
|
15 |
+
I1127 21:38:39.744788 134109837835136 api.py:460] Using RandAug.
|
16 |
+
WARNING:tensorflow:From /home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/tensorflow/python/util/dispatch.py:1260: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
|
17 |
+
Instructions for updating:
|
18 |
+
Use `tf.cast` instead.
|
19 |
+
W1127 21:38:39.922924 134109837835136 deprecation.py:50] From /home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/tensorflow/python/util/dispatch.py:1260: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
|
20 |
+
Instructions for updating:
|
21 |
+
Use `tf.cast` instead.
|
22 |
+
I1127 21:38:42.147573 134109837835136 api.py:460] Data after pre-processing:
|
23 |
+
{'image': <tf.Tensor 'add:0' shape=(224, 224, 3) dtype=float32>, 'labels': <tf.Tensor 'one_hot:0' shape=(1000,) dtype=float32>}
|
24 |
+
I1127 21:38:42.218924 134109837835136 train.py:125] [33mNOTE[0m: Running for 112603 steps, that means 90.000345 epochs
|
25 |
+
I1127 21:38:42.850795 134109837835136 train.py:125] [33mNOTE[0m: Creating model...
|
26 |
+
Weight decay for: conv_proj.weight
|
27 |
+
Weight decay for: encoder.layers.encoder_layer_0.self_attention.in_proj_weight
|
28 |
+
Weight decay for: encoder.layers.encoder_layer_0.self_attention.out_proj.weight
|
29 |
+
Weight decay for: encoder.layers.encoder_layer_0.mlp.0.weight
|
30 |
+
Weight decay for: encoder.layers.encoder_layer_0.mlp.3.weight
|
31 |
+
Weight decay for: encoder.layers.encoder_layer_1.self_attention.in_proj_weight
|
32 |
+
Weight decay for: encoder.layers.encoder_layer_1.self_attention.out_proj.weight
|
33 |
+
Weight decay for: encoder.layers.encoder_layer_1.mlp.0.weight
|
34 |
+
Weight decay for: encoder.layers.encoder_layer_1.mlp.3.weight
|
35 |
+
Weight decay for: encoder.layers.encoder_layer_2.self_attention.in_proj_weight
|
36 |
+
Weight decay for: encoder.layers.encoder_layer_2.self_attention.out_proj.weight
|
37 |
+
Weight decay for: encoder.layers.encoder_layer_2.mlp.0.weight
|
38 |
+
Weight decay for: encoder.layers.encoder_layer_2.mlp.3.weight
|
39 |
+
Weight decay for: encoder.layers.encoder_layer_3.self_attention.in_proj_weight
|
40 |
+
Weight decay for: encoder.layers.encoder_layer_3.self_attention.out_proj.weight
|
41 |
+
Weight decay for: encoder.layers.encoder_layer_3.mlp.0.weight
|
42 |
+
Weight decay for: encoder.layers.encoder_layer_3.mlp.3.weight
|
43 |
+
Weight decay for: encoder.layers.encoder_layer_4.self_attention.in_proj_weight
|
44 |
+
Weight decay for: encoder.layers.encoder_layer_4.self_attention.out_proj.weight
|
45 |
+
Weight decay for: encoder.layers.encoder_layer_4.mlp.0.weight
|
46 |
+
Weight decay for: encoder.layers.encoder_layer_4.mlp.3.weight
|
47 |
+
Weight decay for: encoder.layers.encoder_layer_5.self_attention.in_proj_weight
|
48 |
+
Weight decay for: encoder.layers.encoder_layer_5.self_attention.out_proj.weight
|
49 |
+
Weight decay for: encoder.layers.encoder_layer_5.mlp.0.weight
|
50 |
+
Weight decay for: encoder.layers.encoder_layer_5.mlp.3.weight
|
51 |
+
Weight decay for: encoder.layers.encoder_layer_6.self_attention.in_proj_weight
|
52 |
+
Weight decay for: encoder.layers.encoder_layer_6.self_attention.out_proj.weight
|
53 |
+
Weight decay for: encoder.layers.encoder_layer_6.mlp.0.weight
|
54 |
+
Weight decay for: encoder.layers.encoder_layer_6.mlp.3.weight
|
55 |
+
Weight decay for: encoder.layers.encoder_layer_7.self_attention.in_proj_weight
|
56 |
+
Weight decay for: encoder.layers.encoder_layer_7.self_attention.out_proj.weight
|
57 |
+
Weight decay for: encoder.layers.encoder_layer_7.mlp.0.weight
|
58 |
+
Weight decay for: encoder.layers.encoder_layer_7.mlp.3.weight
|
59 |
+
Weight decay for: encoder.layers.encoder_layer_8.self_attention.in_proj_weight
|
60 |
+
Weight decay for: encoder.layers.encoder_layer_8.self_attention.out_proj.weight
|
61 |
+
Weight decay for: encoder.layers.encoder_layer_8.mlp.0.weight
|
62 |
+
Weight decay for: encoder.layers.encoder_layer_8.mlp.3.weight
|
63 |
+
Weight decay for: encoder.layers.encoder_layer_9.self_attention.in_proj_weight
|
64 |
+
Weight decay for: encoder.layers.encoder_layer_9.self_attention.out_proj.weight
|
65 |
+
Weight decay for: encoder.layers.encoder_layer_9.mlp.0.weight
|
66 |
+
Weight decay for: encoder.layers.encoder_layer_9.mlp.3.weight
|
67 |
+
Weight decay for: encoder.layers.encoder_layer_10.self_attention.in_proj_weight
|
68 |
+
Weight decay for: encoder.layers.encoder_layer_10.self_attention.out_proj.weight
|
69 |
+
Weight decay for: encoder.layers.encoder_layer_10.mlp.0.weight
|
70 |
+
Weight decay for: encoder.layers.encoder_layer_10.mlp.3.weight
|
71 |
+
Weight decay for: encoder.layers.encoder_layer_11.self_attention.in_proj_weight
|
72 |
+
Weight decay for: encoder.layers.encoder_layer_11.self_attention.out_proj.weight
|
73 |
+
Weight decay for: encoder.layers.encoder_layer_11.mlp.0.weight
|
74 |
+
Weight decay for: encoder.layers.encoder_layer_11.mlp.3.weight
|
75 |
+
Weight decay for: heads.head.weight
|
76 |
+
Weight decay for: conv_proj.weight
|
77 |
+
Weight decay for: encoder.layers.encoder_layer_0.self_attention.in_proj_weight
|
78 |
+
Weight decay for: encoder.layers.encoder_layer_0.self_attention.out_proj.weight
|
79 |
+
Weight decay for: encoder.layers.encoder_layer_0.mlp.0.weight
|
80 |
+
Weight decay for: encoder.layers.encoder_layer_0.mlp.3.weight
|
81 |
+
Weight decay for: encoder.layers.encoder_layer_1.self_attention.in_proj_weight
|
82 |
+
Weight decay for: encoder.layers.encoder_layer_1.self_attention.out_proj.weight
|
83 |
+
Weight decay for: encoder.layers.encoder_layer_1.mlp.0.weight
|
84 |
+
Weight decay for: encoder.layers.encoder_layer_1.mlp.3.weight
|
85 |
+
Weight decay for: encoder.layers.encoder_layer_2.self_attention.in_proj_weight
|
86 |
+
Weight decay for: encoder.layers.encoder_layer_2.self_attention.out_proj.weight
|
87 |
+
Weight decay for: encoder.layers.encoder_layer_2.mlp.0.weight
|
88 |
+
Weight decay for: encoder.layers.encoder_layer_2.mlp.3.weight
|
89 |
+
Weight decay for: encoder.layers.encoder_layer_3.self_attention.in_proj_weight
|
90 |
+
Weight decay for: encoder.layers.encoder_layer_3.self_attention.out_proj.weight
|
91 |
+
Weight decay for: encoder.layers.encoder_layer_3.mlp.0.weight
|
92 |
+
Weight decay for: encoder.layers.encoder_layer_3.mlp.3.weight
|
93 |
+
Weight decay for: encoder.layers.encoder_layer_4.self_attention.in_proj_weight
|
94 |
+
Weight decay for: encoder.layers.encoder_layer_4.self_attention.out_proj.weight
|
95 |
+
Weight decay for: encoder.layers.encoder_layer_4.mlp.0.weight
|
96 |
+
Weight decay for: encoder.layers.encoder_layer_4.mlp.3.weight
|
97 |
+
Weight decay for: encoder.layers.encoder_layer_5.self_attention.in_proj_weight
|
98 |
+
Weight decay for: encoder.layers.encoder_layer_5.self_attention.out_proj.weight
|
99 |
+
Weight decay for: encoder.layers.encoder_layer_5.mlp.0.weight
|
100 |
+
Weight decay for: encoder.layers.encoder_layer_5.mlp.3.weight
|
101 |
+
Weight decay for: encoder.layers.encoder_layer_6.self_attention.in_proj_weight
|
102 |
+
Weight decay for: encoder.layers.encoder_layer_6.self_attention.out_proj.weight
|
103 |
+
Weight decay for: encoder.layers.encoder_layer_6.mlp.0.weight
|
104 |
+
Weight decay for: encoder.layers.encoder_layer_6.mlp.3.weight
|
105 |
+
Weight decay for: encoder.layers.encoder_layer_7.self_attention.in_proj_weight
|
106 |
+
Weight decay for: encoder.layers.encoder_layer_7.self_attention.out_proj.weight
|
107 |
+
Weight decay for: encoder.layers.encoder_layer_7.mlp.0.weight
|
108 |
+
Weight decay for: encoder.layers.encoder_layer_7.mlp.3.weight
|
109 |
+
Weight decay for: encoder.layers.encoder_layer_8.self_attention.in_proj_weight
|
110 |
+
Weight decay for: encoder.layers.encoder_layer_8.self_attention.out_proj.weight
|
111 |
+
Weight decay for: encoder.layers.encoder_layer_8.mlp.0.weight
|
112 |
+
Weight decay for: encoder.layers.encoder_layer_8.mlp.3.weight
|
113 |
+
Weight decay for: encoder.layers.encoder_layer_9.self_attention.in_proj_weight
|
114 |
+
Weight decay for: encoder.layers.encoder_layer_9.self_attention.out_proj.weight
|
115 |
+
Weight decay for: encoder.layers.encoder_layer_9.mlp.0.weight
|
116 |
+
Weight decay for: encoder.layers.encoder_layer_9.mlp.3.weight
|
117 |
+
Weight decay for: encoder.layers.encoder_layer_10.self_attention.in_proj_weight
|
118 |
+
Weight decay for: encoder.layers.encoder_layer_10.self_attention.out_proj.weight
|
119 |
+
Weight decay for: encoder.layers.encoder_layer_10.mlp.0.weight
|
120 |
+
Weight decay for: encoder.layers.encoder_layer_10.mlp.3.weight
|
121 |
+
Weight decay for: encoder.layers.encoder_layer_11.self_attention.in_proj_weight
|
122 |
+
Weight decay for: encoder.layers.encoder_layer_11.self_attention.out_proj.weight
|
123 |
+
Weight decay for: encoder.layers.encoder_layer_11.mlp.0.weight
|
124 |
+
Weight decay for: encoder.layers.encoder_layer_11.mlp.3.weight
|
125 |
+
Weight decay for: heads.head.weight
|
126 |
+
I1127 21:38:43.733023 134109837835136 train.py:125] [33mNOTE[0m: Running initial or final evals...
|
127 |
+
I1127 21:38:43.733439 134109837835136 train.py:125] [33mNOTE[0m: Init evaluator: val…
|
128 |
+
Steps:0/112603 [0.0%]
|
129 |
+
I1127 21:38:43.735805 134109837835136 reader.py:261] Creating a tf.data.Dataset reading 64 files located in folders: /data/tensorflow_datasets/imagenet2012/5.1.0.
|
130 |
+
I1127 21:38:43.766425 134109837835136 logging_logger.py:49] Constructing tf.data.Dataset imagenet2012 for split _EvenSplit(split='validation', index=0, count=1, drop_remainder=False), from /data/tensorflow_datasets/imagenet2012/5.1.0
|
131 |
+
I1127 21:38:43.800241 134109837835136 api.py:460] Data before pre-processing:
|
132 |
+
{'file_name': <tf.Tensor 'args_1:0' shape=() dtype=string>, 'image': <tf.Tensor 'args_2:0' shape=() dtype=string>, 'label': <tf.Tensor 'args_3:0' shape=() dtype=int64>, 'tfds_id': <tf.Tensor 'args_4:0' shape=() dtype=string>, '_id': <tf.Tensor 'args_0:0' shape=() dtype=int32>}
|
133 |
+
I1127 21:38:43.969007 134109837835136 api.py:460] Data after pre-processing:
|
134 |
+
{'image': <tf.Tensor 'add:0' shape=(224, 224, 3) dtype=float32>, 'labels': <tf.Tensor 'one_hot:0' shape=(1000,) dtype=float32>}
|
135 |
+
I1127 21:38:44.062357 134109837835136 train.py:125] [33mNOTE[0m: val evaluation...
|
136 |
+
Steps:0/112603 [0.0%]
|
137 |
+
Traceback (most recent call last):
|
138 |
+
File "<frozen runpy>", line 198, in _run_module_as_main
|
139 |
+
File "<frozen runpy>", line 88, in _run_code
|
140 |
+
File "/home/jason-chou/Downloads/big_vision/big_vision/train.py", line 396, in <module>
|
141 |
+
app.run(main)
|
142 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/absl/app.py", line 308, in run
|
143 |
+
_run_main(main, args)
|
144 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/absl/app.py", line 254, in _run_main
|
145 |
+
sys.exit(main(argv))
|
146 |
+
^^^^^^^^^^
|
147 |
+
File "/home/jason-chou/Downloads/big_vision/big_vision/train.py", line 296, in main
|
148 |
+
for key, value in evaluator.run(model, criterion, config.input.accum_freq):
|
149 |
+
File "/home/jason-chou/Downloads/big_vision/big_vision/evaluators/classification.py", line 74, in run
|
150 |
+
images, target = torch.from_dlpack(dlpack.asdlpack(images)), torch.from_dlpack(dlpack.asdlpack(target))
|
151 |
+
^^^^^^^^^^^^^^^
|
152 |
+
AttributeError: module 'dlpack' has no attribute 'asdlpack'
|
grafted/wandb/run-20241127_213838-torch-grafted-redux/files/requirements.txt
ADDED
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
array_record==0.5.1
|
2 |
+
wandb==0.18.7
|
3 |
+
nvidia-curand-cu12==10.3.2.106
|
4 |
+
requests-oauthlib==2.0.0
|
5 |
+
zipp==3.21.0
|
6 |
+
Werkzeug==3.1.3
|
7 |
+
simple-parsing==0.1.6
|
8 |
+
mdurl==0.1.2
|
9 |
+
keras==2.15.0
|
10 |
+
nvidia-cuda-nvcc-cu12==12.6.85
|
11 |
+
google-auth-oauthlib==1.2.1
|
12 |
+
jaxlib==0.4.34
|
13 |
+
tf_keras==2.15.1
|
14 |
+
oauthlib==3.2.2
|
15 |
+
tensorflow-probability==0.25.0
|
16 |
+
cachetools==5.5.0
|
17 |
+
Jinja2==3.1.3
|
18 |
+
rich==13.9.4
|
19 |
+
filelock==3.13.1
|
20 |
+
google-pasta==0.2.0
|
21 |
+
optax==0.2.4
|
22 |
+
toolz==1.0.0
|
23 |
+
gast==0.6.0
|
24 |
+
tensorboard==2.15.2
|
25 |
+
pyasn1_modules==0.4.1
|
26 |
+
nvidia-cudnn-cu12==9.1.0.70
|
27 |
+
opt_einsum==3.4.0
|
28 |
+
nvidia-nvjitlink-cu12==12.6.85
|
29 |
+
chex==0.1.87
|
30 |
+
namex==0.0.8
|
31 |
+
termcolor==2.5.0
|
32 |
+
flax==0.10.2
|
33 |
+
cloudpickle==3.1.0
|
34 |
+
numpy==1.26.4
|
35 |
+
nvidia-nccl-cu12==2.21.5
|
36 |
+
tensorflow-cpu==2.15.0
|
37 |
+
nvidia-cusolver-cu12==11.4.5.107
|
38 |
+
typing_extensions==4.12.2
|
39 |
+
tensorflow-addons==0.23.0
|
40 |
+
typeguard==2.13.3
|
41 |
+
absl-py==2.1.0
|
42 |
+
flatbuffers==24.3.25
|
43 |
+
dlpack==0.1
|
44 |
+
setuptools==65.5.0
|
45 |
+
protobuf==4.25.5
|
46 |
+
jax-cuda12-plugin==0.4.35
|
47 |
+
tensorflow==2.15.0
|
48 |
+
msgpack==1.1.0
|
49 |
+
networkx==3.2.1
|
50 |
+
docker-pycreds==0.4.0
|
51 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
52 |
+
pillow==11.0.0
|
53 |
+
libclang==18.1.1
|
54 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
55 |
+
distrax==0.1.5
|
56 |
+
orbax-checkpoint==0.10.1
|
57 |
+
PyYAML==6.0.2
|
58 |
+
urllib3==2.2.3
|
59 |
+
aqtp==0.8.2
|
60 |
+
tensorflow-metadata==1.16.1
|
61 |
+
etils==1.11.0
|
62 |
+
smmap==5.0.1
|
63 |
+
pyasn1==0.6.1
|
64 |
+
docstring_parser==0.16
|
65 |
+
google-auth==2.36.0
|
66 |
+
simplejson==3.19.3
|
67 |
+
mpmath==1.3.0
|
68 |
+
h5py==3.12.1
|
69 |
+
jax-cuda12-pjrt==0.4.35
|
70 |
+
tensorflow-io-gcs-filesystem==0.37.1
|
71 |
+
tensorflow-estimator==2.15.0
|
72 |
+
triton==3.1.0
|
73 |
+
rsa==4.9
|
74 |
+
panopticapi==0.1
|
75 |
+
tensorflow-hub==0.16.1
|
76 |
+
requests==2.32.3
|
77 |
+
scipy==1.14.1
|
78 |
+
ml-dtypes==0.2.0
|
79 |
+
markdown-it-py==3.0.0
|
80 |
+
tensorflow-text==2.15.0
|
81 |
+
wrapt==1.14.1
|
82 |
+
immutabledict==4.2.1
|
83 |
+
MarkupSafe==3.0.2
|
84 |
+
jax==0.4.35
|
85 |
+
torch==2.5.1+cu121
|
86 |
+
wheel==0.45.1
|
87 |
+
einops==0.8.0
|
88 |
+
sentry-sdk==2.19.0
|
89 |
+
torchvision==0.20.1+cu121
|
90 |
+
humanize==4.11.0
|
91 |
+
toml==0.10.2
|
92 |
+
tensorstore==0.1.69
|
93 |
+
six==1.16.0
|
94 |
+
promise==2.3
|
95 |
+
certifi==2024.8.30
|
96 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
97 |
+
flaxformer==0.8.8
|
98 |
+
nvidia-cufft-cu12==11.0.2.54
|
99 |
+
psutil==6.1.0
|
100 |
+
GitPython==3.1.43
|
101 |
+
platformdirs==4.3.6
|
102 |
+
importlib_resources==6.4.5
|
103 |
+
tfds-nightly==4.9.7.dev202411280044
|
104 |
+
tensorflow-gan==2.1.0
|
105 |
+
googleapis-common-protos==1.66.0
|
106 |
+
overrides==7.7.0
|
107 |
+
optree==0.13.1
|
108 |
+
Pygments==2.18.0
|
109 |
+
astunparse==1.6.3
|
110 |
+
ml_collections==1.0.0
|
111 |
+
setproctitle==1.3.4
|
112 |
+
tensorboard-data-server==0.7.2
|
113 |
+
sympy==1.13.1
|
114 |
+
packaging==24.2
|
115 |
+
nest-asyncio==1.6.0
|
116 |
+
nvidia-cublas-cu12==12.1.3.1
|
117 |
+
gitdb==4.0.11
|
118 |
+
click==8.1.7
|
119 |
+
idna==3.10
|
120 |
+
tqdm==4.67.1
|
121 |
+
grpcio==1.68.0
|
122 |
+
decorator==5.1.1
|
123 |
+
pyarrow==18.1.0
|
124 |
+
clu==0.0.12
|
125 |
+
charset-normalizer==3.4.0
|
126 |
+
fsspec==2024.10.0
|
127 |
+
dm-tree==0.1.8
|
128 |
+
sentencepiece==0.2.0
|
129 |
+
nvidia-cusparse-cu12==12.1.0.106
|
130 |
+
torchaudio==2.5.1+cu121
|
131 |
+
pip==24.3.1
|
132 |
+
Markdown==3.7
|
133 |
+
nvidia-nvtx-cu12==12.1.105
|
grafted/wandb/run-20241127_213838-torch-grafted-redux/files/wandb-metadata.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-6.8.0-49-generic-x86_64-with-glibc2.39",
|
3 |
+
"python": "3.11.10",
|
4 |
+
"startedAt": "2024-11-28T05:38:38.667951Z",
|
5 |
+
"args": [
|
6 |
+
"--config",
|
7 |
+
"/home/jason-chou/Downloads/big_vision/big_vision/configs/vit_s16_i1k_single_gpu_test.py",
|
8 |
+
"--workdir",
|
9 |
+
"/data/imagenet/grafted",
|
10 |
+
"--name",
|
11 |
+
"torch-grafted-redux"
|
12 |
+
],
|
13 |
+
"program": "-m big_vision.train",
|
14 |
+
"git": {
|
15 |
+
"remote": "https://github.com/EIFY/big_vision.git",
|
16 |
+
"commit": "44649a64ff67e709f55cdb5e3adcf52064b17de5"
|
17 |
+
},
|
18 |
+
"email": "[email protected]",
|
19 |
+
"root": "/home/jason-chou/Downloads/big_vision",
|
20 |
+
"host": "jasonchou-TensorBook-late-2021",
|
21 |
+
"username": "jason-chou",
|
22 |
+
"executable": "/home/jason-chou/.pyenv/versions/3.11.10/bin/python",
|
23 |
+
"cpu_count": 8,
|
24 |
+
"cpu_count_logical": 16,
|
25 |
+
"gpu": "NVIDIA GeForce RTX 3080 Laptop GPU",
|
26 |
+
"gpu_count": 1,
|
27 |
+
"disk": {
|
28 |
+
"/": {
|
29 |
+
"total": "1006450962432",
|
30 |
+
"used": "584311005184"
|
31 |
+
}
|
32 |
+
},
|
33 |
+
"memory": {
|
34 |
+
"total": "67162914816"
|
35 |
+
},
|
36 |
+
"cpu": {
|
37 |
+
"count": 8,
|
38 |
+
"countLogical": 16
|
39 |
+
},
|
40 |
+
"gpu_nvidia": [
|
41 |
+
{
|
42 |
+
"name": "NVIDIA GeForce RTX 3080 Laptop GPU",
|
43 |
+
"memoryTotal": "17179869184",
|
44 |
+
"cudaCores": 6144,
|
45 |
+
"architecture": "Ampere"
|
46 |
+
}
|
47 |
+
],
|
48 |
+
"cudaVersion": "12.2"
|
49 |
+
}
|
grafted/wandb/run-20241127_213838-torch-grafted-redux/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"_wandb":{"runtime":6}}
|
grafted/wandb/run-20241127_213838-torch-grafted-redux/logs/debug-internal.log
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"time":"2024-11-27T21:38:38.66921709-08:00","level":"INFO","msg":"using version","core version":"0.18.7"}
|
2 |
+
{"time":"2024-11-27T21:38:38.669224917-08:00","level":"INFO","msg":"created symlink","path":"/home/jason-chou/Downloads/big_vision/wandb/run-20241127_213838-torch-grafted-redux/logs/debug-core.log"}
|
3 |
+
{"time":"2024-11-27T21:38:38.772438836-08:00","level":"INFO","msg":"created new stream","id":"torch-grafted-redux"}
|
4 |
+
{"time":"2024-11-27T21:38:38.772485797-08:00","level":"INFO","msg":"stream: started","id":"torch-grafted-redux"}
|
5 |
+
{"time":"2024-11-27T21:38:38.772617036-08:00","level":"INFO","msg":"writer: Do: started","stream_id":"torch-grafted-redux"}
|
6 |
+
{"time":"2024-11-27T21:38:38.77266667-08:00","level":"INFO","msg":"sender: started","stream_id":"torch-grafted-redux"}
|
7 |
+
{"time":"2024-11-27T21:38:38.772762681-08:00","level":"INFO","msg":"handler: started","stream_id":"torch-grafted-redux"}
|
8 |
+
{"time":"2024-11-27T21:38:39.13369457-08:00","level":"INFO","msg":"Starting system monitor"}
|
9 |
+
{"time":"2024-11-27T21:38:45.284216421-08:00","level":"INFO","msg":"stream: closing","id":"torch-grafted-redux"}
|
10 |
+
{"time":"2024-11-27T21:38:45.284267357-08:00","level":"INFO","msg":"Stopping system monitor"}
|
11 |
+
{"time":"2024-11-27T21:38:45.285410301-08:00","level":"INFO","msg":"Stopped system monitor"}
|
12 |
+
{"time":"2024-11-27T21:38:45.379673564-08:00","level":"WARN","msg":"No program path found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job"}
|
13 |
+
{"time":"2024-11-27T21:38:45.37971701-08:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
|
14 |
+
{"time":"2024-11-27T21:38:45.839243387-08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
15 |
+
{"time":"2024-11-27T21:38:45.960027486-08:00","level":"INFO","msg":"handler: closed","stream_id":"torch-grafted-redux"}
|
16 |
+
{"time":"2024-11-27T21:38:45.960124482-08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"torch-grafted-redux"}
|
17 |
+
{"time":"2024-11-27T21:38:45.96015466-08:00","level":"INFO","msg":"sender: closed","stream_id":"torch-grafted-redux"}
|
18 |
+
{"time":"2024-11-27T21:38:45.960287984-08:00","level":"INFO","msg":"stream: closed","id":"torch-grafted-redux"}
|
grafted/wandb/run-20241127_213838-torch-grafted-redux/logs/debug.log
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-11-27 21:38:38,665 INFO MainThread:69602 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
|
2 |
+
2024-11-27 21:38:38,665 INFO MainThread:69602 [wandb_setup.py:_flush():79] Configure stats pid to 69602
|
3 |
+
2024-11-27 21:38:38,665 INFO MainThread:69602 [wandb_setup.py:_flush():79] Loading settings from /home/jason-chou/.config/wandb/settings
|
4 |
+
2024-11-27 21:38:38,665 INFO MainThread:69602 [wandb_setup.py:_flush():79] Loading settings from /home/jason-chou/Downloads/big_vision/wandb/settings
|
5 |
+
2024-11-27 21:38:38,665 INFO MainThread:69602 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
|
6 |
+
2024-11-27 21:38:38,665 INFO MainThread:69602 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
|
7 |
+
2024-11-27 21:38:38,665 WARNING MainThread:69602 [wandb_setup.py:_flush():79] Could not find program at -m big_vision.train
|
8 |
+
2024-11-27 21:38:38,665 INFO MainThread:69602 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m big_vision.train'}
|
9 |
+
2024-11-27 21:38:38,665 INFO MainThread:69602 [wandb_setup.py:_flush():79] Applying login settings: {}
|
10 |
+
2024-11-27 21:38:38,665 INFO MainThread:69602 [wandb_init.py:_log_setup():533] Logging user logs to /home/jason-chou/Downloads/big_vision/wandb/run-20241127_213838-torch-grafted-redux/logs/debug.log
|
11 |
+
2024-11-27 21:38:38,665 INFO MainThread:69602 [wandb_init.py:_log_setup():534] Logging internal logs to /home/jason-chou/Downloads/big_vision/wandb/run-20241127_213838-torch-grafted-redux/logs/debug-internal.log
|
12 |
+
2024-11-27 21:38:38,665 INFO MainThread:69602 [wandb_init.py:init():619] calling init triggers
|
13 |
+
2024-11-27 21:38:38,667 INFO MainThread:69602 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
|
14 |
+
config: {'_fields': {'seed': 0, 'total_epochs': 90, 'num_classes': 1000, 'loss': 'softmax_xent', 'input': accum_freq: 8
|
15 |
+
batch_size: 1024
|
16 |
+
cache_raw: false
|
17 |
+
data:
|
18 |
+
name: imagenet2012
|
19 |
+
split: train
|
20 |
+
pp: decode_jpeg_and_inception_crop(224)|flip_lr|randaug(2,10)|value_range(-1, 1)|onehot(1000,
|
21 |
+
key="label", key_result="labels")|keep("image", "labels")
|
22 |
+
shuffle_buffer_size: 150000
|
23 |
+
, 'pp_modules': ['ops_general', 'ops_image', 'ops_text', 'archive.randaug'], 'log_training_steps': 50, 'ckpt_steps': 1000, 'model_name': 'vit', 'model': pool_type: gap
|
24 |
+
posemb: sincos2d
|
25 |
+
rep_size: false
|
26 |
+
variant: S/16
|
27 |
+
, 'grad_clip_norm': 1.0, 'optax_name': 'scale_by_adam', 'optax': mu_dtype: bfloat16
|
28 |
+
, 'lr': 0.001, 'wd': 0.0001, 'schedule': decay_type: cosine
|
29 |
+
warmup_steps: 10000
|
30 |
+
, 'mixup': fold_in: null
|
31 |
+
p: 0.2
|
32 |
+
, 'evals': val:
|
33 |
+
data:
|
34 |
+
name: imagenet2012
|
35 |
+
split: validation
|
36 |
+
log_steps: 2500
|
37 |
+
loss_name: softmax_xent
|
38 |
+
pp_fn: decode|resize_small(256)|central_crop(224)|value_range(-1, 1)|onehot(1000,
|
39 |
+
key="label", key_result="labels")|keep("image", "labels")
|
40 |
+
type: classification
|
41 |
+
}, '_locked': True, '_type_safe': True, '_convert_dict': True, '_allow_dotted_keys': False, '_sort_keys': True}
|
42 |
+
2024-11-27 21:38:38,667 INFO MainThread:69602 [wandb_init.py:init():669] starting backend
|
43 |
+
2024-11-27 21:38:38,667 INFO MainThread:69602 [wandb_init.py:init():673] sending inform_init request
|
44 |
+
2024-11-27 21:38:38,667 INFO MainThread:69602 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
45 |
+
2024-11-27 21:38:38,667 INFO MainThread:69602 [wandb_init.py:init():686] backend started and connected
|
46 |
+
2024-11-27 21:38:38,670 INFO MainThread:69602 [wandb_init.py:init():781] updated telemetry
|
47 |
+
2024-11-27 21:38:38,672 INFO MainThread:69602 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
|
48 |
+
2024-11-27 21:38:39,116 INFO MainThread:69602 [wandb_init.py:init():859] run resumed
|
49 |
+
2024-11-27 21:38:39,131 INFO MainThread:69602 [wandb_init.py:init():867] starting run threads in backend
|
50 |
+
2024-11-27 21:38:39,182 INFO MainThread:69602 [wandb_run.py:_console_start():2456] atexit reg
|
51 |
+
2024-11-27 21:38:39,182 INFO MainThread:69602 [wandb_run.py:_redirect():2305] redirect: wrap_raw
|
52 |
+
2024-11-27 21:38:39,182 INFO MainThread:69602 [wandb_run.py:_redirect():2370] Wrapping output streams.
|
53 |
+
2024-11-27 21:38:39,182 INFO MainThread:69602 [wandb_run.py:_redirect():2395] Redirects installed.
|
54 |
+
2024-11-27 21:38:39,183 INFO MainThread:69602 [wandb_init.py:init():911] run started, returning control to user process
|
55 |
+
2024-11-27 21:38:45,287 WARNING MsgRouterThr:69602 [router.py:message_loop():75] message_loop has been closed
|
grafted/wandb/run-20241127_213838-torch-grafted-redux/run-torch-grafted-redux.wandb
ADDED
Binary file (46.3 kB). View file
|
|
grafted/wandb/run-20241127_214327-torch-grafted-redux/files/config.yaml
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_allow_dotted_keys:
|
2 |
+
value: false
|
3 |
+
_convert_dict:
|
4 |
+
value: true
|
5 |
+
_fields:
|
6 |
+
value:
|
7 |
+
ckpt_steps: 1000
|
8 |
+
evals: |
|
9 |
+
val:
|
10 |
+
data:
|
11 |
+
name: imagenet2012
|
12 |
+
split: validation
|
13 |
+
log_steps: 2500
|
14 |
+
loss_name: softmax_xent
|
15 |
+
pp_fn: decode|resize_small(256)|central_crop(224)|value_range(-1, 1)|onehot(1000,
|
16 |
+
key="label", key_result="labels")|keep("image", "labels")
|
17 |
+
type: classification
|
18 |
+
grad_clip_norm: 1
|
19 |
+
input: |
|
20 |
+
accum_freq: 8
|
21 |
+
batch_size: 1024
|
22 |
+
cache_raw: false
|
23 |
+
data:
|
24 |
+
name: imagenet2012
|
25 |
+
split: train
|
26 |
+
pp: decode_jpeg_and_inception_crop(224)|flip_lr|randaug(2,10)|value_range(-1, 1)|onehot(1000,
|
27 |
+
key="label", key_result="labels")|keep("image", "labels")
|
28 |
+
shuffle_buffer_size: 150000
|
29 |
+
log_training_steps: 50
|
30 |
+
loss: softmax_xent
|
31 |
+
lr: 0.001
|
32 |
+
mixup: |
|
33 |
+
fold_in: null
|
34 |
+
p: 0.2
|
35 |
+
model: |
|
36 |
+
pool_type: gap
|
37 |
+
posemb: sincos2d
|
38 |
+
rep_size: false
|
39 |
+
variant: S/16
|
40 |
+
model_name: vit
|
41 |
+
num_classes: 1000
|
42 |
+
optax: |
|
43 |
+
mu_dtype: bfloat16
|
44 |
+
optax_name: scale_by_adam
|
45 |
+
pp_modules:
|
46 |
+
- ops_general
|
47 |
+
- ops_image
|
48 |
+
- ops_text
|
49 |
+
- archive.randaug
|
50 |
+
schedule: |
|
51 |
+
decay_type: cosine
|
52 |
+
warmup_steps: 10000
|
53 |
+
seed: 0
|
54 |
+
total_epochs: 90
|
55 |
+
wd: 0.0001
|
56 |
+
_locked:
|
57 |
+
value: true
|
58 |
+
_sort_keys:
|
59 |
+
value: true
|
60 |
+
_type_safe:
|
61 |
+
value: true
|
62 |
+
_wandb:
|
63 |
+
value:
|
64 |
+
cli_version: 0.18.7
|
65 |
+
m: []
|
66 |
+
python_version: 3.11.10
|
67 |
+
t:
|
68 |
+
"1":
|
69 |
+
- 1
|
70 |
+
- 2
|
71 |
+
- 3
|
72 |
+
- 12
|
73 |
+
- 41
|
74 |
+
- 45
|
75 |
+
- 55
|
76 |
+
"2":
|
77 |
+
- 1
|
78 |
+
- 2
|
79 |
+
- 3
|
80 |
+
- 12
|
81 |
+
- 41
|
82 |
+
- 45
|
83 |
+
- 55
|
84 |
+
"3":
|
85 |
+
- 5
|
86 |
+
- 13
|
87 |
+
- 14
|
88 |
+
- 16
|
89 |
+
- 23
|
90 |
+
- 55
|
91 |
+
- 62
|
92 |
+
"4": 3.11.10
|
93 |
+
"5": 0.18.7
|
94 |
+
"8":
|
95 |
+
- 5
|
96 |
+
"12": 0.18.7
|
97 |
+
"13": linux-x86_64
|
grafted/wandb/run-20241127_214327-torch-grafted-redux/files/output.log
ADDED
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
I1127 21:43:28.307721 123439371660160 train.py:125] [33mNOTE[0m: Initializing train dataset...
|
2 |
+
I1127 21:43:28.307887 123439371660160 train.py:125] [33mNOTE[0m: Global batch size 1024 on 1 hosts results in 1024 local batch size. With 1 dev per host (1 dev total), that's a 1024 per-device batch size.
|
3 |
+
I1127 21:43:28.459977 123439371660160 dataset_info.py:707] Load dataset info from /data/tensorflow_datasets/imagenet2012/5.1.0
|
4 |
+
I1127 21:43:28.478083 123439371660160 reader.py:261] Creating a tf.data.Dataset reading 1024 files located in folders: /data/tensorflow_datasets/imagenet2012/5.1.0.
|
5 |
+
WARNING:tensorflow:From /home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/tensorflow_datasets/core/reader.py:101: CounterV2 (from tensorflow.python.data.experimental.ops.counter) is deprecated and will be removed in a future version.
|
6 |
+
Instructions for updating:
|
7 |
+
Use `tf.data.Dataset.counter(...)` instead.
|
8 |
+
W1127 21:43:28.511688 123439371660160 deprecation.py:50] From /home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/tensorflow_datasets/core/reader.py:101: CounterV2 (from tensorflow.python.data.experimental.ops.counter) is deprecated and will be removed in a future version.
|
9 |
+
Instructions for updating:
|
10 |
+
Use `tf.data.Dataset.counter(...)` instead.
|
11 |
+
I1127 21:43:28.534458 123439371660160 logging_logger.py:49] Constructing tf.data.Dataset imagenet2012 for split _EvenSplit(split='train', index=0, count=1, drop_remainder=False), from /data/tensorflow_datasets/imagenet2012/5.1.0
|
12 |
+
I1127 21:43:28.601723 123439371660160 api.py:460] Data before pre-processing:
|
13 |
+
{'file_name': <tf.Tensor 'args_1:0' shape=() dtype=string>, 'image': <tf.Tensor 'args_2:0' shape=() dtype=string>, 'label': <tf.Tensor 'args_3:0' shape=() dtype=int64>, 'tfds_id': <tf.Tensor 'args_4:0' shape=() dtype=string>, '_id': <tf.Tensor 'args_0:0' shape=() dtype=int32>}
|
14 |
+
INFO:tensorflow:Using RandAug.
|
15 |
+
I1127 21:43:28.866792 123439371660160 api.py:460] Using RandAug.
|
16 |
+
WARNING:tensorflow:From /home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/tensorflow/python/util/dispatch.py:1260: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
|
17 |
+
Instructions for updating:
|
18 |
+
Use `tf.cast` instead.
|
19 |
+
W1127 21:43:29.046336 123439371660160 deprecation.py:50] From /home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/tensorflow/python/util/dispatch.py:1260: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
|
20 |
+
Instructions for updating:
|
21 |
+
Use `tf.cast` instead.
|
22 |
+
I1127 21:43:31.278083 123439371660160 api.py:460] Data after pre-processing:
|
23 |
+
{'image': <tf.Tensor 'add:0' shape=(224, 224, 3) dtype=float32>, 'labels': <tf.Tensor 'one_hot:0' shape=(1000,) dtype=float32>}
|
24 |
+
I1127 21:43:31.349534 123439371660160 train.py:125] [33mNOTE[0m: Running for 112603 steps, that means 90.000345 epochs
|
25 |
+
I1127 21:43:31.991644 123439371660160 train.py:125] [33mNOTE[0m: Creating model...
|
26 |
+
Weight decay for: conv_proj.weight
|
27 |
+
Weight decay for: encoder.layers.encoder_layer_0.self_attention.in_proj_weight
|
28 |
+
Weight decay for: encoder.layers.encoder_layer_0.self_attention.out_proj.weight
|
29 |
+
Weight decay for: encoder.layers.encoder_layer_0.mlp.0.weight
|
30 |
+
Weight decay for: encoder.layers.encoder_layer_0.mlp.3.weight
|
31 |
+
Weight decay for: encoder.layers.encoder_layer_1.self_attention.in_proj_weight
|
32 |
+
Weight decay for: encoder.layers.encoder_layer_1.self_attention.out_proj.weight
|
33 |
+
Weight decay for: encoder.layers.encoder_layer_1.mlp.0.weight
|
34 |
+
Weight decay for: encoder.layers.encoder_layer_1.mlp.3.weight
|
35 |
+
Weight decay for: encoder.layers.encoder_layer_2.self_attention.in_proj_weight
|
36 |
+
Weight decay for: encoder.layers.encoder_layer_2.self_attention.out_proj.weight
|
37 |
+
Weight decay for: encoder.layers.encoder_layer_2.mlp.0.weight
|
38 |
+
Weight decay for: encoder.layers.encoder_layer_2.mlp.3.weight
|
39 |
+
Weight decay for: encoder.layers.encoder_layer_3.self_attention.in_proj_weight
|
40 |
+
Weight decay for: encoder.layers.encoder_layer_3.self_attention.out_proj.weight
|
41 |
+
Weight decay for: encoder.layers.encoder_layer_3.mlp.0.weight
|
42 |
+
Weight decay for: encoder.layers.encoder_layer_3.mlp.3.weight
|
43 |
+
Weight decay for: encoder.layers.encoder_layer_4.self_attention.in_proj_weight
|
44 |
+
Weight decay for: encoder.layers.encoder_layer_4.self_attention.out_proj.weight
|
45 |
+
Weight decay for: encoder.layers.encoder_layer_4.mlp.0.weight
|
46 |
+
Weight decay for: encoder.layers.encoder_layer_4.mlp.3.weight
|
47 |
+
Weight decay for: encoder.layers.encoder_layer_5.self_attention.in_proj_weight
|
48 |
+
Weight decay for: encoder.layers.encoder_layer_5.self_attention.out_proj.weight
|
49 |
+
Weight decay for: encoder.layers.encoder_layer_5.mlp.0.weight
|
50 |
+
Weight decay for: encoder.layers.encoder_layer_5.mlp.3.weight
|
51 |
+
Weight decay for: encoder.layers.encoder_layer_6.self_attention.in_proj_weight
|
52 |
+
Weight decay for: encoder.layers.encoder_layer_6.self_attention.out_proj.weight
|
53 |
+
Weight decay for: encoder.layers.encoder_layer_6.mlp.0.weight
|
54 |
+
Weight decay for: encoder.layers.encoder_layer_6.mlp.3.weight
|
55 |
+
Weight decay for: encoder.layers.encoder_layer_7.self_attention.in_proj_weight
|
56 |
+
Weight decay for: encoder.layers.encoder_layer_7.self_attention.out_proj.weight
|
57 |
+
Weight decay for: encoder.layers.encoder_layer_7.mlp.0.weight
|
58 |
+
Weight decay for: encoder.layers.encoder_layer_7.mlp.3.weight
|
59 |
+
Weight decay for: encoder.layers.encoder_layer_8.self_attention.in_proj_weight
|
60 |
+
Weight decay for: encoder.layers.encoder_layer_8.self_attention.out_proj.weight
|
61 |
+
Weight decay for: encoder.layers.encoder_layer_8.mlp.0.weight
|
62 |
+
Weight decay for: encoder.layers.encoder_layer_8.mlp.3.weight
|
63 |
+
Weight decay for: encoder.layers.encoder_layer_9.self_attention.in_proj_weight
|
64 |
+
Weight decay for: encoder.layers.encoder_layer_9.self_attention.out_proj.weight
|
65 |
+
Weight decay for: encoder.layers.encoder_layer_9.mlp.0.weight
|
66 |
+
Weight decay for: encoder.layers.encoder_layer_9.mlp.3.weight
|
67 |
+
Weight decay for: encoder.layers.encoder_layer_10.self_attention.in_proj_weight
|
68 |
+
Weight decay for: encoder.layers.encoder_layer_10.self_attention.out_proj.weight
|
69 |
+
Weight decay for: encoder.layers.encoder_layer_10.mlp.0.weight
|
70 |
+
Weight decay for: encoder.layers.encoder_layer_10.mlp.3.weight
|
71 |
+
Weight decay for: encoder.layers.encoder_layer_11.self_attention.in_proj_weight
|
72 |
+
Weight decay for: encoder.layers.encoder_layer_11.self_attention.out_proj.weight
|
73 |
+
Weight decay for: encoder.layers.encoder_layer_11.mlp.0.weight
|
74 |
+
Weight decay for: encoder.layers.encoder_layer_11.mlp.3.weight
|
75 |
+
Weight decay for: heads.head.weight
|
76 |
+
Weight decay for: conv_proj.weight
|
77 |
+
Weight decay for: encoder.layers.encoder_layer_0.self_attention.in_proj_weight
|
78 |
+
Weight decay for: encoder.layers.encoder_layer_0.self_attention.out_proj.weight
|
79 |
+
Weight decay for: encoder.layers.encoder_layer_0.mlp.0.weight
|
80 |
+
Weight decay for: encoder.layers.encoder_layer_0.mlp.3.weight
|
81 |
+
Weight decay for: encoder.layers.encoder_layer_1.self_attention.in_proj_weight
|
82 |
+
Weight decay for: encoder.layers.encoder_layer_1.self_attention.out_proj.weight
|
83 |
+
Weight decay for: encoder.layers.encoder_layer_1.mlp.0.weight
|
84 |
+
Weight decay for: encoder.layers.encoder_layer_1.mlp.3.weight
|
85 |
+
Weight decay for: encoder.layers.encoder_layer_2.self_attention.in_proj_weight
|
86 |
+
Weight decay for: encoder.layers.encoder_layer_2.self_attention.out_proj.weight
|
87 |
+
Weight decay for: encoder.layers.encoder_layer_2.mlp.0.weight
|
88 |
+
Weight decay for: encoder.layers.encoder_layer_2.mlp.3.weight
|
89 |
+
Weight decay for: encoder.layers.encoder_layer_3.self_attention.in_proj_weight
|
90 |
+
Weight decay for: encoder.layers.encoder_layer_3.self_attention.out_proj.weight
|
91 |
+
Weight decay for: encoder.layers.encoder_layer_3.mlp.0.weight
|
92 |
+
Weight decay for: encoder.layers.encoder_layer_3.mlp.3.weight
|
93 |
+
Weight decay for: encoder.layers.encoder_layer_4.self_attention.in_proj_weight
|
94 |
+
Weight decay for: encoder.layers.encoder_layer_4.self_attention.out_proj.weight
|
95 |
+
Weight decay for: encoder.layers.encoder_layer_4.mlp.0.weight
|
96 |
+
Weight decay for: encoder.layers.encoder_layer_4.mlp.3.weight
|
97 |
+
Weight decay for: encoder.layers.encoder_layer_5.self_attention.in_proj_weight
|
98 |
+
Weight decay for: encoder.layers.encoder_layer_5.self_attention.out_proj.weight
|
99 |
+
Weight decay for: encoder.layers.encoder_layer_5.mlp.0.weight
|
100 |
+
Weight decay for: encoder.layers.encoder_layer_5.mlp.3.weight
|
101 |
+
Weight decay for: encoder.layers.encoder_layer_6.self_attention.in_proj_weight
|
102 |
+
Weight decay for: encoder.layers.encoder_layer_6.self_attention.out_proj.weight
|
103 |
+
Weight decay for: encoder.layers.encoder_layer_6.mlp.0.weight
|
104 |
+
Weight decay for: encoder.layers.encoder_layer_6.mlp.3.weight
|
105 |
+
Weight decay for: encoder.layers.encoder_layer_7.self_attention.in_proj_weight
|
106 |
+
Weight decay for: encoder.layers.encoder_layer_7.self_attention.out_proj.weight
|
107 |
+
Weight decay for: encoder.layers.encoder_layer_7.mlp.0.weight
|
108 |
+
Weight decay for: encoder.layers.encoder_layer_7.mlp.3.weight
|
109 |
+
Weight decay for: encoder.layers.encoder_layer_8.self_attention.in_proj_weight
|
110 |
+
Weight decay for: encoder.layers.encoder_layer_8.self_attention.out_proj.weight
|
111 |
+
Weight decay for: encoder.layers.encoder_layer_8.mlp.0.weight
|
112 |
+
Weight decay for: encoder.layers.encoder_layer_8.mlp.3.weight
|
113 |
+
Weight decay for: encoder.layers.encoder_layer_9.self_attention.in_proj_weight
|
114 |
+
Weight decay for: encoder.layers.encoder_layer_9.self_attention.out_proj.weight
|
115 |
+
Weight decay for: encoder.layers.encoder_layer_9.mlp.0.weight
|
116 |
+
Weight decay for: encoder.layers.encoder_layer_9.mlp.3.weight
|
117 |
+
Weight decay for: encoder.layers.encoder_layer_10.self_attention.in_proj_weight
|
118 |
+
Weight decay for: encoder.layers.encoder_layer_10.self_attention.out_proj.weight
|
119 |
+
Weight decay for: encoder.layers.encoder_layer_10.mlp.0.weight
|
120 |
+
Weight decay for: encoder.layers.encoder_layer_10.mlp.3.weight
|
121 |
+
Weight decay for: encoder.layers.encoder_layer_11.self_attention.in_proj_weight
|
122 |
+
Weight decay for: encoder.layers.encoder_layer_11.self_attention.out_proj.weight
|
123 |
+
Weight decay for: encoder.layers.encoder_layer_11.mlp.0.weight
|
124 |
+
Weight decay for: encoder.layers.encoder_layer_11.mlp.3.weight
|
125 |
+
Weight decay for: heads.head.weight
|
126 |
+
I1127 21:43:32.855304 123439371660160 train.py:125] [33mNOTE[0m: Running initial or final evals...
|
127 |
+
I1127 21:43:32.855672 123439371660160 train.py:125] [33mNOTE[0m: Init evaluator: val…
|
128 |
+
Steps:0/112603 [0.0%]
|
129 |
+
I1127 21:43:32.857478 123439371660160 reader.py:261] Creating a tf.data.Dataset reading 64 files located in folders: /data/tensorflow_datasets/imagenet2012/5.1.0.
|
130 |
+
I1127 21:43:32.889285 123439371660160 logging_logger.py:49] Constructing tf.data.Dataset imagenet2012 for split _EvenSplit(split='validation', index=0, count=1, drop_remainder=False), from /data/tensorflow_datasets/imagenet2012/5.1.0
|
131 |
+
I1127 21:43:32.923067 123439371660160 api.py:460] Data before pre-processing:
|
132 |
+
{'file_name': <tf.Tensor 'args_1:0' shape=() dtype=string>, 'image': <tf.Tensor 'args_2:0' shape=() dtype=string>, 'label': <tf.Tensor 'args_3:0' shape=() dtype=int64>, 'tfds_id': <tf.Tensor 'args_4:0' shape=() dtype=string>, '_id': <tf.Tensor 'args_0:0' shape=() dtype=int32>}
|
133 |
+
I1127 21:43:33.088486 123439371660160 api.py:460] Data after pre-processing:
|
134 |
+
{'image': <tf.Tensor 'add:0' shape=(224, 224, 3) dtype=float32>, 'labels': <tf.Tensor 'one_hot:0' shape=(1000,) dtype=float32>}
|
135 |
+
I1127 21:43:33.183008 123439371660160 train.py:125] [33mNOTE[0m: val evaluation...
|
136 |
+
Steps:0/112603 [0.0%]
|
137 |
+
I1127 21:44:58.298490 123439371660160 utils.py:1231] [35m[0][0m val/acc@1 = 0.0045041454081632655
|
138 |
+
I1127 21:44:58.298775 123439371660160 utils.py:1231] [35m[0][0m val/loss = 6.883525214633163
|
139 |
+
I1127 21:44:58.298943 123439371660160 utils.py:1231] [35m[0][0m z/secs/eval/val = 85.11567897199711
|
140 |
+
I1127 21:44:58.299036 123439371660160 utils.py:560] TIMING[z/secs/eval/val]: 85.11567897199711
|
141 |
+
I1127 21:44:58.299128 123439371660160 train.py:125] [33mNOTE[0m: Starting training loop, compiling the first step...
|
142 |
+
Traceback (most recent call last):
|
143 |
+
File "<frozen runpy>", line 198, in _run_module_as_main
|
144 |
+
File "<frozen runpy>", line 88, in _run_code
|
145 |
+
File "/home/jason-chou/Downloads/big_vision/big_vision/train.py", line 396, in <module>
|
146 |
+
app.run(main)
|
147 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/absl/app.py", line 308, in run
|
148 |
+
_run_main(main, args)
|
149 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/absl/app.py", line 254, in _run_main
|
150 |
+
sys.exit(main(argv))
|
151 |
+
^^^^^^^^^^
|
152 |
+
File "/home/jason-chou/Downloads/big_vision/big_vision/train.py", line 328, in main
|
153 |
+
output = model(img)
|
154 |
+
^^^^^^^^^^
|
155 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
|
156 |
+
return self._call_impl(*args, **kwargs)
|
157 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
158 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
|
159 |
+
return forward_call(*args, **kwargs)
|
160 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
161 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/torch/_dynamo/eval_frame.py", line 465, in _fn
|
162 |
+
return fn(*args, **kwargs)
|
163 |
+
^^^^^^^^^^^^^^^^^^^
|
164 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
|
165 |
+
return self._call_impl(*args, **kwargs)
|
166 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
167 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
|
168 |
+
return forward_call(*args, **kwargs)
|
169 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
170 |
+
File "/home/jason-chou/Downloads/big_vision/big_vision/simple_vit.py", line 216, in forward
|
171 |
+
def forward(self, x: torch.Tensor):
|
172 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/torch/_dynamo/eval_frame.py", line 632, in _fn
|
173 |
+
return fn(*args, **kwargs)
|
174 |
+
^^^^^^^^^^^^^^^^^^^
|
175 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/torch/_functorch/aot_autograd.py", line 1100, in forward
|
176 |
+
return compiled_fn(full_args)
|
177 |
+
^^^^^^^^^^^^^^^^^^^^^^
|
178 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 308, in runtime_wrapper
|
179 |
+
all_outs = call_func_at_runtime_with_args(
|
180 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
181 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/torch/_functorch/_aot_autograd/utils.py", line 124, in call_func_at_runtime_with_args
|
182 |
+
out = normalize_as_list(f(args))
|
183 |
+
^^^^^^^
|
184 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/torch/_functorch/_aot_autograd/utils.py", line 98, in g
|
185 |
+
return f(*args)
|
186 |
+
^^^^^^^^
|
187 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/torch/autograd/function.py", line 575, in apply
|
188 |
+
return super().apply(*args, **kwargs) # type: ignore[misc]
|
189 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
190 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 1525, in forward
|
191 |
+
fw_outs = call_func_at_runtime_with_args(
|
192 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
193 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/torch/_functorch/_aot_autograd/utils.py", line 124, in call_func_at_runtime_with_args
|
194 |
+
out = normalize_as_list(f(args))
|
195 |
+
^^^^^^^
|
196 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 488, in wrapper
|
197 |
+
return compiled_fn(runtime_args)
|
198 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^
|
199 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 667, in inner_fn
|
200 |
+
outs = compiled_fn(args)
|
201 |
+
^^^^^^^^^^^^^^^^^
|
202 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/torch/_inductor/codecache.py", line 1478, in __call__
|
203 |
+
return self.current_callable(inputs)
|
204 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
205 |
+
File "/home/jason-chou/.pyenv/versions/3.11.10/lib/python3.11/site-packages/torch/_inductor/utils.py", line 1977, in run
|
206 |
+
return model(new_inputs)
|
207 |
+
^^^^^^^^^^^^^^^^^
|
208 |
+
File "/tmp/torchinductor_jason-chou/ka/ckacaj7mldyv4qgozpmsomr7zpv44qchrg7rtykk4mbmnm67wz36.py", line 1386, in call
|
209 |
+
buf141 = empty_strided_cuda((128, 6, 196, 64), (384, 64, 49152, 1), torch.float32)
|
210 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
211 |
+
torch.OutOfMemoryError: CUDA out of memory. Tried to allocate 38.00 MiB. GPU 0 has a total capacity of 15.74 GiB of which 30.62 MiB is free. Including non-PyTorch memory, this process has 15.69 GiB memory in use. Of the allocated memory 3.51 GiB is allocated by PyTorch, and 176.27 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
|
grafted/wandb/run-20241127_214327-torch-grafted-redux/files/requirements.txt
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
array_record==0.5.1
|
2 |
+
wandb==0.18.7
|
3 |
+
nvidia-curand-cu12==10.3.2.106
|
4 |
+
requests-oauthlib==2.0.0
|
5 |
+
zipp==3.21.0
|
6 |
+
Werkzeug==3.1.3
|
7 |
+
simple-parsing==0.1.6
|
8 |
+
mdurl==0.1.2
|
9 |
+
keras==2.15.0
|
10 |
+
nvidia-cuda-nvcc-cu12==12.6.85
|
11 |
+
google-auth-oauthlib==1.2.1
|
12 |
+
jaxlib==0.4.34
|
13 |
+
tf_keras==2.15.1
|
14 |
+
oauthlib==3.2.2
|
15 |
+
tensorflow-probability==0.25.0
|
16 |
+
cachetools==5.5.0
|
17 |
+
Jinja2==3.1.3
|
18 |
+
rich==13.9.4
|
19 |
+
filelock==3.13.1
|
20 |
+
google-pasta==0.2.0
|
21 |
+
optax==0.2.4
|
22 |
+
toolz==1.0.0
|
23 |
+
gast==0.6.0
|
24 |
+
tensorboard==2.15.2
|
25 |
+
pyasn1_modules==0.4.1
|
26 |
+
nvidia-cudnn-cu12==9.1.0.70
|
27 |
+
opt_einsum==3.4.0
|
28 |
+
nvidia-nvjitlink-cu12==12.6.85
|
29 |
+
chex==0.1.87
|
30 |
+
namex==0.0.8
|
31 |
+
termcolor==2.5.0
|
32 |
+
flax==0.10.2
|
33 |
+
cloudpickle==3.1.0
|
34 |
+
numpy==1.26.4
|
35 |
+
nvidia-nccl-cu12==2.21.5
|
36 |
+
tensorflow-cpu==2.15.0
|
37 |
+
nvidia-cusolver-cu12==11.4.5.107
|
38 |
+
typing_extensions==4.12.2
|
39 |
+
tensorflow-addons==0.23.0
|
40 |
+
typeguard==2.13.3
|
41 |
+
absl-py==2.1.0
|
42 |
+
flatbuffers==24.3.25
|
43 |
+
dlpack==0.1
|
44 |
+
setuptools==65.5.0
|
45 |
+
protobuf==4.25.5
|
46 |
+
jax-cuda12-plugin==0.4.35
|
47 |
+
tensorflow==2.15.0
|
48 |
+
msgpack==1.1.0
|
49 |
+
networkx==3.2.1
|
50 |
+
docker-pycreds==0.4.0
|
51 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
52 |
+
pillow==11.0.0
|
53 |
+
libclang==18.1.1
|
54 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
55 |
+
distrax==0.1.5
|
56 |
+
orbax-checkpoint==0.10.1
|
57 |
+
PyYAML==6.0.2
|
58 |
+
urllib3==2.2.3
|
59 |
+
aqtp==0.8.2
|
60 |
+
tensorflow-metadata==1.16.1
|
61 |
+
etils==1.11.0
|
62 |
+
smmap==5.0.1
|
63 |
+
pyasn1==0.6.1
|
64 |
+
docstring_parser==0.16
|
65 |
+
google-auth==2.36.0
|
66 |
+
simplejson==3.19.3
|
67 |
+
mpmath==1.3.0
|
68 |
+
h5py==3.12.1
|
69 |
+
jax-cuda12-pjrt==0.4.35
|
70 |
+
tensorflow-io-gcs-filesystem==0.37.1
|
71 |
+
tensorflow-estimator==2.15.0
|
72 |
+
pydlpack==0.2.1
|
73 |
+
triton==3.1.0
|
74 |
+
rsa==4.9
|
75 |
+
panopticapi==0.1
|
76 |
+
tensorflow-hub==0.16.1
|
77 |
+
requests==2.32.3
|
78 |
+
scipy==1.14.1
|
79 |
+
ml-dtypes==0.2.0
|
80 |
+
markdown-it-py==3.0.0
|
81 |
+
tensorflow-text==2.15.0
|
82 |
+
wrapt==1.14.1
|
83 |
+
immutabledict==4.2.1
|
84 |
+
MarkupSafe==3.0.2
|
85 |
+
jax==0.4.35
|
86 |
+
torch==2.5.1+cu121
|
87 |
+
wheel==0.45.1
|
88 |
+
einops==0.8.0
|
89 |
+
sentry-sdk==2.19.0
|
90 |
+
torchvision==0.20.1+cu121
|
91 |
+
humanize==4.11.0
|
92 |
+
toml==0.10.2
|
93 |
+
tensorstore==0.1.69
|
94 |
+
six==1.16.0
|
95 |
+
promise==2.3
|
96 |
+
certifi==2024.8.30
|
97 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
98 |
+
flaxformer==0.8.8
|
99 |
+
nvidia-cufft-cu12==11.0.2.54
|
100 |
+
psutil==6.1.0
|
101 |
+
GitPython==3.1.43
|
102 |
+
platformdirs==4.3.6
|
103 |
+
importlib_resources==6.4.5
|
104 |
+
tfds-nightly==4.9.7.dev202411280044
|
105 |
+
tensorflow-gan==2.1.0
|
106 |
+
googleapis-common-protos==1.66.0
|
107 |
+
overrides==7.7.0
|
108 |
+
optree==0.13.1
|
109 |
+
Pygments==2.18.0
|
110 |
+
astunparse==1.6.3
|
111 |
+
ml_collections==1.0.0
|
112 |
+
setproctitle==1.3.4
|
113 |
+
tensorboard-data-server==0.7.2
|
114 |
+
sympy==1.13.1
|
115 |
+
packaging==24.2
|
116 |
+
nest-asyncio==1.6.0
|
117 |
+
nvidia-cublas-cu12==12.1.3.1
|
118 |
+
gitdb==4.0.11
|
119 |
+
click==8.1.7
|
120 |
+
idna==3.10
|
121 |
+
tqdm==4.67.1
|
122 |
+
grpcio==1.68.0
|
123 |
+
decorator==5.1.1
|
124 |
+
pyarrow==18.1.0
|
125 |
+
clu==0.0.12
|
126 |
+
charset-normalizer==3.4.0
|
127 |
+
fsspec==2024.10.0
|
128 |
+
dm-tree==0.1.8
|
129 |
+
sentencepiece==0.2.0
|
130 |
+
nvidia-cusparse-cu12==12.1.0.106
|
131 |
+
torchaudio==2.5.1+cu121
|
132 |
+
pip==24.3.1
|
133 |
+
Markdown==3.7
|
134 |
+
nvidia-nvtx-cu12==12.1.105
|
grafted/wandb/run-20241127_214327-torch-grafted-redux/files/wandb-metadata.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-6.8.0-49-generic-x86_64-with-glibc2.39",
|
3 |
+
"python": "3.11.10",
|
4 |
+
"startedAt": "2024-11-28T05:43:27.740135Z",
|
5 |
+
"args": [
|
6 |
+
"--config",
|
7 |
+
"/home/jason-chou/Downloads/big_vision/big_vision/configs/vit_s16_i1k_single_gpu_test.py",
|
8 |
+
"--workdir",
|
9 |
+
"/data/imagenet/grafted",
|
10 |
+
"--name",
|
11 |
+
"torch-grafted-redux"
|
12 |
+
],
|
13 |
+
"program": "-m big_vision.train",
|
14 |
+
"git": {
|
15 |
+
"remote": "https://github.com/EIFY/big_vision.git",
|
16 |
+
"commit": "44649a64ff67e709f55cdb5e3adcf52064b17de5"
|
17 |
+
},
|
18 |
+
"email": "[email protected]",
|
19 |
+
"root": "/home/jason-chou/Downloads/big_vision",
|
20 |
+
"host": "jasonchou-TensorBook-late-2021",
|
21 |
+
"username": "jason-chou",
|
22 |
+
"executable": "/home/jason-chou/.pyenv/versions/3.11.10/bin/python",
|
23 |
+
"cpu_count": 8,
|
24 |
+
"cpu_count_logical": 16,
|
25 |
+
"gpu": "NVIDIA GeForce RTX 3080 Laptop GPU",
|
26 |
+
"gpu_count": 1,
|
27 |
+
"disk": {
|
28 |
+
"/": {
|
29 |
+
"total": "1006450962432",
|
30 |
+
"used": "584312172544"
|
31 |
+
}
|
32 |
+
},
|
33 |
+
"memory": {
|
34 |
+
"total": "67162914816"
|
35 |
+
},
|
36 |
+
"cpu": {
|
37 |
+
"count": 8,
|
38 |
+
"countLogical": 16
|
39 |
+
},
|
40 |
+
"gpu_nvidia": [
|
41 |
+
{
|
42 |
+
"name": "NVIDIA GeForce RTX 3080 Laptop GPU",
|
43 |
+
"memoryTotal": "17179869184",
|
44 |
+
"cudaCores": 6144,
|
45 |
+
"architecture": "Ampere"
|
46 |
+
}
|
47 |
+
],
|
48 |
+
"cudaVersion": "12.2"
|
49 |
+
}
|
grafted/wandb/run-20241127_214327-torch-grafted-redux/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"_wandb":{"runtime":118}}
|
grafted/wandb/run-20241127_214327-torch-grafted-redux/logs/debug-internal.log
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"time":"2024-11-27T21:43:27.742580416-08:00","level":"INFO","msg":"using version","core version":"0.18.7"}
|
2 |
+
{"time":"2024-11-27T21:43:27.742604921-08:00","level":"INFO","msg":"created symlink","path":"/home/jason-chou/Downloads/big_vision/wandb/run-20241127_214327-torch-grafted-redux/logs/debug-core.log"}
|
3 |
+
{"time":"2024-11-27T21:43:27.845793864-08:00","level":"INFO","msg":"created new stream","id":"torch-grafted-redux"}
|
4 |
+
{"time":"2024-11-27T21:43:27.845820975-08:00","level":"INFO","msg":"stream: started","id":"torch-grafted-redux"}
|
5 |
+
{"time":"2024-11-27T21:43:27.845933768-08:00","level":"INFO","msg":"writer: Do: started","stream_id":"torch-grafted-redux"}
|
6 |
+
{"time":"2024-11-27T21:43:27.846046124-08:00","level":"INFO","msg":"handler: started","stream_id":"torch-grafted-redux"}
|
7 |
+
{"time":"2024-11-27T21:43:27.846062359-08:00","level":"INFO","msg":"sender: started","stream_id":"torch-grafted-redux"}
|
8 |
+
{"time":"2024-11-27T21:43:28.25683485-08:00","level":"INFO","msg":"Starting system monitor"}
|
9 |
+
{"time":"2024-11-27T21:45:20.049310897-08:00","level":"INFO","msg":"stream: closing","id":"torch-grafted-redux"}
|
10 |
+
{"time":"2024-11-27T21:45:20.0498735-08:00","level":"INFO","msg":"Stopping system monitor"}
|
11 |
+
{"time":"2024-11-27T21:45:20.052125471-08:00","level":"INFO","msg":"Stopped system monitor"}
|
12 |
+
{"time":"2024-11-27T21:45:20.144258999-08:00","level":"WARN","msg":"No program path found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job"}
|
13 |
+
{"time":"2024-11-27T21:45:20.144292029-08:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
|
14 |
+
{"time":"2024-11-27T21:45:20.611424696-08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
15 |
+
{"time":"2024-11-27T21:45:20.75808152-08:00","level":"INFO","msg":"handler: closed","stream_id":"torch-grafted-redux"}
|
16 |
+
{"time":"2024-11-27T21:45:20.758156552-08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"torch-grafted-redux"}
|
17 |
+
{"time":"2024-11-27T21:45:20.75839778-08:00","level":"INFO","msg":"sender: closed","stream_id":"torch-grafted-redux"}
|
18 |
+
{"time":"2024-11-27T21:45:20.758445214-08:00","level":"INFO","msg":"stream: closed","id":"torch-grafted-redux"}
|
grafted/wandb/run-20241127_214327-torch-grafted-redux/logs/debug.log
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-11-27 21:43:27,731 INFO MainThread:70768 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
|
2 |
+
2024-11-27 21:43:27,731 INFO MainThread:70768 [wandb_setup.py:_flush():79] Configure stats pid to 70768
|
3 |
+
2024-11-27 21:43:27,731 INFO MainThread:70768 [wandb_setup.py:_flush():79] Loading settings from /home/jason-chou/.config/wandb/settings
|
4 |
+
2024-11-27 21:43:27,731 INFO MainThread:70768 [wandb_setup.py:_flush():79] Loading settings from /home/jason-chou/Downloads/big_vision/wandb/settings
|
5 |
+
2024-11-27 21:43:27,731 INFO MainThread:70768 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
|
6 |
+
2024-11-27 21:43:27,731 INFO MainThread:70768 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
|
7 |
+
2024-11-27 21:43:27,731 WARNING MainThread:70768 [wandb_setup.py:_flush():79] Could not find program at -m big_vision.train
|
8 |
+
2024-11-27 21:43:27,731 INFO MainThread:70768 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m big_vision.train'}
|
9 |
+
2024-11-27 21:43:27,732 INFO MainThread:70768 [wandb_setup.py:_flush():79] Applying login settings: {}
|
10 |
+
2024-11-27 21:43:27,732 INFO MainThread:70768 [wandb_init.py:_log_setup():533] Logging user logs to /home/jason-chou/Downloads/big_vision/wandb/run-20241127_214327-torch-grafted-redux/logs/debug.log
|
11 |
+
2024-11-27 21:43:27,732 INFO MainThread:70768 [wandb_init.py:_log_setup():534] Logging internal logs to /home/jason-chou/Downloads/big_vision/wandb/run-20241127_214327-torch-grafted-redux/logs/debug-internal.log
|
12 |
+
2024-11-27 21:43:27,732 INFO MainThread:70768 [wandb_init.py:init():619] calling init triggers
|
13 |
+
2024-11-27 21:43:27,737 INFO MainThread:70768 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
|
14 |
+
config: {'_fields': {'seed': 0, 'total_epochs': 90, 'num_classes': 1000, 'loss': 'softmax_xent', 'input': accum_freq: 8
|
15 |
+
batch_size: 1024
|
16 |
+
cache_raw: false
|
17 |
+
data:
|
18 |
+
name: imagenet2012
|
19 |
+
split: train
|
20 |
+
pp: decode_jpeg_and_inception_crop(224)|flip_lr|randaug(2,10)|value_range(-1, 1)|onehot(1000,
|
21 |
+
key="label", key_result="labels")|keep("image", "labels")
|
22 |
+
shuffle_buffer_size: 150000
|
23 |
+
, 'pp_modules': ['ops_general', 'ops_image', 'ops_text', 'archive.randaug'], 'log_training_steps': 50, 'ckpt_steps': 1000, 'model_name': 'vit', 'model': pool_type: gap
|
24 |
+
posemb: sincos2d
|
25 |
+
rep_size: false
|
26 |
+
variant: S/16
|
27 |
+
, 'grad_clip_norm': 1.0, 'optax_name': 'scale_by_adam', 'optax': mu_dtype: bfloat16
|
28 |
+
, 'lr': 0.001, 'wd': 0.0001, 'schedule': decay_type: cosine
|
29 |
+
warmup_steps: 10000
|
30 |
+
, 'mixup': fold_in: null
|
31 |
+
p: 0.2
|
32 |
+
, 'evals': val:
|
33 |
+
data:
|
34 |
+
name: imagenet2012
|
35 |
+
split: validation
|
36 |
+
log_steps: 2500
|
37 |
+
loss_name: softmax_xent
|
38 |
+
pp_fn: decode|resize_small(256)|central_crop(224)|value_range(-1, 1)|onehot(1000,
|
39 |
+
key="label", key_result="labels")|keep("image", "labels")
|
40 |
+
type: classification
|
41 |
+
}, '_locked': True, '_type_safe': True, '_convert_dict': True, '_allow_dotted_keys': False, '_sort_keys': True}
|
42 |
+
2024-11-27 21:43:27,737 INFO MainThread:70768 [wandb_init.py:init():669] starting backend
|
43 |
+
2024-11-27 21:43:27,737 INFO MainThread:70768 [wandb_init.py:init():673] sending inform_init request
|
44 |
+
2024-11-27 21:43:27,739 INFO MainThread:70768 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
45 |
+
2024-11-27 21:43:27,739 INFO MainThread:70768 [wandb_init.py:init():686] backend started and connected
|
46 |
+
2024-11-27 21:43:27,745 INFO MainThread:70768 [wandb_init.py:init():781] updated telemetry
|
47 |
+
2024-11-27 21:43:27,748 INFO MainThread:70768 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
|
48 |
+
2024-11-27 21:43:28,243 INFO MainThread:70768 [wandb_init.py:init():859] run resumed
|
49 |
+
2024-11-27 21:43:28,254 INFO MainThread:70768 [wandb_init.py:init():867] starting run threads in backend
|
50 |
+
2024-11-27 21:43:28,306 INFO MainThread:70768 [wandb_run.py:_console_start():2456] atexit reg
|
51 |
+
2024-11-27 21:43:28,306 INFO MainThread:70768 [wandb_run.py:_redirect():2305] redirect: wrap_raw
|
52 |
+
2024-11-27 21:43:28,306 INFO MainThread:70768 [wandb_run.py:_redirect():2370] Wrapping output streams.
|
53 |
+
2024-11-27 21:43:28,306 INFO MainThread:70768 [wandb_run.py:_redirect():2395] Redirects installed.
|
54 |
+
2024-11-27 21:43:28,307 INFO MainThread:70768 [wandb_init.py:init():911] run started, returning control to user process
|
55 |
+
2024-11-27 21:45:20,049 WARNING MsgRouterThr:70768 [router.py:message_loop():75] message_loop has been closed
|
grafted/wandb/run-20241127_214327-torch-grafted-redux/run-torch-grafted-redux.wandb
ADDED
Binary file (70.8 kB). View file
|
|
grafted/wandb/run-20241127_215015-torch-grafted-redux/files/config.yaml
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
_allow_dotted_keys:
|
2 |
+
value: false
|
3 |
+
_convert_dict:
|
4 |
+
value: true
|
5 |
+
_fields:
|
6 |
+
value:
|
7 |
+
ckpt_steps: 1000
|
8 |
+
evals: |
|
9 |
+
val:
|
10 |
+
data:
|
11 |
+
name: imagenet2012
|
12 |
+
split: validation
|
13 |
+
log_steps: 2500
|
14 |
+
loss_name: softmax_xent
|
15 |
+
pp_fn: decode|resize_small(256)|central_crop(224)|value_range(-1, 1)|onehot(1000,
|
16 |
+
key="label", key_result="labels")|keep("image", "labels")
|
17 |
+
type: classification
|
18 |
+
grad_clip_norm: 1
|
19 |
+
input: |
|
20 |
+
accum_freq: 8
|
21 |
+
batch_size: 1024
|
22 |
+
cache_raw: false
|
23 |
+
data:
|
24 |
+
name: imagenet2012
|
25 |
+
split: train
|
26 |
+
pp: decode_jpeg_and_inception_crop(224)|flip_lr|randaug(2,10)|value_range(-1, 1)|onehot(1000,
|
27 |
+
key="label", key_result="labels")|keep("image", "labels")
|
28 |
+
shuffle_buffer_size: 150000
|
29 |
+
log_training_steps: 50
|
30 |
+
loss: softmax_xent
|
31 |
+
lr: 0.001
|
32 |
+
mixup: |
|
33 |
+
fold_in: null
|
34 |
+
p: 0.2
|
35 |
+
model: |
|
36 |
+
pool_type: gap
|
37 |
+
posemb: sincos2d
|
38 |
+
rep_size: false
|
39 |
+
variant: S/16
|
40 |
+
model_name: vit
|
41 |
+
num_classes: 1000
|
42 |
+
optax: |
|
43 |
+
mu_dtype: bfloat16
|
44 |
+
optax_name: scale_by_adam
|
45 |
+
pp_modules:
|
46 |
+
- ops_general
|
47 |
+
- ops_image
|
48 |
+
- ops_text
|
49 |
+
- archive.randaug
|
50 |
+
schedule: |
|
51 |
+
decay_type: cosine
|
52 |
+
warmup_steps: 10000
|
53 |
+
seed: 0
|
54 |
+
total_epochs: 90
|
55 |
+
wd: 0.0001
|
56 |
+
_locked:
|
57 |
+
value: true
|
58 |
+
_sort_keys:
|
59 |
+
value: true
|
60 |
+
_type_safe:
|
61 |
+
value: true
|
62 |
+
_wandb:
|
63 |
+
value:
|
64 |
+
cli_version: 0.18.7
|
65 |
+
m: []
|
66 |
+
python_version: 3.11.10
|
67 |
+
t:
|
68 |
+
"1":
|
69 |
+
- 1
|
70 |
+
- 2
|
71 |
+
- 3
|
72 |
+
- 12
|
73 |
+
- 41
|
74 |
+
- 45
|
75 |
+
- 55
|
76 |
+
"2":
|
77 |
+
- 1
|
78 |
+
- 2
|
79 |
+
- 3
|
80 |
+
- 12
|
81 |
+
- 41
|
82 |
+
- 45
|
83 |
+
- 55
|
84 |
+
"3":
|
85 |
+
- 5
|
86 |
+
- 13
|
87 |
+
- 14
|
88 |
+
- 16
|
89 |
+
- 23
|
90 |
+
- 55
|
91 |
+
- 61
|
92 |
+
- 62
|
93 |
+
"4": 3.11.10
|
94 |
+
"5": 0.18.7
|
95 |
+
"8":
|
96 |
+
- 5
|
97 |
+
"12": 0.18.7
|
98 |
+
"13": linux-x86_64
|
grafted/wandb/run-20241127_215015-torch-grafted-redux/files/output.log
ADDED
The diff for this file is too large to render.
See raw diff
|
|
grafted/wandb/run-20241127_215015-torch-grafted-redux/files/requirements.txt
ADDED
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
array_record==0.5.1
|
2 |
+
wandb==0.18.7
|
3 |
+
nvidia-curand-cu12==10.3.2.106
|
4 |
+
requests-oauthlib==2.0.0
|
5 |
+
zipp==3.21.0
|
6 |
+
Werkzeug==3.1.3
|
7 |
+
simple-parsing==0.1.6
|
8 |
+
mdurl==0.1.2
|
9 |
+
keras==2.15.0
|
10 |
+
nvidia-cuda-nvcc-cu12==12.6.85
|
11 |
+
google-auth-oauthlib==1.2.1
|
12 |
+
jaxlib==0.4.34
|
13 |
+
tf_keras==2.15.1
|
14 |
+
oauthlib==3.2.2
|
15 |
+
tensorflow-probability==0.25.0
|
16 |
+
cachetools==5.5.0
|
17 |
+
Jinja2==3.1.3
|
18 |
+
rich==13.9.4
|
19 |
+
filelock==3.13.1
|
20 |
+
google-pasta==0.2.0
|
21 |
+
optax==0.2.4
|
22 |
+
toolz==1.0.0
|
23 |
+
gast==0.6.0
|
24 |
+
tensorboard==2.15.2
|
25 |
+
pyasn1_modules==0.4.1
|
26 |
+
nvidia-cudnn-cu12==9.1.0.70
|
27 |
+
opt_einsum==3.4.0
|
28 |
+
nvidia-nvjitlink-cu12==12.6.85
|
29 |
+
chex==0.1.87
|
30 |
+
namex==0.0.8
|
31 |
+
termcolor==2.5.0
|
32 |
+
flax==0.10.2
|
33 |
+
cloudpickle==3.1.0
|
34 |
+
numpy==1.26.4
|
35 |
+
nvidia-nccl-cu12==2.21.5
|
36 |
+
tensorflow-cpu==2.15.0
|
37 |
+
nvidia-cusolver-cu12==11.4.5.107
|
38 |
+
typing_extensions==4.12.2
|
39 |
+
tensorflow-addons==0.23.0
|
40 |
+
typeguard==2.13.3
|
41 |
+
absl-py==2.1.0
|
42 |
+
flatbuffers==24.3.25
|
43 |
+
dlpack==0.1
|
44 |
+
setuptools==65.5.0
|
45 |
+
protobuf==4.25.5
|
46 |
+
jax-cuda12-plugin==0.4.35
|
47 |
+
tensorflow==2.15.0
|
48 |
+
msgpack==1.1.0
|
49 |
+
networkx==3.2.1
|
50 |
+
docker-pycreds==0.4.0
|
51 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
52 |
+
pillow==11.0.0
|
53 |
+
libclang==18.1.1
|
54 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
55 |
+
distrax==0.1.5
|
56 |
+
orbax-checkpoint==0.10.1
|
57 |
+
PyYAML==6.0.2
|
58 |
+
urllib3==2.2.3
|
59 |
+
aqtp==0.8.2
|
60 |
+
tensorflow-metadata==1.16.1
|
61 |
+
etils==1.11.0
|
62 |
+
smmap==5.0.1
|
63 |
+
pyasn1==0.6.1
|
64 |
+
docstring_parser==0.16
|
65 |
+
google-auth==2.36.0
|
66 |
+
simplejson==3.19.3
|
67 |
+
mpmath==1.3.0
|
68 |
+
h5py==3.12.1
|
69 |
+
jax-cuda12-pjrt==0.4.35
|
70 |
+
tensorflow-io-gcs-filesystem==0.37.1
|
71 |
+
tensorflow-estimator==2.15.0
|
72 |
+
pydlpack==0.2.1
|
73 |
+
triton==3.1.0
|
74 |
+
rsa==4.9
|
75 |
+
panopticapi==0.1
|
76 |
+
tensorflow-hub==0.16.1
|
77 |
+
requests==2.32.3
|
78 |
+
scipy==1.14.1
|
79 |
+
ml-dtypes==0.2.0
|
80 |
+
markdown-it-py==3.0.0
|
81 |
+
tensorflow-text==2.15.0
|
82 |
+
wrapt==1.14.1
|
83 |
+
immutabledict==4.2.1
|
84 |
+
MarkupSafe==3.0.2
|
85 |
+
jax==0.4.35
|
86 |
+
torch==2.5.1+cu121
|
87 |
+
wheel==0.45.1
|
88 |
+
einops==0.8.0
|
89 |
+
sentry-sdk==2.19.0
|
90 |
+
torchvision==0.20.1+cu121
|
91 |
+
humanize==4.11.0
|
92 |
+
toml==0.10.2
|
93 |
+
tensorstore==0.1.69
|
94 |
+
six==1.16.0
|
95 |
+
promise==2.3
|
96 |
+
certifi==2024.8.30
|
97 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
98 |
+
flaxformer==0.8.8
|
99 |
+
nvidia-cufft-cu12==11.0.2.54
|
100 |
+
psutil==6.1.0
|
101 |
+
GitPython==3.1.43
|
102 |
+
platformdirs==4.3.6
|
103 |
+
importlib_resources==6.4.5
|
104 |
+
tfds-nightly==4.9.7.dev202411280044
|
105 |
+
tensorflow-gan==2.1.0
|
106 |
+
googleapis-common-protos==1.66.0
|
107 |
+
overrides==7.7.0
|
108 |
+
optree==0.13.1
|
109 |
+
Pygments==2.18.0
|
110 |
+
astunparse==1.6.3
|
111 |
+
ml_collections==1.0.0
|
112 |
+
setproctitle==1.3.4
|
113 |
+
tensorboard-data-server==0.7.2
|
114 |
+
sympy==1.13.1
|
115 |
+
packaging==24.2
|
116 |
+
nest-asyncio==1.6.0
|
117 |
+
nvidia-cublas-cu12==12.1.3.1
|
118 |
+
gitdb==4.0.11
|
119 |
+
click==8.1.7
|
120 |
+
idna==3.10
|
121 |
+
tqdm==4.67.1
|
122 |
+
grpcio==1.68.0
|
123 |
+
decorator==5.1.1
|
124 |
+
pyarrow==18.1.0
|
125 |
+
clu==0.0.12
|
126 |
+
charset-normalizer==3.4.0
|
127 |
+
fsspec==2024.10.0
|
128 |
+
dm-tree==0.1.8
|
129 |
+
sentencepiece==0.2.0
|
130 |
+
nvidia-cusparse-cu12==12.1.0.106
|
131 |
+
torchaudio==2.5.1+cu121
|
132 |
+
pip==24.3.1
|
133 |
+
Markdown==3.7
|
134 |
+
nvidia-nvtx-cu12==12.1.105
|
grafted/wandb/run-20241127_215015-torch-grafted-redux/files/wandb-metadata.json
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"os": "Linux-6.8.0-49-generic-x86_64-with-glibc2.39",
|
3 |
+
"python": "3.11.10",
|
4 |
+
"startedAt": "2024-11-28T05:50:15.071664Z",
|
5 |
+
"args": [
|
6 |
+
"--config",
|
7 |
+
"/home/jason-chou/Downloads/big_vision/big_vision/configs/vit_s16_i1k_single_gpu_test.py",
|
8 |
+
"--workdir",
|
9 |
+
"/data/imagenet/grafted",
|
10 |
+
"--name",
|
11 |
+
"torch-grafted-redux"
|
12 |
+
],
|
13 |
+
"program": "-m big_vision.train",
|
14 |
+
"git": {
|
15 |
+
"remote": "https://github.com/EIFY/big_vision.git",
|
16 |
+
"commit": "44649a64ff67e709f55cdb5e3adcf52064b17de5"
|
17 |
+
},
|
18 |
+
"email": "[email protected]",
|
19 |
+
"root": "/home/jason-chou/Downloads/big_vision",
|
20 |
+
"host": "jasonchou-TensorBook-late-2021",
|
21 |
+
"username": "jason-chou",
|
22 |
+
"executable": "/home/jason-chou/.pyenv/versions/3.11.10/bin/python",
|
23 |
+
"cpu_count": 8,
|
24 |
+
"cpu_count_logical": 16,
|
25 |
+
"gpu": "NVIDIA GeForce RTX 3080 Laptop GPU",
|
26 |
+
"gpu_count": 1,
|
27 |
+
"disk": {
|
28 |
+
"/": {
|
29 |
+
"total": "1006450962432",
|
30 |
+
"used": "584324419584"
|
31 |
+
}
|
32 |
+
},
|
33 |
+
"memory": {
|
34 |
+
"total": "67162914816"
|
35 |
+
},
|
36 |
+
"cpu": {
|
37 |
+
"count": 8,
|
38 |
+
"countLogical": 16
|
39 |
+
},
|
40 |
+
"gpu_nvidia": [
|
41 |
+
{
|
42 |
+
"name": "NVIDIA GeForce RTX 3080 Laptop GPU",
|
43 |
+
"memoryTotal": "17179869184",
|
44 |
+
"cudaCores": 6144,
|
45 |
+
"architecture": "Ampere"
|
46 |
+
}
|
47 |
+
],
|
48 |
+
"cudaVersion": "12.2"
|
49 |
+
}
|
grafted/wandb/run-20241127_215015-torch-grafted-redux/files/wandb-summary.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"lr":2.3437951579552636e-13,"progress":1,"l2_grads":2.6440370082855225,"val/loss":0.919307008081553,"val/acc@1":0.7654257015306123,"l2_params":237.9905797356025,"uptime":705834.820052186,"_wandb":{"runtime":706047},"_step":112603,"core_hours_NVIDIA GeForce RTX 3080 Laptop GPU":196.03091018991725,"train/loss":1.7738897949457169,"img/sec/core":164.21697038941156,"examples_seen":1.15305472e+08,"_timestamp":1.7334789428918557e+09,"_runtime":706047.114314126,"epoch":90.00034499795889,"core_hours":196.03091018991725,"z/secs/eval/val":97.07507979194634}
|
grafted/wandb/run-20241127_215015-torch-grafted-redux/logs/debug-internal.log
ADDED
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"time":"2024-11-27T21:50:15.074175754-08:00","level":"INFO","msg":"using version","core version":"0.18.7"}
|
2 |
+
{"time":"2024-11-27T21:50:15.074199926-08:00","level":"INFO","msg":"created symlink","path":"/home/jason-chou/Downloads/big_vision/wandb/run-20241127_215015-torch-grafted-redux/logs/debug-core.log"}
|
3 |
+
{"time":"2024-11-27T21:50:15.178795589-08:00","level":"INFO","msg":"created new stream","id":"torch-grafted-redux"}
|
4 |
+
{"time":"2024-11-27T21:50:15.178841831-08:00","level":"INFO","msg":"stream: started","id":"torch-grafted-redux"}
|
5 |
+
{"time":"2024-11-27T21:50:15.179017283-08:00","level":"INFO","msg":"writer: Do: started","stream_id":"torch-grafted-redux"}
|
6 |
+
{"time":"2024-11-27T21:50:15.179013899-08:00","level":"INFO","msg":"sender: started","stream_id":"torch-grafted-redux"}
|
7 |
+
{"time":"2024-11-27T21:50:15.179140053-08:00","level":"INFO","msg":"handler: started","stream_id":"torch-grafted-redux"}
|
8 |
+
{"time":"2024-11-27T21:50:15.742647044-08:00","level":"INFO","msg":"Starting system monitor"}
|
9 |
+
{"time":"2024-11-28T02:34:16.031157097-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
|
10 |
+
{"time":"2024-11-28T02:34:48.365741462-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
11 |
+
{"time":"2024-11-28T02:35:23.360466143-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
12 |
+
{"time":"2024-11-28T02:36:02.179163429-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
13 |
+
{"time":"2024-11-28T02:36:48.506458125-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
14 |
+
{"time":"2024-11-28T02:37:26.943002842-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": read tcp 10.0.0.84:35898->35.186.228.49:443: read: connection reset by peer"}
|
15 |
+
{"time":"2024-11-28T03:42:18.129115838-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": dial tcp 35.186.228.49:443: connect: no route to host"}
|
16 |
+
{"time":"2024-11-28T03:42:20.139827897-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": dial tcp: lookup api.wandb.ai on 127.0.0.53:53: server misbehaving"}
|
17 |
+
{"time":"2024-11-28T03:42:24.372179347-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": dial tcp: lookup api.wandb.ai on 127.0.0.53:53: server misbehaving"}
|
18 |
+
{"time":"2024-11-28T03:42:32.586084468-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": dial tcp: lookup api.wandb.ai on 127.0.0.53:53: server misbehaving"}
|
19 |
+
{"time":"2024-11-28T03:42:46.132654012-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
20 |
+
{"time":"2024-11-28T03:42:52.120982292-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": dial tcp: lookup api.wandb.ai on 127.0.0.53:53: server misbehaving"}
|
21 |
+
{"time":"2024-11-28T03:43:18.285293593-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
22 |
+
{"time":"2024-11-28T03:43:28.907782037-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": dial tcp: lookup api.wandb.ai on 127.0.0.53:53: server misbehaving"}
|
23 |
+
{"time":"2024-11-28T03:43:53.098087627-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
24 |
+
{"time":"2024-11-28T03:44:28.909285538-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": dial tcp: lookup api.wandb.ai on 127.0.0.53:53: server misbehaving"}
|
25 |
+
{"time":"2024-11-28T03:44:31.614526811-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
26 |
+
{"time":"2024-11-28T03:45:17.871096255-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
27 |
+
{"time":"2024-11-28T03:45:28.911117824-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": dial tcp: lookup api.wandb.ai on 127.0.0.53:53: server misbehaving"}
|
28 |
+
{"time":"2024-11-28T04:27:01.204698067-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
29 |
+
{"time":"2024-11-28T04:27:33.238366122-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
30 |
+
{"time":"2024-11-28T04:28:07.420519765-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
31 |
+
{"time":"2024-11-28T04:28:47.085988867-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
32 |
+
{"time":"2024-11-28T04:29:35.767860694-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
33 |
+
{"time":"2024-11-28T08:14:46.411826832-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
34 |
+
{"time":"2024-11-29T07:48:56.401765469-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": dial tcp 35.186.228.49:443: connect: no route to host"}
|
35 |
+
{"time":"2024-11-29T15:48:03.006388984-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
36 |
+
{"time":"2024-11-29T15:48:35.3494534-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
37 |
+
{"time":"2024-11-29T15:49:10.275739645-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
38 |
+
{"time":"2024-11-30T03:43:35.70265325-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": read tcp 10.0.0.84:53632->35.186.228.49:443: read: connection reset by peer"}
|
39 |
+
{"time":"2024-11-30T04:33:41.905126209-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": dial tcp 35.186.228.49:443: connect: no route to host"}
|
40 |
+
{"time":"2024-11-30T12:32:33.434061272-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": dial tcp 35.186.228.49:443: connect: no route to host"}
|
41 |
+
{"time":"2024-11-30T16:47:09.087172552-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": read tcp 10.0.0.84:57124->35.186.228.49:443: read: connection reset by peer"}
|
42 |
+
{"time":"2024-11-30T20:42:04.468697581-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
|
43 |
+
{"time":"2024-11-30T20:42:36.856408234-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
44 |
+
{"time":"2024-11-30T20:43:11.213955553-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
45 |
+
{"time":"2024-11-30T20:43:51.102603998-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
46 |
+
{"time":"2024-11-30T20:44:37.626630185-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
47 |
+
{"time":"2024-11-30T20:45:14.657693577-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": read tcp 10.0.0.84:52436->35.186.228.49:443: read: connection reset by peer"}
|
48 |
+
{"time":"2024-12-01T00:45:34.74621148-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
49 |
+
{"time":"2024-12-01T00:46:06.894294578-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
50 |
+
{"time":"2024-12-01T00:46:41.891784748-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
51 |
+
{"time":"2024-12-01T00:47:20.240406935-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
52 |
+
{"time":"2024-12-01T00:48:08.857417923-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
53 |
+
{"time":"2024-12-01T03:18:49.908220918-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
54 |
+
{"time":"2024-12-01T03:19:22.177996379-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
55 |
+
{"time":"2024-12-01T03:19:57.167870716-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
56 |
+
{"time":"2024-12-01T03:20:35.786919209-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
57 |
+
{"time":"2024-12-01T03:21:23.817288934-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
58 |
+
{"time":"2024-12-01T04:35:50.003784946-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
59 |
+
{"time":"2024-12-01T04:36:22.26631399-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
60 |
+
{"time":"2024-12-01T04:36:56.640870294-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
61 |
+
{"time":"2024-12-01T04:37:35.443320555-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
62 |
+
{"time":"2024-12-01T04:38:23.987753899-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
63 |
+
{"time":"2024-12-01T04:46:05.057961007-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
64 |
+
{"time":"2024-12-01T04:46:37.546859547-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
65 |
+
{"time":"2024-12-01T04:47:11.581392685-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
66 |
+
{"time":"2024-12-01T04:47:50.252999457-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
67 |
+
{"time":"2024-12-01T04:48:37.650736566-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
68 |
+
{"time":"2024-12-01T04:49:11.191378623-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": read tcp 10.0.0.84:38402->35.186.228.49:443: read: connection reset by peer"}
|
69 |
+
{"time":"2024-12-01T16:38:05.68648332-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
70 |
+
{"time":"2024-12-01T16:38:37.887436779-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
71 |
+
{"time":"2024-12-01T16:39:12.133333955-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
72 |
+
{"time":"2024-12-01T16:39:52.130430618-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
73 |
+
{"time":"2024-12-01T16:40:39.445088692-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
74 |
+
{"time":"2024-12-01T16:43:20.726602894-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
75 |
+
{"time":"2024-12-01T16:43:52.91904545-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
76 |
+
{"time":"2024-12-01T16:44:27.667498554-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
77 |
+
{"time":"2024-12-01T16:45:07.347776589-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
78 |
+
{"time":"2024-12-01T16:45:54.2028342-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
79 |
+
{"time":"2024-12-01T18:32:20.873980572-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
80 |
+
{"time":"2024-12-01T18:32:53.205902113-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
81 |
+
{"time":"2024-12-01T18:33:27.963313543-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
82 |
+
{"time":"2024-12-01T18:34:07.633098053-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
83 |
+
{"time":"2024-12-01T18:34:57.576633533-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
84 |
+
{"time":"2024-12-01T18:37:35.91680404-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
85 |
+
{"time":"2024-12-01T18:38:08.405404154-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
86 |
+
{"time":"2024-12-01T18:38:42.587426715-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
87 |
+
{"time":"2024-12-01T18:39:20.751273113-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
88 |
+
{"time":"2024-12-01T18:40:07.44210804-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
89 |
+
{"time":"2024-12-02T02:35:51.391509841-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
90 |
+
{"time":"2024-12-02T02:36:23.802341886-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
91 |
+
{"time":"2024-12-02T02:36:57.877540099-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
92 |
+
{"time":"2024-12-02T02:37:37.844207985-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
93 |
+
{"time":"2024-12-02T02:38:24.231618803-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
94 |
+
{"time":"2024-12-02T12:37:52.036374796-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
95 |
+
{"time":"2024-12-02T12:38:24.246737479-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
96 |
+
{"time":"2024-12-02T12:38:58.86214325-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
97 |
+
{"time":"2024-12-02T12:39:37.93956232-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
98 |
+
{"time":"2024-12-02T12:40:27.832359297-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
99 |
+
{"time":"2024-12-02T12:43:22.078082806-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
100 |
+
{"time":"2024-12-02T12:43:54.260401543-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
101 |
+
{"time":"2024-12-02T12:44:28.422029172-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
102 |
+
{"time":"2024-12-02T12:45:07.133435109-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
103 |
+
{"time":"2024-12-02T12:45:55.595030082-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
104 |
+
{"time":"2024-12-02T12:46:39.639355887-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": read tcp 10.0.0.84:53050->35.186.228.49:443: read: connection reset by peer"}
|
105 |
+
{"time":"2024-12-02T14:32:22.213600631-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
106 |
+
{"time":"2024-12-02T14:32:54.409672429-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
|
107 |
+
{"time":"2024-12-02T14:33:29.313237828-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
108 |
+
{"time":"2024-12-02T14:34:08.561175398-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
109 |
+
{"time":"2024-12-02T14:34:58.280659826-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
110 |
+
{"time":"2024-12-02T14:35:40.950530237-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": read tcp 10.0.0.84:53982->35.186.228.49:443: read: connection reset by peer"}
|
111 |
+
{"time":"2024-12-02T22:38:52.669417282-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
112 |
+
{"time":"2024-12-02T22:39:24.808021475-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
113 |
+
{"time":"2024-12-02T22:39:59.412976592-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
114 |
+
{"time":"2024-12-02T22:40:38.099901646-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
115 |
+
{"time":"2024-12-02T22:41:27.308506641-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
116 |
+
{"time":"2024-12-03T00:27:22.905950569-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
117 |
+
{"time":"2024-12-03T00:27:55.246674688-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
118 |
+
{"time":"2024-12-03T00:28:29.944219256-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
119 |
+
{"time":"2024-12-03T00:29:08.68500746-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
120 |
+
{"time":"2024-12-03T00:29:56.678315339-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
121 |
+
{"time":"2024-12-03T08:51:00.224082728-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": read tcp 10.0.0.84:54348->35.186.228.49:443: read: connection reset by peer"}
|
122 |
+
{"time":"2024-12-03T08:55:53.399562922-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
123 |
+
{"time":"2024-12-03T08:56:25.491144652-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
124 |
+
{"time":"2024-12-03T08:57:00.241113785-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
125 |
+
{"time":"2024-12-03T08:57:40.21004883-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
126 |
+
{"time":"2024-12-03T08:58:26.526472631-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
127 |
+
{"time":"2024-12-03T10:36:19.661719074-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": dial tcp: lookup api.wandb.ai on 127.0.0.53:53: server misbehaving"}
|
128 |
+
{"time":"2024-12-03T10:36:38.530396963-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
129 |
+
{"time":"2024-12-03T10:37:10.818041328-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
|
130 |
+
{"time":"2024-12-03T10:37:44.924643065-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
131 |
+
{"time":"2024-12-03T10:38:24.006775695-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
132 |
+
{"time":"2024-12-03T10:39:10.789085577-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
133 |
+
{"time":"2024-12-03T10:47:08.577691365-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
134 |
+
{"time":"2024-12-03T10:47:40.945024167-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
135 |
+
{"time":"2024-12-03T10:48:15.894144562-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
136 |
+
{"time":"2024-12-03T10:48:54.876775304-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
137 |
+
{"time":"2024-12-03T10:49:41.094974693-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
138 |
+
{"time":"2024-12-03T10:50:21.078869466-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": read tcp 10.0.0.84:48148->35.186.228.49:443: read: connection reset by peer"}
|
139 |
+
{"time":"2024-12-03T12:23:51.403070328-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
140 |
+
{"time":"2024-12-03T12:40:53.731145089-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
141 |
+
{"time":"2024-12-03T23:32:54.290577171-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
142 |
+
{"time":"2024-12-03T23:33:26.617679262-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
143 |
+
{"time":"2024-12-03T23:34:00.757344429-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
144 |
+
{"time":"2024-12-03T23:34:39.966159301-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
145 |
+
{"time":"2024-12-03T23:35:26.529833061-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
146 |
+
{"time":"2024-12-04T14:19:55.049105743-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
|
147 |
+
{"time":"2024-12-04T15:28:55.116890309-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
148 |
+
{"time":"2024-12-04T15:29:27.135172446-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
149 |
+
{"time":"2024-12-04T15:30:02.078524164-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
150 |
+
{"time":"2024-12-04T15:30:40.31468816-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
151 |
+
{"time":"2024-12-04T15:31:29.901859494-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
|
152 |
+
{"time":"2024-12-04T17:18:55.237241846-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
153 |
+
{"time":"2024-12-04T17:19:27.334026079-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
154 |
+
{"time":"2024-12-04T17:20:02.022011033-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
155 |
+
{"time":"2024-12-04T17:20:41.434273822-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
156 |
+
{"time":"2024-12-04T17:21:29.254101761-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
157 |
+
{"time":"2024-12-04T18:04:10.322394576-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
158 |
+
{"time":"2024-12-04T18:04:42.484364529-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
159 |
+
{"time":"2024-12-04T18:05:16.646794563-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
160 |
+
{"time":"2024-12-04T18:05:55.608027075-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
161 |
+
{"time":"2024-12-04T18:06:44.553620127-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
162 |
+
{"time":"2024-12-04T22:27:40.607937922-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
163 |
+
{"time":"2024-12-04T22:28:12.836516946-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
|
164 |
+
{"time":"2024-12-04T22:28:47.459223154-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
165 |
+
{"time":"2024-12-04T22:29:26.147960147-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
166 |
+
{"time":"2024-12-04T22:30:15.062106005-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
167 |
+
{"time":"2024-12-05T01:47:39.133391386-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
168 |
+
{"time":"2024-12-05T01:47:55.82293439-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
169 |
+
{"time":"2024-12-05T01:47:58.69274009-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
170 |
+
{"time":"2024-12-05T01:48:14.974980479-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
171 |
+
{"time":"2024-12-05T01:48:20.402384234-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
172 |
+
{"time":"2024-12-05T01:48:35.966316067-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
173 |
+
{"time":"2024-12-05T01:48:45.592640286-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
174 |
+
{"time":"2024-12-05T01:49:02.593889106-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
175 |
+
{"time":"2024-12-05T01:49:19.131511604-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
176 |
+
{"time":"2024-12-05T01:49:36.282249089-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
177 |
+
{"time":"2024-12-05T01:50:15.299754947-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
178 |
+
{"time":"2024-12-05T01:50:27.504309484-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
179 |
+
{"time":"2024-12-05T01:51:32.820040328-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
180 |
+
{"time":"2024-12-05T01:51:45.124161387-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
181 |
+
{"time":"2024-12-05T01:52:32.91054814-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
182 |
+
{"time":"2024-12-05T01:53:01.802742601-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
183 |
+
{"time":"2024-12-05T01:53:49.829573071-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
184 |
+
{"time":"2024-12-05T01:54:01.810922094-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
185 |
+
{"time":"2024-12-05T01:55:01.864377192-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
186 |
+
{"time":"2024-12-05T01:55:07.143197064-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
187 |
+
{"time":"2024-12-05T01:56:07.173072531-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
188 |
+
{"time":"2024-12-05T01:56:18.825182849-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
189 |
+
{"time":"2024-12-05T01:57:24.055080888-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
190 |
+
{"time":"2024-12-05T01:57:25.832815739-08:00","level":"WARN","msg":"sender: taking a long time","seconds":600.000041136,"work":"WorkRecord(*service_go_proto.Request_StopStatus); Control(local:true mailbox_slot:\"a5uf5b0ketip\" connection_id:\"127.0.0.1:40934\")"}
|
191 |
+
{"time":"2024-12-05T01:57:36.240628481-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
192 |
+
{"time":"2024-12-05T01:58:32.869123973-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
193 |
+
{"time":"2024-12-05T01:58:36.256158272-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
194 |
+
{"time":"2024-12-05T01:59:33.099733752-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
195 |
+
{"time":"2024-12-05T01:59:53.664227715-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
196 |
+
{"time":"2024-12-05T01:59:59.516848736-08:00","level":"WARN","msg":"runwork: taking a long time","seconds":600.00106625,"work":"WorkRecord(*service_go_proto.Request_Keepalive); Control(local:true connection_id:\"127.0.0.1:40934\")"}
|
197 |
+
{"time":"2024-12-05T02:00:05.744419675-08:00","level":"WARN","msg":"runwork: taking a long time","seconds":600.000352825,"work":"WorkRecord(*service_go_proto.Record_Stats); Control(always_send:true)"}
|
198 |
+
{"time":"2024-12-05T02:00:05.744480013-08:00","level":"WARN","msg":"runwork: taking a long time","seconds":600.000396572,"work":"WorkRecord(*service_go_proto.Record_Stats); Control(always_send:true)"}
|
199 |
+
{"time":"2024-12-05T02:00:05.745541341-08:00","level":"WARN","msg":"runwork: taking a long time","seconds":600.000591547,"work":"WorkRecord(*service_go_proto.Record_Stats); Control(always_send:true)"}
|
200 |
+
{"time":"2024-12-05T02:00:05.745550259-08:00","level":"WARN","msg":"runwork: taking a long time","seconds":600.000894539,"work":"WorkRecord(*service_go_proto.Record_Stats); Control(always_send:true)"}
|
201 |
+
{"time":"2024-12-05T02:00:05.747662841-08:00","level":"WARN","msg":"runwork: taking a long time","seconds":600.000444392,"work":"WorkRecord(*service_go_proto.Record_Stats); Control(always_send:true)"}
|
202 |
+
{"time":"2024-12-05T02:00:33.136975098-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
203 |
+
{"time":"2024-12-05T02:01:11.07999853-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
204 |
+
{"time":"2024-12-05T02:01:41.902148444-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
205 |
+
{"time":"2024-12-05T02:02:27.673478132-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
206 |
+
{"time":"2024-12-05T02:02:50.303630914-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
207 |
+
{"time":"2024-12-05T02:03:36.076486567-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
208 |
+
{"time":"2024-12-05T02:04:07.620541718-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
209 |
+
{"time":"2024-12-05T02:04:52.67722497-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
210 |
+
{"time":"2024-12-05T02:05:07.755640903-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
211 |
+
{"time":"2024-12-05T02:05:52.699769224-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/graphql"}
|
212 |
+
{"time":"2024-12-05T02:06:56.575240743-08:00","level":"INFO","msg":"sender: succeeded after taking longer than expected","seconds":1170.752439014,"work":"WorkRecord(*service_go_proto.Request_StopStatus); Control(local:true mailbox_slot:\"a5uf5b0ketip\" connection_id:\"127.0.0.1:40934\")"}
|
213 |
+
{"time":"2024-12-05T02:06:56.575254227-08:00","level":"INFO","msg":"runwork: succeeded after taking longer than expected","seconds":1010.828027475,"work":"WorkRecord(*service_go_proto.Record_Stats); Control(always_send:true)"}
|
214 |
+
{"time":"2024-12-05T02:06:56.575270113-08:00","level":"INFO","msg":"runwork: succeeded after taking longer than expected","seconds":1010.830331617,"work":"WorkRecord(*service_go_proto.Record_Stats); Control(always_send:true)"}
|
215 |
+
{"time":"2024-12-05T02:06:56.57528404-08:00","level":"INFO","msg":"runwork: succeeded after taking longer than expected","seconds":1010.830637182,"work":"WorkRecord(*service_go_proto.Record_Stats); Control(always_send:true)"}
|
216 |
+
{"time":"2024-12-05T02:06:56.575306681-08:00","level":"INFO","msg":"runwork: succeeded after taking longer than expected","seconds":1010.831224773,"work":"WorkRecord(*service_go_proto.Record_Stats); Control(always_send:true)"}
|
217 |
+
{"time":"2024-12-05T02:06:56.575330718-08:00","level":"INFO","msg":"runwork: succeeded after taking longer than expected","seconds":1017.05961612,"work":"WorkRecord(*service_go_proto.Request_Keepalive); Control(local:true connection_id:\"127.0.0.1:40934\")"}
|
218 |
+
{"time":"2024-12-05T02:06:56.5753365-08:00","level":"INFO","msg":"runwork: succeeded after taking longer than expected","seconds":1010.831326961,"work":"WorkRecord(*service_go_proto.Record_Stats); Control(always_send:true)"}
|
219 |
+
{"time":"2024-12-05T09:15:26.449381152-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
220 |
+
{"time":"2024-12-05T09:15:58.57629318-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
221 |
+
{"time":"2024-12-05T09:16:33.095093096-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
222 |
+
{"time":"2024-12-05T11:13:14.836120253-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": dial tcp: lookup api.wandb.ai on 127.0.0.53:53: server misbehaving"}
|
223 |
+
{"time":"2024-12-05T11:13:16.949701235-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": dial tcp: lookup api.wandb.ai on 127.0.0.53:53: server misbehaving"}
|
224 |
+
{"time":"2024-12-05T15:47:11.925458079-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
225 |
+
{"time":"2024-12-05T18:23:27.087099548-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": read tcp 10.0.0.84:41532->35.186.228.49:443: read: connection reset by peer"}
|
226 |
+
{"time":"2024-12-05T19:12:12.239890815-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
227 |
+
{"time":"2024-12-05T19:12:44.526201007-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
228 |
+
{"time":"2024-12-05T19:13:18.940020619-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
229 |
+
{"time":"2024-12-05T19:13:56.999079567-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
230 |
+
{"time":"2024-12-05T19:14:43.370678467-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
231 |
+
{"time":"2024-12-05T19:27:27.291303123-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
232 |
+
{"time":"2024-12-05T19:27:59.337388458-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
|
233 |
+
{"time":"2024-12-05T19:28:33.843756768-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
234 |
+
{"time":"2024-12-05T19:29:13.488021118-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
235 |
+
{"time":"2024-12-05T19:29:59.640302841-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
236 |
+
{"time":"2024-12-05T21:23:37.131648811-08:00","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream"}
|
237 |
+
{"time":"2024-12-06T01:06:34.449145502-08:00","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/eify/mup-vit/torch-grafted-redux/file_stream\": dial tcp 35.186.228.49:443: connect: no route to host"}
|
238 |
+
{"time":"2024-12-06T01:55:44.185807115-08:00","level":"INFO","msg":"stream: closing","id":"torch-grafted-redux"}
|
239 |
+
{"time":"2024-12-06T01:55:44.186163552-08:00","level":"INFO","msg":"Stopping system monitor"}
|
240 |
+
{"time":"2024-12-06T01:55:44.188365369-08:00","level":"INFO","msg":"Stopped system monitor"}
|
241 |
+
{"time":"2024-12-06T01:55:44.281103284-08:00","level":"WARN","msg":"No program path found, not creating job artifact. See https://docs.wandb.ai/guides/launch/create-job"}
|
242 |
+
{"time":"2024-12-06T01:55:44.281124225-08:00","level":"INFO","msg":"sender: sendDefer: no job artifact to save"}
|
243 |
+
{"time":"2024-12-06T01:55:45.642796519-08:00","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
244 |
+
{"time":"2024-12-06T01:55:45.770734316-08:00","level":"INFO","msg":"handler: closed","stream_id":"torch-grafted-redux"}
|
245 |
+
{"time":"2024-12-06T01:55:45.77080105-08:00","level":"INFO","msg":"writer: Close: closed","stream_id":"torch-grafted-redux"}
|
246 |
+
{"time":"2024-12-06T01:55:45.770825146-08:00","level":"INFO","msg":"sender: closed","stream_id":"torch-grafted-redux"}
|
247 |
+
{"time":"2024-12-06T01:55:45.77092765-08:00","level":"INFO","msg":"stream: closed","id":"torch-grafted-redux"}
|
grafted/wandb/run-20241127_215015-torch-grafted-redux/logs/debug.log
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
2024-11-27 21:50:15,067 INFO MainThread:72239 [wandb_setup.py:_flush():79] Current SDK version is 0.18.7
|
2 |
+
2024-11-27 21:50:15,067 INFO MainThread:72239 [wandb_setup.py:_flush():79] Configure stats pid to 72239
|
3 |
+
2024-11-27 21:50:15,067 INFO MainThread:72239 [wandb_setup.py:_flush():79] Loading settings from /home/jason-chou/.config/wandb/settings
|
4 |
+
2024-11-27 21:50:15,067 INFO MainThread:72239 [wandb_setup.py:_flush():79] Loading settings from /home/jason-chou/Downloads/big_vision/wandb/settings
|
5 |
+
2024-11-27 21:50:15,067 INFO MainThread:72239 [wandb_setup.py:_flush():79] Loading settings from environment variables: {}
|
6 |
+
2024-11-27 21:50:15,067 INFO MainThread:72239 [wandb_setup.py:_flush():79] Applying setup settings: {'mode': None, '_disable_service': None}
|
7 |
+
2024-11-27 21:50:15,067 WARNING MainThread:72239 [wandb_setup.py:_flush():79] Could not find program at -m big_vision.train
|
8 |
+
2024-11-27 21:50:15,067 INFO MainThread:72239 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program_relpath': None, 'program': '-m big_vision.train'}
|
9 |
+
2024-11-27 21:50:15,067 INFO MainThread:72239 [wandb_setup.py:_flush():79] Applying login settings: {}
|
10 |
+
2024-11-27 21:50:15,067 INFO MainThread:72239 [wandb_init.py:_log_setup():533] Logging user logs to /home/jason-chou/Downloads/big_vision/wandb/run-20241127_215015-torch-grafted-redux/logs/debug.log
|
11 |
+
2024-11-27 21:50:15,067 INFO MainThread:72239 [wandb_init.py:_log_setup():534] Logging internal logs to /home/jason-chou/Downloads/big_vision/wandb/run-20241127_215015-torch-grafted-redux/logs/debug-internal.log
|
12 |
+
2024-11-27 21:50:15,067 INFO MainThread:72239 [wandb_init.py:init():619] calling init triggers
|
13 |
+
2024-11-27 21:50:15,068 INFO MainThread:72239 [wandb_init.py:init():626] wandb.init called with sweep_config: {}
|
14 |
+
config: {'_fields': {'seed': 0, 'total_epochs': 90, 'num_classes': 1000, 'loss': 'softmax_xent', 'input': accum_freq: 8
|
15 |
+
batch_size: 1024
|
16 |
+
cache_raw: false
|
17 |
+
data:
|
18 |
+
name: imagenet2012
|
19 |
+
split: train
|
20 |
+
pp: decode_jpeg_and_inception_crop(224)|flip_lr|randaug(2,10)|value_range(-1, 1)|onehot(1000,
|
21 |
+
key="label", key_result="labels")|keep("image", "labels")
|
22 |
+
shuffle_buffer_size: 150000
|
23 |
+
, 'pp_modules': ['ops_general', 'ops_image', 'ops_text', 'archive.randaug'], 'log_training_steps': 50, 'ckpt_steps': 1000, 'model_name': 'vit', 'model': pool_type: gap
|
24 |
+
posemb: sincos2d
|
25 |
+
rep_size: false
|
26 |
+
variant: S/16
|
27 |
+
, 'grad_clip_norm': 1.0, 'optax_name': 'scale_by_adam', 'optax': mu_dtype: bfloat16
|
28 |
+
, 'lr': 0.001, 'wd': 0.0001, 'schedule': decay_type: cosine
|
29 |
+
warmup_steps: 10000
|
30 |
+
, 'mixup': fold_in: null
|
31 |
+
p: 0.2
|
32 |
+
, 'evals': val:
|
33 |
+
data:
|
34 |
+
name: imagenet2012
|
35 |
+
split: validation
|
36 |
+
log_steps: 2500
|
37 |
+
loss_name: softmax_xent
|
38 |
+
pp_fn: decode|resize_small(256)|central_crop(224)|value_range(-1, 1)|onehot(1000,
|
39 |
+
key="label", key_result="labels")|keep("image", "labels")
|
40 |
+
type: classification
|
41 |
+
}, '_locked': True, '_type_safe': True, '_convert_dict': True, '_allow_dotted_keys': False, '_sort_keys': True}
|
42 |
+
2024-11-27 21:50:15,068 INFO MainThread:72239 [wandb_init.py:init():669] starting backend
|
43 |
+
2024-11-27 21:50:15,068 INFO MainThread:72239 [wandb_init.py:init():673] sending inform_init request
|
44 |
+
2024-11-27 21:50:15,070 INFO MainThread:72239 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
|
45 |
+
2024-11-27 21:50:15,071 INFO MainThread:72239 [wandb_init.py:init():686] backend started and connected
|
46 |
+
2024-11-27 21:50:15,076 INFO MainThread:72239 [wandb_init.py:init():781] updated telemetry
|
47 |
+
2024-11-27 21:50:15,080 INFO MainThread:72239 [wandb_init.py:init():814] communicating run to backend with 90.0 second timeout
|
48 |
+
2024-11-27 21:50:15,728 INFO MainThread:72239 [wandb_init.py:init():859] run resumed
|
49 |
+
2024-11-27 21:50:15,740 INFO MainThread:72239 [wandb_init.py:init():867] starting run threads in backend
|
50 |
+
2024-11-27 21:50:15,808 INFO MainThread:72239 [wandb_run.py:_console_start():2456] atexit reg
|
51 |
+
2024-11-27 21:50:15,808 INFO MainThread:72239 [wandb_run.py:_redirect():2305] redirect: wrap_raw
|
52 |
+
2024-11-27 21:50:15,809 INFO MainThread:72239 [wandb_run.py:_redirect():2370] Wrapping output streams.
|
53 |
+
2024-11-27 21:50:15,809 INFO MainThread:72239 [wandb_run.py:_redirect():2395] Redirects installed.
|
54 |
+
2024-11-27 21:50:15,810 INFO MainThread:72239 [wandb_init.py:init():911] run started, returning control to user process
|
55 |
+
2024-12-06 01:55:44,185 WARNING MsgRouterThr:72239 [router.py:message_loop():75] message_loop has been closed
|
grafted/wandb/run-20241127_215015-torch-grafted-redux/run-torch-grafted-redux.wandb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57dfc6760ef360b69a2d3a3f55620d815f8518b1f2ddc461bcb21aaf665973a9
|
3 |
+
size 115832137
|
grafted/wandb/wandb-resume.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"run_id": "torch-grafted-redux"}
|