Commit
·
036642a
1
Parent(s):
febdf5b
chore(muon): update comment
Browse files
- build/torch26-cxx11-cu118-x86_64-linux/optimizer/_ops.py +3 -3
- build/torch26-cxx11-cu118-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
- build/torch26-cxx11-cu118-x86_64-linux/optimizer/muon.py +3 -5
- build/torch26-cxx11-cu124-x86_64-linux/optimizer/_ops.py +3 -3
- build/torch26-cxx11-cu124-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
- build/torch26-cxx11-cu124-x86_64-linux/optimizer/muon.py +3 -5
- build/torch26-cxx11-cu126-x86_64-linux/optimizer/_ops.py +3 -3
- build/torch26-cxx11-cu126-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
- build/torch26-cxx11-cu126-x86_64-linux/optimizer/muon.py +3 -5
- build/torch26-cxx11-rocm62-x86_64-linux/optimizer/_ops.py +3 -3
- build/torch26-cxx11-rocm62-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
- build/torch26-cxx11-rocm62-x86_64-linux/optimizer/muon.py +3 -5
- build/torch26-cxx98-cu118-x86_64-linux/optimizer/_ops.py +3 -3
- build/torch26-cxx98-cu118-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
- build/torch26-cxx98-cu118-x86_64-linux/optimizer/muon.py +3 -5
- build/torch26-cxx98-cu124-x86_64-linux/optimizer/_ops.py +3 -3
- build/torch26-cxx98-cu124-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
- build/torch26-cxx98-cu124-x86_64-linux/optimizer/muon.py +3 -5
- build/torch26-cxx98-cu126-x86_64-linux/optimizer/_ops.py +3 -3
- build/torch26-cxx98-cu126-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
- build/torch26-cxx98-cu126-x86_64-linux/optimizer/muon.py +3 -5
- build/torch27-cxx11-cu118-x86_64-linux/optimizer/_ops.py +3 -3
- build/torch27-cxx11-cu118-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
- build/torch27-cxx11-cu118-x86_64-linux/optimizer/muon.py +3 -5
- build/torch27-cxx11-cu126-x86_64-linux/optimizer/_ops.py +3 -3
- build/torch27-cxx11-cu126-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
- build/torch27-cxx11-cu126-x86_64-linux/optimizer/muon.py +3 -5
- build/torch27-cxx11-cu128-x86_64-linux/optimizer/_ops.py +3 -3
- build/torch27-cxx11-cu128-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
- build/torch27-cxx11-cu128-x86_64-linux/optimizer/muon.py +3 -5
- build/torch27-cxx11-rocm63-x86_64-linux/optimizer/_ops.py +3 -3
- build/torch27-cxx11-rocm63-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
- build/torch27-cxx11-rocm63-x86_64-linux/optimizer/muon.py +3 -5
- torch-ext/optimizer/muon.py +3 -5
build/torch26-cxx11-cu118-x86_64-linux/optimizer/_ops.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import torch
|
2 |
-
from . import _optimizer_bdd2678_dirty
|
3 |
-
ops = torch.ops._optimizer_bdd2678_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
-
return f"_optimizer_bdd2678_dirty::{op_name}"
|
|
|
1 |
import torch
|
2 |
+
from . import _optimizer_febdf5b_dirty
|
3 |
+
ops = torch.ops._optimizer_febdf5b_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
+
return f"_optimizer_febdf5b_dirty::{op_name}"
|
build/torch26-cxx11-cu118-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1787272
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98bd4b647ad0ecbae82a5e78f618475b47595c5bb68b3356c09ee8b1f1a57060
|
3 |
size 1787272
|
build/torch26-cxx11-cu118-x86_64-linux/optimizer/muon.py
CHANGED
@@ -5,11 +5,9 @@ import torch
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
8 |
-
# TODO leave original url and consider LICENSE
|
9 |
-
# This code snippet is a modified version adapted from the following GitHub repository:
|
10 |
-
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
-
|
12 |
|
|
|
|
|
13 |
@torch.no_grad()
|
14 |
def _zeropower_via_newtonschulz5(G, steps):
|
15 |
"""
|
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
|
|
34 |
A = X @ X.T
|
35 |
# B = (
|
36 |
# b * A + c * A @ A
|
37 |
-
# )
|
38 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
39 |
# X = a * X + B @ X
|
40 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# This code snippet is a modified version adapted from the following GitHub repositories:
|
10 |
+
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
@torch.no_grad()
|
12 |
def _zeropower_via_newtonschulz5(G, steps):
|
13 |
"""
|
|
|
32 |
A = X @ X.T
|
33 |
# B = (
|
34 |
# b * A + c * A @ A
|
35 |
+
# )
|
36 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
37 |
# X = a * X + B @ X
|
38 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
build/torch26-cxx11-cu124-x86_64-linux/optimizer/_ops.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import torch
|
2 |
-
from . import _optimizer_bdd2678_dirty
|
3 |
-
ops = torch.ops._optimizer_bdd2678_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
-
return f"_optimizer_bdd2678_dirty::{op_name}"
|
|
|
1 |
import torch
|
2 |
+
from . import _optimizer_febdf5b_dirty
|
3 |
+
ops = torch.ops._optimizer_febdf5b_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
+
return f"_optimizer_febdf5b_dirty::{op_name}"
|
build/torch26-cxx11-cu124-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1824224
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:796ac374cd2eec4260591c5a771c6b324f7dc6c8f34fc5dc211ab8afca546ffe
|
3 |
size 1824224
|
build/torch26-cxx11-cu124-x86_64-linux/optimizer/muon.py
CHANGED
@@ -5,11 +5,9 @@ import torch
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
8 |
-
# TODO leave original url and consider LICENSE
|
9 |
-
# This code snippet is a modified version adapted from the following GitHub repository:
|
10 |
-
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
-
|
12 |
|
|
|
|
|
13 |
@torch.no_grad()
|
14 |
def _zeropower_via_newtonschulz5(G, steps):
|
15 |
"""
|
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
|
|
34 |
A = X @ X.T
|
35 |
# B = (
|
36 |
# b * A + c * A @ A
|
37 |
-
# )
|
38 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
39 |
# X = a * X + B @ X
|
40 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# This code snippet is a modified version adapted from the following GitHub repositories:
|
10 |
+
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
@torch.no_grad()
|
12 |
def _zeropower_via_newtonschulz5(G, steps):
|
13 |
"""
|
|
|
32 |
A = X @ X.T
|
33 |
# B = (
|
34 |
# b * A + c * A @ A
|
35 |
+
# )
|
36 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
37 |
# X = a * X + B @ X
|
38 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
build/torch26-cxx11-cu126-x86_64-linux/optimizer/_ops.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import torch
|
2 |
-
from . import _optimizer_bdd2678_dirty
|
3 |
-
ops = torch.ops._optimizer_bdd2678_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
-
return f"_optimizer_bdd2678_dirty::{op_name}"
|
|
|
1 |
import torch
|
2 |
+
from . import _optimizer_febdf5b_dirty
|
3 |
+
ops = torch.ops._optimizer_febdf5b_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
+
return f"_optimizer_febdf5b_dirty::{op_name}"
|
build/torch26-cxx11-cu126-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1824224
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:254706f111eb794b1409ba48d25649ace5438e2c66027727e84490011ee4c5e6
|
3 |
size 1824224
|
build/torch26-cxx11-cu126-x86_64-linux/optimizer/muon.py
CHANGED
@@ -5,11 +5,9 @@ import torch
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
8 |
-
# TODO leave original url and consider LICENSE
|
9 |
-
# This code snippet is a modified version adapted from the following GitHub repository:
|
10 |
-
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
-
|
12 |
|
|
|
|
|
13 |
@torch.no_grad()
|
14 |
def _zeropower_via_newtonschulz5(G, steps):
|
15 |
"""
|
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
|
|
34 |
A = X @ X.T
|
35 |
# B = (
|
36 |
# b * A + c * A @ A
|
37 |
-
# )
|
38 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
39 |
# X = a * X + B @ X
|
40 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# This code snippet is a modified version adapted from the following GitHub repositories:
|
10 |
+
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
@torch.no_grad()
|
12 |
def _zeropower_via_newtonschulz5(G, steps):
|
13 |
"""
|
|
|
32 |
A = X @ X.T
|
33 |
# B = (
|
34 |
# b * A + c * A @ A
|
35 |
+
# )
|
36 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
37 |
# X = a * X + B @ X
|
38 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
build/torch26-cxx11-rocm62-x86_64-linux/optimizer/_ops.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import torch
|
2 |
-
from . import _optimizer_bdd2678_dirty
|
3 |
-
ops = torch.ops._optimizer_bdd2678_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
-
return f"_optimizer_bdd2678_dirty::{op_name}"
|
|
|
1 |
import torch
|
2 |
+
from . import _optimizer_febdf5b_dirty
|
3 |
+
ops = torch.ops._optimizer_febdf5b_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
+
return f"_optimizer_febdf5b_dirty::{op_name}"
|
build/torch26-cxx11-rocm62-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1749744
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:027a26212a3dd705876ca83015a53b69d17d80fe7c1559fb01d7aacf614edb57
|
3 |
size 1749744
|
build/torch26-cxx11-rocm62-x86_64-linux/optimizer/muon.py
CHANGED
@@ -5,11 +5,9 @@ import torch
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
8 |
-
# TODO leave original url and consider LICENSE
|
9 |
-
# This code snippet is a modified version adapted from the following GitHub repository:
|
10 |
-
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
-
|
12 |
|
|
|
|
|
13 |
@torch.no_grad()
|
14 |
def _zeropower_via_newtonschulz5(G, steps):
|
15 |
"""
|
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
|
|
34 |
A = X @ X.T
|
35 |
# B = (
|
36 |
# b * A + c * A @ A
|
37 |
-
# )
|
38 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
39 |
# X = a * X + B @ X
|
40 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# This code snippet is a modified version adapted from the following GitHub repositories:
|
10 |
+
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
@torch.no_grad()
|
12 |
def _zeropower_via_newtonschulz5(G, steps):
|
13 |
"""
|
|
|
32 |
A = X @ X.T
|
33 |
# B = (
|
34 |
# b * A + c * A @ A
|
35 |
+
# )
|
36 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
37 |
# X = a * X + B @ X
|
38 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
build/torch26-cxx98-cu118-x86_64-linux/optimizer/_ops.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import torch
|
2 |
-
from . import _optimizer_bdd2678_dirty
|
3 |
-
ops = torch.ops._optimizer_bdd2678_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
-
return f"_optimizer_bdd2678_dirty::{op_name}"
|
|
|
1 |
import torch
|
2 |
+
from . import _optimizer_febdf5b_dirty
|
3 |
+
ops = torch.ops._optimizer_febdf5b_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
+
return f"_optimizer_febdf5b_dirty::{op_name}"
|
build/torch26-cxx98-cu118-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1787192
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62c4408eaf54197941241ae6150afe1401a8bcf5854488a8b957d1f1546b388a
|
3 |
size 1787192
|
build/torch26-cxx98-cu118-x86_64-linux/optimizer/muon.py
CHANGED
@@ -5,11 +5,9 @@ import torch
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
8 |
-
# TODO leave original url and consider LICENSE
|
9 |
-
# This code snippet is a modified version adapted from the following GitHub repository:
|
10 |
-
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
-
|
12 |
|
|
|
|
|
13 |
@torch.no_grad()
|
14 |
def _zeropower_via_newtonschulz5(G, steps):
|
15 |
"""
|
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
|
|
34 |
A = X @ X.T
|
35 |
# B = (
|
36 |
# b * A + c * A @ A
|
37 |
-
# )
|
38 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
39 |
# X = a * X + B @ X
|
40 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# This code snippet is a modified version adapted from the following GitHub repositories:
|
10 |
+
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
@torch.no_grad()
|
12 |
def _zeropower_via_newtonschulz5(G, steps):
|
13 |
"""
|
|
|
32 |
A = X @ X.T
|
33 |
# B = (
|
34 |
# b * A + c * A @ A
|
35 |
+
# )
|
36 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
37 |
# X = a * X + B @ X
|
38 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
build/torch26-cxx98-cu124-x86_64-linux/optimizer/_ops.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import torch
|
2 |
-
from . import _optimizer_bdd2678_dirty
|
3 |
-
ops = torch.ops._optimizer_bdd2678_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
-
return f"_optimizer_bdd2678_dirty::{op_name}"
|
|
|
1 |
import torch
|
2 |
+
from . import _optimizer_febdf5b_dirty
|
3 |
+
ops = torch.ops._optimizer_febdf5b_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
+
return f"_optimizer_febdf5b_dirty::{op_name}"
|
build/torch26-cxx98-cu124-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1824184
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:166d253c91459e1aa1328a1550b0e3ec4bb7c6057870b1d7472a93cc987cf85a
|
3 |
size 1824184
|
build/torch26-cxx98-cu124-x86_64-linux/optimizer/muon.py
CHANGED
@@ -5,11 +5,9 @@ import torch
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
8 |
-
# TODO leave original url and consider LICENSE
|
9 |
-
# This code snippet is a modified version adapted from the following GitHub repository:
|
10 |
-
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
-
|
12 |
|
|
|
|
|
13 |
@torch.no_grad()
|
14 |
def _zeropower_via_newtonschulz5(G, steps):
|
15 |
"""
|
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
|
|
34 |
A = X @ X.T
|
35 |
# B = (
|
36 |
# b * A + c * A @ A
|
37 |
-
# )
|
38 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
39 |
# X = a * X + B @ X
|
40 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# This code snippet is a modified version adapted from the following GitHub repositories:
|
10 |
+
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
@torch.no_grad()
|
12 |
def _zeropower_via_newtonschulz5(G, steps):
|
13 |
"""
|
|
|
32 |
A = X @ X.T
|
33 |
# B = (
|
34 |
# b * A + c * A @ A
|
35 |
+
# )
|
36 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
37 |
# X = a * X + B @ X
|
38 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
build/torch26-cxx98-cu126-x86_64-linux/optimizer/_ops.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import torch
|
2 |
-
from . import _optimizer_bdd2678_dirty
|
3 |
-
ops = torch.ops._optimizer_bdd2678_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
-
return f"_optimizer_bdd2678_dirty::{op_name}"
|
|
|
1 |
import torch
|
2 |
+
from . import _optimizer_febdf5b_dirty
|
3 |
+
ops = torch.ops._optimizer_febdf5b_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
+
return f"_optimizer_febdf5b_dirty::{op_name}"
|
build/torch26-cxx98-cu126-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1824184
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8bb7315b326f9af7a77e023c2b78511190235a8dcc9682abd5b49db1dc2b90f2
|
3 |
size 1824184
|
build/torch26-cxx98-cu126-x86_64-linux/optimizer/muon.py
CHANGED
@@ -5,11 +5,9 @@ import torch
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
8 |
-
# TODO leave original url and consider LICENSE
|
9 |
-
# This code snippet is a modified version adapted from the following GitHub repository:
|
10 |
-
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
-
|
12 |
|
|
|
|
|
13 |
@torch.no_grad()
|
14 |
def _zeropower_via_newtonschulz5(G, steps):
|
15 |
"""
|
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
|
|
34 |
A = X @ X.T
|
35 |
# B = (
|
36 |
# b * A + c * A @ A
|
37 |
-
# )
|
38 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
39 |
# X = a * X + B @ X
|
40 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# This code snippet is a modified version adapted from the following GitHub repositories:
|
10 |
+
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
@torch.no_grad()
|
12 |
def _zeropower_via_newtonschulz5(G, steps):
|
13 |
"""
|
|
|
32 |
A = X @ X.T
|
33 |
# B = (
|
34 |
# b * A + c * A @ A
|
35 |
+
# )
|
36 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
37 |
# X = a * X + B @ X
|
38 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
build/torch27-cxx11-cu118-x86_64-linux/optimizer/_ops.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import torch
|
2 |
-
from . import _optimizer_bdd2678_dirty
|
3 |
-
ops = torch.ops._optimizer_bdd2678_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
-
return f"_optimizer_bdd2678_dirty::{op_name}"
|
|
|
1 |
import torch
|
2 |
+
from . import _optimizer_febdf5b_dirty
|
3 |
+
ops = torch.ops._optimizer_febdf5b_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
+
return f"_optimizer_febdf5b_dirty::{op_name}"
|
build/torch27-cxx11-cu118-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1787368
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a74351ee471271eaf1c8292ed01b7e71e6b1b683704144d68d90b67032ba386
|
3 |
size 1787368
|
build/torch27-cxx11-cu118-x86_64-linux/optimizer/muon.py
CHANGED
@@ -5,11 +5,9 @@ import torch
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
8 |
-
# TODO leave original url and consider LICENSE
|
9 |
-
# This code snippet is a modified version adapted from the following GitHub repository:
|
10 |
-
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
-
|
12 |
|
|
|
|
|
13 |
@torch.no_grad()
|
14 |
def _zeropower_via_newtonschulz5(G, steps):
|
15 |
"""
|
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
|
|
34 |
A = X @ X.T
|
35 |
# B = (
|
36 |
# b * A + c * A @ A
|
37 |
-
# )
|
38 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
39 |
# X = a * X + B @ X
|
40 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# This code snippet is a modified version adapted from the following GitHub repositories:
|
10 |
+
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
@torch.no_grad()
|
12 |
def _zeropower_via_newtonschulz5(G, steps):
|
13 |
"""
|
|
|
32 |
A = X @ X.T
|
33 |
# B = (
|
34 |
# b * A + c * A @ A
|
35 |
+
# )
|
36 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
37 |
# X = a * X + B @ X
|
38 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
build/torch27-cxx11-cu126-x86_64-linux/optimizer/_ops.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import torch
|
2 |
-
from . import _optimizer_bdd2678_dirty
|
3 |
-
ops = torch.ops._optimizer_bdd2678_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
-
return f"_optimizer_bdd2678_dirty::{op_name}"
|
|
|
1 |
import torch
|
2 |
+
from . import _optimizer_febdf5b_dirty
|
3 |
+
ops = torch.ops._optimizer_febdf5b_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
+
return f"_optimizer_febdf5b_dirty::{op_name}"
|
build/torch27-cxx11-cu126-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1824256
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ffb7e3a786405106908da16e74506fe381b09e5e04a27b1062396e378f63f7f7
|
3 |
size 1824256
|
build/torch27-cxx11-cu126-x86_64-linux/optimizer/muon.py
CHANGED
@@ -5,11 +5,9 @@ import torch
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
8 |
-
# TODO leave original url and consider LICENSE
|
9 |
-
# This code snippet is a modified version adapted from the following GitHub repository:
|
10 |
-
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
-
|
12 |
|
|
|
|
|
13 |
@torch.no_grad()
|
14 |
def _zeropower_via_newtonschulz5(G, steps):
|
15 |
"""
|
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
|
|
34 |
A = X @ X.T
|
35 |
# B = (
|
36 |
# b * A + c * A @ A
|
37 |
-
# )
|
38 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
39 |
# X = a * X + B @ X
|
40 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# This code snippet is a modified version adapted from the following GitHub repositories:
|
10 |
+
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
@torch.no_grad()
|
12 |
def _zeropower_via_newtonschulz5(G, steps):
|
13 |
"""
|
|
|
32 |
A = X @ X.T
|
33 |
# B = (
|
34 |
# b * A + c * A @ A
|
35 |
+
# )
|
36 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
37 |
# X = a * X + B @ X
|
38 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
build/torch27-cxx11-cu128-x86_64-linux/optimizer/_ops.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import torch
|
2 |
-
from . import _optimizer_bdd2678_dirty
|
3 |
-
ops = torch.ops._optimizer_bdd2678_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
-
return f"_optimizer_bdd2678_dirty::{op_name}"
|
|
|
1 |
import torch
|
2 |
+
from . import _optimizer_febdf5b_dirty
|
3 |
+
ops = torch.ops._optimizer_febdf5b_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
+
return f"_optimizer_febdf5b_dirty::{op_name}"
|
build/torch27-cxx11-cu128-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1883352
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45ee6c653f216af96705a25993d85751648ccd4714a8d6c8c36bdbc8dc19edc5
|
3 |
size 1883352
|
build/torch27-cxx11-cu128-x86_64-linux/optimizer/muon.py
CHANGED
@@ -5,11 +5,9 @@ import torch
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
8 |
-
# TODO leave original url and consider LICENSE
|
9 |
-
# This code snippet is a modified version adapted from the following GitHub repository:
|
10 |
-
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
-
|
12 |
|
|
|
|
|
13 |
@torch.no_grad()
|
14 |
def _zeropower_via_newtonschulz5(G, steps):
|
15 |
"""
|
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
|
|
34 |
A = X @ X.T
|
35 |
# B = (
|
36 |
# b * A + c * A @ A
|
37 |
-
# )
|
38 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
39 |
# X = a * X + B @ X
|
40 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# This code snippet is a modified version adapted from the following GitHub repositories:
|
10 |
+
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
@torch.no_grad()
|
12 |
def _zeropower_via_newtonschulz5(G, steps):
|
13 |
"""
|
|
|
32 |
A = X @ X.T
|
33 |
# B = (
|
34 |
# b * A + c * A @ A
|
35 |
+
# )
|
36 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
37 |
# X = a * X + B @ X
|
38 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
build/torch27-cxx11-rocm63-x86_64-linux/optimizer/_ops.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import torch
|
2 |
-
from . import _optimizer_bdd2678_dirty
|
3 |
-
ops = torch.ops._optimizer_bdd2678_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
-
return f"_optimizer_bdd2678_dirty::{op_name}"
|
|
|
1 |
import torch
|
2 |
+
from . import _optimizer_febdf5b_dirty
|
3 |
+
ops = torch.ops._optimizer_febdf5b_dirty
|
4 |
|
5 |
def add_op_namespace_prefix(op_name: str):
|
6 |
"""
|
7 |
Prefix op by namespace.
|
8 |
"""
|
9 |
+
return f"_optimizer_febdf5b_dirty::{op_name}"
|
build/torch27-cxx11-rocm63-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so}
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1749648
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8427dae3274100063f3b003a7cebf9565318fcaa2fa340482b2ec9408e9dcea0
|
3 |
size 1749648
|
build/torch27-cxx11-rocm63-x86_64-linux/optimizer/muon.py
CHANGED
@@ -5,11 +5,9 @@ import torch
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
8 |
-
# TODO leave original url and consider LICENSE
|
9 |
-
# This code snippet is a modified version adapted from the following GitHub repository:
|
10 |
-
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
-
|
12 |
|
|
|
|
|
13 |
@torch.no_grad()
|
14 |
def _zeropower_via_newtonschulz5(G, steps):
|
15 |
"""
|
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
|
|
34 |
A = X @ X.T
|
35 |
# B = (
|
36 |
# b * A + c * A @ A
|
37 |
-
# )
|
38 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
39 |
# X = a * X + B @ X
|
40 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# This code snippet is a modified version adapted from the following GitHub repositories:
|
10 |
+
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
@torch.no_grad()
|
12 |
def _zeropower_via_newtonschulz5(G, steps):
|
13 |
"""
|
|
|
32 |
A = X @ X.T
|
33 |
# B = (
|
34 |
# b * A + c * A @ A
|
35 |
+
# )
|
36 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
37 |
# X = a * X + B @ X
|
38 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
torch-ext/optimizer/muon.py
CHANGED
@@ -5,11 +5,9 @@ import torch
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
8 |
-
# TODO leave original url and consider LICENSE
|
9 |
-
# This code snippet is a modified version adapted from the following GitHub repository:
|
10 |
-
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
-
|
12 |
|
|
|
|
|
13 |
@torch.no_grad()
|
14 |
def _zeropower_via_newtonschulz5(G, steps):
|
15 |
"""
|
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
|
|
34 |
A = X @ X.T
|
35 |
# B = (
|
36 |
# b * A + c * A @ A
|
37 |
-
# )
|
38 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
39 |
# X = a * X + B @ X
|
40 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|
|
|
5 |
import torch.distributed as dist
|
6 |
from torch.distributed._tensor import DTensor
|
7 |
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# This code snippet is a modified version adapted from the following GitHub repositories:
|
10 |
+
# https://github.com/KellerJordan/Muon/blob/master/muon.py
|
11 |
@torch.no_grad()
|
12 |
def _zeropower_via_newtonschulz5(G, steps):
|
13 |
"""
|
|
|
32 |
A = X @ X.T
|
33 |
# B = (
|
34 |
# b * A + c * A @ A
|
35 |
+
# )
|
36 |
B = torch.addmm(A, A, A, alpha=c, beta=b)
|
37 |
# X = a * X + B @ X
|
38 |
X = torch.addmm(X, B, X, alpha=1.0, beta=a)
|