iamwyldecat committed on
Commit
036642a
·
1 Parent(s): febdf5b

chore(muon): update comment

Browse files
Files changed (34) hide show
  1. build/torch26-cxx11-cu118-x86_64-linux/optimizer/_ops.py +3 -3
  2. build/torch26-cxx11-cu118-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
  3. build/torch26-cxx11-cu118-x86_64-linux/optimizer/muon.py +3 -5
  4. build/torch26-cxx11-cu124-x86_64-linux/optimizer/_ops.py +3 -3
  5. build/torch26-cxx11-cu124-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
  6. build/torch26-cxx11-cu124-x86_64-linux/optimizer/muon.py +3 -5
  7. build/torch26-cxx11-cu126-x86_64-linux/optimizer/_ops.py +3 -3
  8. build/torch26-cxx11-cu126-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
  9. build/torch26-cxx11-cu126-x86_64-linux/optimizer/muon.py +3 -5
  10. build/torch26-cxx11-rocm62-x86_64-linux/optimizer/_ops.py +3 -3
  11. build/torch26-cxx11-rocm62-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
  12. build/torch26-cxx11-rocm62-x86_64-linux/optimizer/muon.py +3 -5
  13. build/torch26-cxx98-cu118-x86_64-linux/optimizer/_ops.py +3 -3
  14. build/torch26-cxx98-cu118-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
  15. build/torch26-cxx98-cu118-x86_64-linux/optimizer/muon.py +3 -5
  16. build/torch26-cxx98-cu124-x86_64-linux/optimizer/_ops.py +3 -3
  17. build/torch26-cxx98-cu124-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
  18. build/torch26-cxx98-cu124-x86_64-linux/optimizer/muon.py +3 -5
  19. build/torch26-cxx98-cu126-x86_64-linux/optimizer/_ops.py +3 -3
  20. build/torch26-cxx98-cu126-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
  21. build/torch26-cxx98-cu126-x86_64-linux/optimizer/muon.py +3 -5
  22. build/torch27-cxx11-cu118-x86_64-linux/optimizer/_ops.py +3 -3
  23. build/torch27-cxx11-cu118-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
  24. build/torch27-cxx11-cu118-x86_64-linux/optimizer/muon.py +3 -5
  25. build/torch27-cxx11-cu126-x86_64-linux/optimizer/_ops.py +3 -3
  26. build/torch27-cxx11-cu126-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
  27. build/torch27-cxx11-cu126-x86_64-linux/optimizer/muon.py +3 -5
  28. build/torch27-cxx11-cu128-x86_64-linux/optimizer/_ops.py +3 -3
  29. build/torch27-cxx11-cu128-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
  30. build/torch27-cxx11-cu128-x86_64-linux/optimizer/muon.py +3 -5
  31. build/torch27-cxx11-rocm63-x86_64-linux/optimizer/_ops.py +3 -3
  32. build/torch27-cxx11-rocm63-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} +1 -1
  33. build/torch27-cxx11-rocm63-x86_64-linux/optimizer/muon.py +3 -5
  34. torch-ext/optimizer/muon.py +3 -5
build/torch26-cxx11-cu118-x86_64-linux/optimizer/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _optimizer_bdd2678_dirty
3
- ops = torch.ops._optimizer_bdd2678_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_optimizer_bdd2678_dirty::{op_name}"
 
1
  import torch
2
+ from . import _optimizer_febdf5b_dirty
3
+ ops = torch.ops._optimizer_febdf5b_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_optimizer_febdf5b_dirty::{op_name}"
build/torch26-cxx11-cu118-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9119d3a6d99c07a17d110d2ccf6042f199d00c839f5efa74008c1642d21e48b0
3
  size 1787272
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98bd4b647ad0ecbae82a5e78f618475b47595c5bb68b3356c09ee8b1f1a57060
3
  size 1787272
build/torch26-cxx11-cu118-x86_64-linux/optimizer/muon.py CHANGED
@@ -5,11 +5,9 @@ import torch
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
8
- # TODO leave original url and consider LICENSE
9
- # This code snippet is a modified version adapted from the following GitHub repository:
10
- # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
-
12
 
 
 
13
  @torch.no_grad()
14
  def _zeropower_via_newtonschulz5(G, steps):
15
  """
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
34
  A = X @ X.T
35
  # B = (
36
  # b * A + c * A @ A
37
- # ) # adapted from suggestion by @jxbz, @leloykun, and @YouJiacheng
38
  B = torch.addmm(A, A, A, alpha=c, beta=b)
39
  # X = a * X + B @ X
40
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
 
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
 
 
 
 
8
 
9
+ # This code snippet is a modified version adapted from the following GitHub repositories:
10
+ # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
  @torch.no_grad()
12
  def _zeropower_via_newtonschulz5(G, steps):
13
  """
 
32
  A = X @ X.T
33
  # B = (
34
  # b * A + c * A @ A
35
+ # )
36
  B = torch.addmm(A, A, A, alpha=c, beta=b)
37
  # X = a * X + B @ X
38
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
build/torch26-cxx11-cu124-x86_64-linux/optimizer/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _optimizer_bdd2678_dirty
3
- ops = torch.ops._optimizer_bdd2678_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_optimizer_bdd2678_dirty::{op_name}"
 
1
  import torch
2
+ from . import _optimizer_febdf5b_dirty
3
+ ops = torch.ops._optimizer_febdf5b_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_optimizer_febdf5b_dirty::{op_name}"
build/torch26-cxx11-cu124-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91b76cd5be429f99840e26e8ba55b61f9fdcae19301bd7c082b2e9746a276501
3
  size 1824224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:796ac374cd2eec4260591c5a771c6b324f7dc6c8f34fc5dc211ab8afca546ffe
3
  size 1824224
build/torch26-cxx11-cu124-x86_64-linux/optimizer/muon.py CHANGED
@@ -5,11 +5,9 @@ import torch
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
8
- # TODO leave original url and consider LICENSE
9
- # This code snippet is a modified version adapted from the following GitHub repository:
10
- # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
-
12
 
 
 
13
  @torch.no_grad()
14
  def _zeropower_via_newtonschulz5(G, steps):
15
  """
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
34
  A = X @ X.T
35
  # B = (
36
  # b * A + c * A @ A
37
- # ) # adapted from suggestion by @jxbz, @leloykun, and @YouJiacheng
38
  B = torch.addmm(A, A, A, alpha=c, beta=b)
39
  # X = a * X + B @ X
40
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
 
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
 
 
 
 
8
 
9
+ # This code snippet is a modified version adapted from the following GitHub repositories:
10
+ # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
  @torch.no_grad()
12
  def _zeropower_via_newtonschulz5(G, steps):
13
  """
 
32
  A = X @ X.T
33
  # B = (
34
  # b * A + c * A @ A
35
+ # )
36
  B = torch.addmm(A, A, A, alpha=c, beta=b)
37
  # X = a * X + B @ X
38
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
build/torch26-cxx11-cu126-x86_64-linux/optimizer/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _optimizer_bdd2678_dirty
3
- ops = torch.ops._optimizer_bdd2678_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_optimizer_bdd2678_dirty::{op_name}"
 
1
  import torch
2
+ from . import _optimizer_febdf5b_dirty
3
+ ops = torch.ops._optimizer_febdf5b_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_optimizer_febdf5b_dirty::{op_name}"
build/torch26-cxx11-cu126-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:807d59aca5b0403206395a1f4c770b8d644294c17f6af866207c36ac617f0a7d
3
  size 1824224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:254706f111eb794b1409ba48d25649ace5438e2c66027727e84490011ee4c5e6
3
  size 1824224
build/torch26-cxx11-cu126-x86_64-linux/optimizer/muon.py CHANGED
@@ -5,11 +5,9 @@ import torch
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
8
- # TODO leave original url and consider LICENSE
9
- # This code snippet is a modified version adapted from the following GitHub repository:
10
- # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
-
12
 
 
 
13
  @torch.no_grad()
14
  def _zeropower_via_newtonschulz5(G, steps):
15
  """
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
34
  A = X @ X.T
35
  # B = (
36
  # b * A + c * A @ A
37
- # ) # adapted from suggestion by @jxbz, @leloykun, and @YouJiacheng
38
  B = torch.addmm(A, A, A, alpha=c, beta=b)
39
  # X = a * X + B @ X
40
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
 
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
 
 
 
 
8
 
9
+ # This code snippet is a modified version adapted from the following GitHub repositories:
10
+ # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
  @torch.no_grad()
12
  def _zeropower_via_newtonschulz5(G, steps):
13
  """
 
32
  A = X @ X.T
33
  # B = (
34
  # b * A + c * A @ A
35
+ # )
36
  B = torch.addmm(A, A, A, alpha=c, beta=b)
37
  # X = a * X + B @ X
38
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
build/torch26-cxx11-rocm62-x86_64-linux/optimizer/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _optimizer_bdd2678_dirty
3
- ops = torch.ops._optimizer_bdd2678_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_optimizer_bdd2678_dirty::{op_name}"
 
1
  import torch
2
+ from . import _optimizer_febdf5b_dirty
3
+ ops = torch.ops._optimizer_febdf5b_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_optimizer_febdf5b_dirty::{op_name}"
build/torch26-cxx11-rocm62-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d2bdd755079fa06a27401b8a26ac425d35514d196f9df4ce1be5c52ebcc9a64
3
  size 1749744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:027a26212a3dd705876ca83015a53b69d17d80fe7c1559fb01d7aacf614edb57
3
  size 1749744
build/torch26-cxx11-rocm62-x86_64-linux/optimizer/muon.py CHANGED
@@ -5,11 +5,9 @@ import torch
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
8
- # TODO leave original url and consider LICENSE
9
- # This code snippet is a modified version adapted from the following GitHub repository:
10
- # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
-
12
 
 
 
13
  @torch.no_grad()
14
  def _zeropower_via_newtonschulz5(G, steps):
15
  """
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
34
  A = X @ X.T
35
  # B = (
36
  # b * A + c * A @ A
37
- # ) # adapted from suggestion by @jxbz, @leloykun, and @YouJiacheng
38
  B = torch.addmm(A, A, A, alpha=c, beta=b)
39
  # X = a * X + B @ X
40
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
 
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
 
 
 
 
8
 
9
+ # This code snippet is a modified version adapted from the following GitHub repositories:
10
+ # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
  @torch.no_grad()
12
  def _zeropower_via_newtonschulz5(G, steps):
13
  """
 
32
  A = X @ X.T
33
  # B = (
34
  # b * A + c * A @ A
35
+ # )
36
  B = torch.addmm(A, A, A, alpha=c, beta=b)
37
  # X = a * X + B @ X
38
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
build/torch26-cxx98-cu118-x86_64-linux/optimizer/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _optimizer_bdd2678_dirty
3
- ops = torch.ops._optimizer_bdd2678_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_optimizer_bdd2678_dirty::{op_name}"
 
1
  import torch
2
+ from . import _optimizer_febdf5b_dirty
3
+ ops = torch.ops._optimizer_febdf5b_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_optimizer_febdf5b_dirty::{op_name}"
build/torch26-cxx98-cu118-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4ca177074d4c04630ffaa2e49e41e1451bf198c44c4cc544a664be88475a3b9
3
  size 1787192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62c4408eaf54197941241ae6150afe1401a8bcf5854488a8b957d1f1546b388a
3
  size 1787192
build/torch26-cxx98-cu118-x86_64-linux/optimizer/muon.py CHANGED
@@ -5,11 +5,9 @@ import torch
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
8
- # TODO leave original url and consider LICENSE
9
- # This code snippet is a modified version adapted from the following GitHub repository:
10
- # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
-
12
 
 
 
13
  @torch.no_grad()
14
  def _zeropower_via_newtonschulz5(G, steps):
15
  """
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
34
  A = X @ X.T
35
  # B = (
36
  # b * A + c * A @ A
37
- # ) # adapted from suggestion by @jxbz, @leloykun, and @YouJiacheng
38
  B = torch.addmm(A, A, A, alpha=c, beta=b)
39
  # X = a * X + B @ X
40
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
 
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
 
 
 
 
8
 
9
+ # This code snippet is a modified version adapted from the following GitHub repositories:
10
+ # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
  @torch.no_grad()
12
  def _zeropower_via_newtonschulz5(G, steps):
13
  """
 
32
  A = X @ X.T
33
  # B = (
34
  # b * A + c * A @ A
35
+ # )
36
  B = torch.addmm(A, A, A, alpha=c, beta=b)
37
  # X = a * X + B @ X
38
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
build/torch26-cxx98-cu124-x86_64-linux/optimizer/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _optimizer_bdd2678_dirty
3
- ops = torch.ops._optimizer_bdd2678_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_optimizer_bdd2678_dirty::{op_name}"
 
1
  import torch
2
+ from . import _optimizer_febdf5b_dirty
3
+ ops = torch.ops._optimizer_febdf5b_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_optimizer_febdf5b_dirty::{op_name}"
build/torch26-cxx98-cu124-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3511c3a46297462166d7b773dc2bd8b16f43b7004eee1e4b31d468113051fb55
3
  size 1824184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:166d253c91459e1aa1328a1550b0e3ec4bb7c6057870b1d7472a93cc987cf85a
3
  size 1824184
build/torch26-cxx98-cu124-x86_64-linux/optimizer/muon.py CHANGED
@@ -5,11 +5,9 @@ import torch
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
8
- # TODO leave original url and consider LICENSE
9
- # This code snippet is a modified version adapted from the following GitHub repository:
10
- # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
-
12
 
 
 
13
  @torch.no_grad()
14
  def _zeropower_via_newtonschulz5(G, steps):
15
  """
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
34
  A = X @ X.T
35
  # B = (
36
  # b * A + c * A @ A
37
- # ) # adapted from suggestion by @jxbz, @leloykun, and @YouJiacheng
38
  B = torch.addmm(A, A, A, alpha=c, beta=b)
39
  # X = a * X + B @ X
40
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
 
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
 
 
 
 
8
 
9
+ # This code snippet is a modified version adapted from the following GitHub repositories:
10
+ # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
  @torch.no_grad()
12
  def _zeropower_via_newtonschulz5(G, steps):
13
  """
 
32
  A = X @ X.T
33
  # B = (
34
  # b * A + c * A @ A
35
+ # )
36
  B = torch.addmm(A, A, A, alpha=c, beta=b)
37
  # X = a * X + B @ X
38
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
build/torch26-cxx98-cu126-x86_64-linux/optimizer/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _optimizer_bdd2678_dirty
3
- ops = torch.ops._optimizer_bdd2678_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_optimizer_bdd2678_dirty::{op_name}"
 
1
  import torch
2
+ from . import _optimizer_febdf5b_dirty
3
+ ops = torch.ops._optimizer_febdf5b_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_optimizer_febdf5b_dirty::{op_name}"
build/torch26-cxx98-cu126-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fa1dad3b3c1d94e7613a35e42afb8c7974d7bf6ce25cd2766590ba65b129f07
3
  size 1824184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bb7315b326f9af7a77e023c2b78511190235a8dcc9682abd5b49db1dc2b90f2
3
  size 1824184
build/torch26-cxx98-cu126-x86_64-linux/optimizer/muon.py CHANGED
@@ -5,11 +5,9 @@ import torch
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
8
- # TODO leave original url and consider LICENSE
9
- # This code snippet is a modified version adapted from the following GitHub repository:
10
- # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
-
12
 
 
 
13
  @torch.no_grad()
14
  def _zeropower_via_newtonschulz5(G, steps):
15
  """
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
34
  A = X @ X.T
35
  # B = (
36
  # b * A + c * A @ A
37
- # ) # adapted from suggestion by @jxbz, @leloykun, and @YouJiacheng
38
  B = torch.addmm(A, A, A, alpha=c, beta=b)
39
  # X = a * X + B @ X
40
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
 
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
 
 
 
 
8
 
9
+ # This code snippet is a modified version adapted from the following GitHub repositories:
10
+ # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
  @torch.no_grad()
12
  def _zeropower_via_newtonschulz5(G, steps):
13
  """
 
32
  A = X @ X.T
33
  # B = (
34
  # b * A + c * A @ A
35
+ # )
36
  B = torch.addmm(A, A, A, alpha=c, beta=b)
37
  # X = a * X + B @ X
38
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
build/torch27-cxx11-cu118-x86_64-linux/optimizer/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _optimizer_bdd2678_dirty
3
- ops = torch.ops._optimizer_bdd2678_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_optimizer_bdd2678_dirty::{op_name}"
 
1
  import torch
2
+ from . import _optimizer_febdf5b_dirty
3
+ ops = torch.ops._optimizer_febdf5b_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_optimizer_febdf5b_dirty::{op_name}"
build/torch27-cxx11-cu118-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe5761d07ed965bf94d00d8a8e6753a7fb571271e73773de9021511e0e6ae2c7
3
  size 1787368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a74351ee471271eaf1c8292ed01b7e71e6b1b683704144d68d90b67032ba386
3
  size 1787368
build/torch27-cxx11-cu118-x86_64-linux/optimizer/muon.py CHANGED
@@ -5,11 +5,9 @@ import torch
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
8
- # TODO leave original url and consider LICENSE
9
- # This code snippet is a modified version adapted from the following GitHub repository:
10
- # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
-
12
 
 
 
13
  @torch.no_grad()
14
  def _zeropower_via_newtonschulz5(G, steps):
15
  """
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
34
  A = X @ X.T
35
  # B = (
36
  # b * A + c * A @ A
37
- # ) # adapted from suggestion by @jxbz, @leloykun, and @YouJiacheng
38
  B = torch.addmm(A, A, A, alpha=c, beta=b)
39
  # X = a * X + B @ X
40
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
 
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
 
 
 
 
8
 
9
+ # This code snippet is a modified version adapted from the following GitHub repositories:
10
+ # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
  @torch.no_grad()
12
  def _zeropower_via_newtonschulz5(G, steps):
13
  """
 
32
  A = X @ X.T
33
  # B = (
34
  # b * A + c * A @ A
35
+ # )
36
  B = torch.addmm(A, A, A, alpha=c, beta=b)
37
  # X = a * X + B @ X
38
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
build/torch27-cxx11-cu126-x86_64-linux/optimizer/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _optimizer_bdd2678_dirty
3
- ops = torch.ops._optimizer_bdd2678_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_optimizer_bdd2678_dirty::{op_name}"
 
1
  import torch
2
+ from . import _optimizer_febdf5b_dirty
3
+ ops = torch.ops._optimizer_febdf5b_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_optimizer_febdf5b_dirty::{op_name}"
build/torch27-cxx11-cu126-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a35c1c4d46f677f0fe35fec9023a866b9bd0f4245624b4e71a9812a1864c01e6
3
  size 1824256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffb7e3a786405106908da16e74506fe381b09e5e04a27b1062396e378f63f7f7
3
  size 1824256
build/torch27-cxx11-cu126-x86_64-linux/optimizer/muon.py CHANGED
@@ -5,11 +5,9 @@ import torch
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
8
- # TODO leave original url and consider LICENSE
9
- # This code snippet is a modified version adapted from the following GitHub repository:
10
- # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
-
12
 
 
 
13
  @torch.no_grad()
14
  def _zeropower_via_newtonschulz5(G, steps):
15
  """
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
34
  A = X @ X.T
35
  # B = (
36
  # b * A + c * A @ A
37
- # ) # adapted from suggestion by @jxbz, @leloykun, and @YouJiacheng
38
  B = torch.addmm(A, A, A, alpha=c, beta=b)
39
  # X = a * X + B @ X
40
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
 
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
 
 
 
 
8
 
9
+ # This code snippet is a modified version adapted from the following GitHub repositories:
10
+ # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
  @torch.no_grad()
12
  def _zeropower_via_newtonschulz5(G, steps):
13
  """
 
32
  A = X @ X.T
33
  # B = (
34
  # b * A + c * A @ A
35
+ # )
36
  B = torch.addmm(A, A, A, alpha=c, beta=b)
37
  # X = a * X + B @ X
38
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
build/torch27-cxx11-cu128-x86_64-linux/optimizer/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _optimizer_bdd2678_dirty
3
- ops = torch.ops._optimizer_bdd2678_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_optimizer_bdd2678_dirty::{op_name}"
 
1
  import torch
2
+ from . import _optimizer_febdf5b_dirty
3
+ ops = torch.ops._optimizer_febdf5b_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_optimizer_febdf5b_dirty::{op_name}"
build/torch27-cxx11-cu128-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b27a334f5b3c1dd922468fc93662f90cc95b4213f3f96a212e34ea8e4f3bf03
3
  size 1883352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45ee6c653f216af96705a25993d85751648ccd4714a8d6c8c36bdbc8dc19edc5
3
  size 1883352
build/torch27-cxx11-cu128-x86_64-linux/optimizer/muon.py CHANGED
@@ -5,11 +5,9 @@ import torch
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
8
- # TODO leave original url and consider LICENSE
9
- # This code snippet is a modified version adapted from the following GitHub repository:
10
- # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
-
12
 
 
 
13
  @torch.no_grad()
14
  def _zeropower_via_newtonschulz5(G, steps):
15
  """
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
34
  A = X @ X.T
35
  # B = (
36
  # b * A + c * A @ A
37
- # ) # adapted from suggestion by @jxbz, @leloykun, and @YouJiacheng
38
  B = torch.addmm(A, A, A, alpha=c, beta=b)
39
  # X = a * X + B @ X
40
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
 
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
 
 
 
 
8
 
9
+ # This code snippet is a modified version adapted from the following GitHub repositories:
10
+ # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
  @torch.no_grad()
12
  def _zeropower_via_newtonschulz5(G, steps):
13
  """
 
32
  A = X @ X.T
33
  # B = (
34
  # b * A + c * A @ A
35
+ # )
36
  B = torch.addmm(A, A, A, alpha=c, beta=b)
37
  # X = a * X + B @ X
38
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
build/torch27-cxx11-rocm63-x86_64-linux/optimizer/_ops.py CHANGED
@@ -1,9 +1,9 @@
1
  import torch
2
- from . import _optimizer_bdd2678_dirty
3
- ops = torch.ops._optimizer_bdd2678_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
- return f"_optimizer_bdd2678_dirty::{op_name}"
 
1
  import torch
2
+ from . import _optimizer_febdf5b_dirty
3
+ ops = torch.ops._optimizer_febdf5b_dirty
4
 
5
  def add_op_namespace_prefix(op_name: str):
6
  """
7
  Prefix op by namespace.
8
  """
9
+ return f"_optimizer_febdf5b_dirty::{op_name}"
build/torch27-cxx11-rocm63-x86_64-linux/optimizer/{_optimizer_bdd2678_dirty.abi3.so → _optimizer_febdf5b_dirty.abi3.so} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ae60aac17486a756b1926d38e1c20933f57444688e15ba849da3153adcf434e
3
  size 1749648
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8427dae3274100063f3b003a7cebf9565318fcaa2fa340482b2ec9408e9dcea0
3
  size 1749648
build/torch27-cxx11-rocm63-x86_64-linux/optimizer/muon.py CHANGED
@@ -5,11 +5,9 @@ import torch
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
8
- # TODO leave original url and consider LICENSE
9
- # This code snippet is a modified version adapted from the following GitHub repository:
10
- # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
-
12
 
 
 
13
  @torch.no_grad()
14
  def _zeropower_via_newtonschulz5(G, steps):
15
  """
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
34
  A = X @ X.T
35
  # B = (
36
  # b * A + c * A @ A
37
- # ) # adapted from suggestion by @jxbz, @leloykun, and @YouJiacheng
38
  B = torch.addmm(A, A, A, alpha=c, beta=b)
39
  # X = a * X + B @ X
40
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
 
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
 
 
 
 
8
 
9
+ # This code snippet is a modified version adapted from the following GitHub repositories:
10
+ # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
  @torch.no_grad()
12
  def _zeropower_via_newtonschulz5(G, steps):
13
  """
 
32
  A = X @ X.T
33
  # B = (
34
  # b * A + c * A @ A
35
+ # )
36
  B = torch.addmm(A, A, A, alpha=c, beta=b)
37
  # X = a * X + B @ X
38
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
torch-ext/optimizer/muon.py CHANGED
@@ -5,11 +5,9 @@ import torch
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
8
- # TODO leave original url and consider LICENSE
9
- # This code snippet is a modified version adapted from the following GitHub repository:
10
- # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
-
12
 
 
 
13
  @torch.no_grad()
14
  def _zeropower_via_newtonschulz5(G, steps):
15
  """
@@ -34,7 +32,7 @@ def _zeropower_via_newtonschulz5(G, steps):
34
  A = X @ X.T
35
  # B = (
36
  # b * A + c * A @ A
37
- # ) # adapted from suggestion by @jxbz, @leloykun, and @YouJiacheng
38
  B = torch.addmm(A, A, A, alpha=c, beta=b)
39
  # X = a * X + B @ X
40
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)
 
5
  import torch.distributed as dist
6
  from torch.distributed._tensor import DTensor
7
 
 
 
 
 
8
 
9
+ # This code snippet is a modified version adapted from the following GitHub repositories:
10
+ # https://github.com/KellerJordan/Muon/blob/master/muon.py
11
  @torch.no_grad()
12
  def _zeropower_via_newtonschulz5(G, steps):
13
  """
 
32
  A = X @ X.T
33
  # B = (
34
  # b * A + c * A @ A
35
+ # )
36
  B = torch.addmm(A, A, A, alpha=c, beta=b)
37
  # X = a * X + B @ X
38
  X = torch.addmm(X, B, X, alpha=1.0, beta=a)