Support gpt-oss MoE

2025-10-15 17:51:39 +05:30
parent 7ed0cb1ffb
commit ed65d6902b
1 changed file with 8 additions and 0 deletions
@@ -112,6 +112,14 @@ class Model:
                for expert in layer.block_sparse_moe.experts:
                    try_add(expert.w2.weight)

+        # gpt-oss MoE.
+        if not matrices:
+            with suppress(Exception):
+                # The implementation of gpt-oss in Transformers differs from many other MoE models
+                # in that it stores the down-projections for all experts in a single 3D tensor,
+                # but thanks to PyTorch's broadcasting magic, it all just works anyway.
+                try_add(layer.mlp.experts.down_proj)
+
        # We need at least one MLP down-projection.
        assert matrices