Support gpt-oss MoE

This commit is contained in:
Philipp Emanuel Weidmann
2025-10-15 17:51:39 +05:30
parent 7ed0cb1ffb
commit ed65d6902b
+8
View File
@@ -112,6 +112,14 @@ class Model:
for expert in layer.block_sparse_moe.experts:
try_add(expert.w2.weight)
# gpt-oss MoE.
if not matrices:
with suppress(Exception):
# The implementation of gpt-oss in Transformers differs from many other MoE models
# in that it stores the down-projections for all experts in a single 3D tensor,
# but thanks to PyTorch's broadcasting magic, it all just works anyway.
try_add(layer.mlp.experts.down_proj)
# We need at least one MLP down-projection.
assert matrices