Add support for Granite MoE Hybrid in model.py by including down projections for shared MLP and MoE experts (#14)

2025-11-18 06:02:58 +03:00
parent 7bad84b4f1
commit 61fdf72b42
1 changed file with 9 additions and 0 deletions
@@ -141,6 +141,15 @@ class Model:
            # but thanks to PyTorch's broadcasting magic, it all just works anyway.
            try_add("mlp.down_proj", layer.mlp.experts.down_proj)
        # Granite MoE Hybrid - attention layers with shared_mlp.
        with suppress(Exception):
            try_add("mlp.down_proj", layer.shared_mlp.output_linear.weight)
        # Granite MoE Hybrid - MoE layers with experts.
        with suppress(Exception):
            for expert in layer.moe.experts:
                try_add("mlp.down_proj", expert.output_linear.weight)
        # We need at least one MLP down-projection.
        assert matrices["mlp.down_proj"]