feat: save processor for multimodal models (#353)

* feat: save processor for multimodal models

Vision-language (VL) models load via AutoModelForImageTextToText, but only
the tokenizer was saved/pushed, dropping the processor's image/audio
preprocessing config. Save/push the processor alongside the tokenizer so
that saved and uploaded multimodal models stay complete.

* Update src/heretic/model.py

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* Adjust processor type annotation to use ProcessorMixin

---------

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
zaakir
2026-06-05 15:11:45 +01:00
committed by GitHub
parent 46b5ced274
commit 61c59f7227
2 changed files with 21 additions and 0 deletions
+8
View File
@@ -813,6 +813,8 @@ def run():
del merged_model
empty_cache()
model.tokenizer.save_pretrained(save_directory)
if model.processor is not None:
model.processor.save_pretrained(save_directory)
reset_trial_model()
print(f"Model saved to [bold]{save_directory}[/].")
@@ -923,6 +925,12 @@ def run():
private=private,
token=token,
)
if model.processor is not None:
model.processor.push_to_hub(
repo_id,
private=private,
token=token,
)
reset_trial_model()
if is_hf_path(settings.model):
+13
View File
@@ -17,12 +17,14 @@ from torch.nn import Module, ModuleList
from transformers import (
AutoModelForCausalLM,
AutoModelForImageTextToText,
AutoProcessor,
AutoTokenizer,
BatchEncoding,
BitsAndBytesConfig,
PretrainedConfig,
PreTrainedModel,
PreTrainedTokenizerBase,
ProcessorMixin,
TextStreamer,
)
from transformers.generation import (
@@ -56,6 +58,8 @@ class AbliterationParameters:
class Model:
model: PreTrainedModel | PeftModel
tokenizer: PreTrainedTokenizerBase
# Set for multimodal models, None for text-only ones.
processor: ProcessorMixin | None
peft_config: LoraConfig
def __init__(self, settings: Settings):
@@ -75,6 +79,15 @@ class Model:
**self.revision_kwargs,
)
# Multimodal models have a processor we'll want to save.
self.processor = None
if get_model_class(settings.model) == AutoModelForImageTextToText:
self.processor = AutoProcessor.from_pretrained(
settings.model,
trust_remote_code=settings.trust_remote_code,
**self.revision_kwargs,
)
# Fallback for tokenizers that don't declare a special pad token.
if self.tokenizer.pad_token is None:
self.tokenizer.pad_token = self.tokenizer.eos_token