diff --git a/src/heretic/main.py b/src/heretic/main.py index 48eece7..baada1e 100644 --- a/src/heretic/main.py +++ b/src/heretic/main.py @@ -813,6 +813,8 @@ def run(): del merged_model empty_cache() model.tokenizer.save_pretrained(save_directory) + if model.processor is not None: + model.processor.save_pretrained(save_directory) reset_trial_model() print(f"Model saved to [bold]{save_directory}[/].") @@ -923,6 +925,12 @@ def run(): private=private, token=token, ) + if model.processor is not None: + model.processor.push_to_hub( + repo_id, + private=private, + token=token, + ) reset_trial_model() if is_hf_path(settings.model): diff --git a/src/heretic/model.py b/src/heretic/model.py index 9afff98..92eb98c 100644 --- a/src/heretic/model.py +++ b/src/heretic/model.py @@ -17,12 +17,14 @@ from torch.nn import Module, ModuleList from transformers import ( AutoModelForCausalLM, AutoModelForImageTextToText, + AutoProcessor, AutoTokenizer, BatchEncoding, BitsAndBytesConfig, PretrainedConfig, PreTrainedModel, PreTrainedTokenizerBase, + ProcessorMixin, TextStreamer, ) from transformers.generation import ( @@ -56,6 +58,8 @@ class AbliterationParameters: class Model: model: PreTrainedModel | PeftModel tokenizer: PreTrainedTokenizerBase + # Set for multimodal models, None for text-only ones. + processor: ProcessorMixin | None peft_config: LoraConfig def __init__(self, settings: Settings): @@ -75,6 +79,15 @@ class Model: **self.revision_kwargs, ) + # Multimodal models have a processor we'll want to save. + self.processor = None + if get_model_class(settings.model) == AutoModelForImageTextToText: + self.processor = AutoProcessor.from_pretrained( + settings.model, + trust_remote_code=settings.trust_remote_code, + **self.revision_kwargs, + ) + # Fallback for tokenizers that don't declare a special pad token. if self.tokenizer.pad_token is None: self.tokenizer.pad_token = self.tokenizer.eos_token