feat: save processor for multimodal models (#353)
* feat: save processor for multimodal models

  VL models load via `AutoModelForImageTextToText`, but only the tokenizer was saved/pushed, dropping the processor's image/audio preprocessing config. Save/push the processor alongside the tokenizer so multimodal models stay complete.

* Update src/heretic/model.py

  Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* Adjusted processor type to use ProcessorMixin

---------

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -813,6 +813,8 @@ def run():
|
||||
del merged_model
|
||||
empty_cache()
|
||||
model.tokenizer.save_pretrained(save_directory)
|
||||
if model.processor is not None:
|
||||
model.processor.save_pretrained(save_directory)
|
||||
reset_trial_model()
|
||||
|
||||
print(f"Model saved to [bold]{save_directory}[/].")
|
||||
@@ -923,6 +925,12 @@ def run():
|
||||
private=private,
|
||||
token=token,
|
||||
)
|
||||
if model.processor is not None:
|
||||
model.processor.push_to_hub(
|
||||
repo_id,
|
||||
private=private,
|
||||
token=token,
|
||||
)
|
||||
reset_trial_model()
|
||||
|
||||
if is_hf_path(settings.model):
|
||||
|
||||
@@ -17,12 +17,14 @@ from torch.nn import Module, ModuleList
|
||||
from transformers import (
|
||||
AutoModelForCausalLM,
|
||||
AutoModelForImageTextToText,
|
||||
AutoProcessor,
|
||||
AutoTokenizer,
|
||||
BatchEncoding,
|
||||
BitsAndBytesConfig,
|
||||
PretrainedConfig,
|
||||
PreTrainedModel,
|
||||
PreTrainedTokenizerBase,
|
||||
ProcessorMixin,
|
||||
TextStreamer,
|
||||
)
|
||||
from transformers.generation import (
|
||||
@@ -56,6 +58,8 @@ class AbliterationParameters:
|
||||
class Model:
|
||||
model: PreTrainedModel | PeftModel
|
||||
tokenizer: PreTrainedTokenizerBase
|
||||
# Set for multimodal models, None for text-only ones.
|
||||
processor: ProcessorMixin | None
|
||||
peft_config: LoraConfig
|
||||
|
||||
def __init__(self, settings: Settings):
|
||||
@@ -75,6 +79,15 @@ class Model:
|
||||
**self.revision_kwargs,
|
||||
)
|
||||
|
||||
# Multimodal models have a processor we'll want to save.
|
||||
self.processor = None
|
||||
if get_model_class(settings.model) == AutoModelForImageTextToText:
|
||||
self.processor = AutoProcessor.from_pretrained(
|
||||
settings.model,
|
||||
trust_remote_code=settings.trust_remote_code,
|
||||
**self.revision_kwargs,
|
||||
)
|
||||
|
||||
# Fallback for tokenizers that don't declare a special pad token.
|
||||
if self.tokenizer.pad_token is None:
|
||||
self.tokenizer.pad_token = self.tokenizer.eos_token
|
||||
|
||||
Reference in New Issue
Block a user