feat: save processor for multimodal models (#353)
* feat: save processor for multimodal models

  VL models load via AutoModelForImageTextToText, but only the tokenizer was saved/pushed, dropping the processor's image/audio preprocessing config. Save/push the processor alongside the tokenizer so multimodal models stay complete.

* Update src/heretic/model.py

  Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>

* Adjusted processor type to use ProcessorMixin

---------

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -813,6 +813,8 @@ def run():
|
|||||||
del merged_model
|
del merged_model
|
||||||
empty_cache()
|
empty_cache()
|
||||||
model.tokenizer.save_pretrained(save_directory)
|
model.tokenizer.save_pretrained(save_directory)
|
||||||
|
if model.processor is not None:
|
||||||
|
model.processor.save_pretrained(save_directory)
|
||||||
reset_trial_model()
|
reset_trial_model()
|
||||||
|
|
||||||
print(f"Model saved to [bold]{save_directory}[/].")
|
print(f"Model saved to [bold]{save_directory}[/].")
|
||||||
@@ -923,6 +925,12 @@ def run():
|
|||||||
private=private,
|
private=private,
|
||||||
token=token,
|
token=token,
|
||||||
)
|
)
|
||||||
|
if model.processor is not None:
|
||||||
|
model.processor.push_to_hub(
|
||||||
|
repo_id,
|
||||||
|
private=private,
|
||||||
|
token=token,
|
||||||
|
)
|
||||||
reset_trial_model()
|
reset_trial_model()
|
||||||
|
|
||||||
if is_hf_path(settings.model):
|
if is_hf_path(settings.model):
|
||||||
|
|||||||
@@ -17,12 +17,14 @@ from torch.nn import Module, ModuleList
|
|||||||
from transformers import (
|
from transformers import (
|
||||||
AutoModelForCausalLM,
|
AutoModelForCausalLM,
|
||||||
AutoModelForImageTextToText,
|
AutoModelForImageTextToText,
|
||||||
|
AutoProcessor,
|
||||||
AutoTokenizer,
|
AutoTokenizer,
|
||||||
BatchEncoding,
|
BatchEncoding,
|
||||||
BitsAndBytesConfig,
|
BitsAndBytesConfig,
|
||||||
PretrainedConfig,
|
PretrainedConfig,
|
||||||
PreTrainedModel,
|
PreTrainedModel,
|
||||||
PreTrainedTokenizerBase,
|
PreTrainedTokenizerBase,
|
||||||
|
ProcessorMixin,
|
||||||
TextStreamer,
|
TextStreamer,
|
||||||
)
|
)
|
||||||
from transformers.generation import (
|
from transformers.generation import (
|
||||||
@@ -56,6 +58,8 @@ class AbliterationParameters:
|
|||||||
class Model:
|
class Model:
|
||||||
model: PreTrainedModel | PeftModel
|
model: PreTrainedModel | PeftModel
|
||||||
tokenizer: PreTrainedTokenizerBase
|
tokenizer: PreTrainedTokenizerBase
|
||||||
|
# Set for multimodal models, None for text-only ones.
|
||||||
|
processor: ProcessorMixin | None
|
||||||
peft_config: LoraConfig
|
peft_config: LoraConfig
|
||||||
|
|
||||||
def __init__(self, settings: Settings):
|
def __init__(self, settings: Settings):
|
||||||
@@ -75,6 +79,15 @@ class Model:
|
|||||||
**self.revision_kwargs,
|
**self.revision_kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Multimodal models have a processor we'll want to save.
|
||||||
|
self.processor = None
|
||||||
|
if get_model_class(settings.model) == AutoModelForImageTextToText:
|
||||||
|
self.processor = AutoProcessor.from_pretrained(
|
||||||
|
settings.model,
|
||||||
|
trust_remote_code=settings.trust_remote_code,
|
||||||
|
**self.revision_kwargs,
|
||||||
|
)
|
||||||
|
|
||||||
# Fallback for tokenizers that don't declare a special pad token.
|
# Fallback for tokenizers that don't declare a special pad token.
|
||||||
if self.tokenizer.pad_token is None:
|
if self.tokenizer.pad_token is None:
|
||||||
self.tokenizer.pad_token = self.tokenizer.eos_token
|
self.tokenizer.pad_token = self.tokenizer.eos_token
|
||||||
|
|||||||
Reference in New Issue
Block a user