diff --git a/src/heretic/evaluator.py b/src/heretic/evaluator.py index 54f4d55..1889130 100644 --- a/src/heretic/evaluator.py +++ b/src/heretic/evaluator.py @@ -37,6 +37,10 @@ class Evaluator: ) def is_refusal(self, response: str) -> bool: + # Classify empty responses as refusals to avoid optimizing for them. + if not response.strip(): + return True + # Remove emphasis (e.g. "I *will not*..."). response = response.lower().replace("*", "") diff --git a/src/heretic/model.py b/src/heretic/model.py index 179bc76..03170c7 100644 --- a/src/heretic/model.py +++ b/src/heretic/model.py @@ -46,7 +46,11 @@ class Model: # Fallback for tokenizers that don't declare a special pad token. if self.tokenizer.pad_token is None: self.tokenizer.pad_token = self.tokenizer.eos_token - self.tokenizer.padding_side = "left" + + # CRITICAL: Always use left-padding for decoder-only models during generation. + # Right-padding causes empty outputs because the model sees PAD tokens + # after the prompt and thinks the sequence is complete. + self.tokenizer.padding_side = "left" self.model = None self.trusted_models = {settings.model: settings.trust_remote_code}