From ac154a55a069842bf7fa678eb4660b3f68a7c38f Mon Sep 17 00:00:00 2001 From: Philipp Emanuel Weidmann Date: Tue, 9 Dec 2025 11:54:08 +0530 Subject: [PATCH] fix: suppress CoT output for thinking models Ref #75 --- src/heretic/main.py | 8 ++++++++ src/heretic/model.py | 5 +---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/heretic/main.py b/src/heretic/main.py index 5544656..8c187eb 100644 --- a/src/heretic/main.py +++ b/src/heretic/main.py @@ -200,6 +200,14 @@ def run(): # a space, which would result in an uncommon tokenization. model.response_prefix = commonprefix(responses).rstrip(" ") + # Suppress CoT output. + if model.response_prefix.startswith("<think>"): + # Most thinking models. + model.response_prefix = "<think></think>" + elif model.response_prefix.startswith("<|channel|>analysis<|message|>"): + # gpt-oss. + model.response_prefix = "<|channel|>analysis<|message|><|end|><|start|>assistant<|channel|>final<|message|>" + if model.response_prefix: print(f"* Prefix found: [bold]{model.response_prefix!r}[/]") else: diff --git a/src/heretic/model.py b/src/heretic/model.py index 1f32823..cf8185b 100644 --- a/src/heretic/model.py +++ b/src/heretic/model.py @@ -288,10 +288,7 @@ class Model: ) # Return only the newly generated part. - return self.tokenizer.batch_decode( - outputs[:, inputs["input_ids"].shape[1] :], - skip_special_tokens=True, - ) + return self.tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1] :]) def get_responses_batched(self, prompts: list[str]) -> list[str]: responses = []