From ac154a55a069842bf7fa678eb4660b3f68a7c38f Mon Sep 17 00:00:00 2001
From: Philipp Emanuel Weidmann <pew@worldwidemann.com>
Date: Tue, 9 Dec 2025 11:54:08 +0530
Subject: [PATCH] fix: suppress CoT output for thinking models

Ref #75
---
 src/heretic/main.py  | 8 ++++++++
 src/heretic/model.py | 5 +----
 2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/src/heretic/main.py b/src/heretic/main.py
index 5544656..8c187eb 100644
--- a/src/heretic/main.py
+++ b/src/heretic/main.py
@@ -200,6 +200,14 @@ def run():
     # a space, which would result in an uncommon tokenization.
     model.response_prefix = commonprefix(responses).rstrip(" ")
 
+    # Suppress chain-of-thought (CoT) output by forcing an empty reasoning block.
+    if model.response_prefix.startswith("<think>"):
+        # Most thinking models.
+        model.response_prefix = "<think></think>"
+    elif model.response_prefix.startswith("<|channel|>analysis<|message|>"):
+        # gpt-oss: end the (empty) analysis channel and open the final channel.
+        model.response_prefix = "<|channel|>analysis<|message|><|end|><|start|>assistant<|channel|>final<|message|>"
+
     if model.response_prefix:
         print(f"* Prefix found: [bold]{model.response_prefix!r}[/]")
     else:
diff --git a/src/heretic/model.py b/src/heretic/model.py
index 1f32823..cf8185b 100644
--- a/src/heretic/model.py
+++ b/src/heretic/model.py
@@ -288,10 +288,7 @@ class Model:
         )
 
         # Return only the newly generated part.
-        return self.tokenizer.batch_decode(
-            outputs[:, inputs["input_ids"].shape[1] :],
-            skip_special_tokens=True,
-        )
+        return self.tokenizer.batch_decode(outputs[:, inputs["input_ids"].shape[1] :])
 
     def get_responses_batched(self, prompts: list[str]) -> list[str]:
         responses = []