fix: minor cleanups and improvements
This commit is contained in:
+37
-8
@@ -27,6 +27,12 @@ device_map = "auto"
|
|||||||
# Maximum memory to allocate per device.
|
# Maximum memory to allocate per device.
|
||||||
# max_memory = { "0" = "20GB", "cpu" = "64GB" }
|
# max_memory = { "0" = "20GB", "cpu" = "64GB" }
|
||||||
|
|
||||||
|
# Whether to move intermediate analysis tensors (such as residuals and logprobs)
|
||||||
|
# to CPU memory as soon as possible to reduce peak VRAM usage.
|
||||||
|
# This lowers peak VRAM usage during residual analysis and evaluation,
|
||||||
|
# but may slightly reduce performance due to host/device transfers.
|
||||||
|
offload_outputs_to_cpu = true
|
||||||
|
|
||||||
# Number of input sequences to process in parallel (0 = auto).
|
# Number of input sequences to process in parallel (0 = auto).
|
||||||
batch_size = 0 # auto
|
batch_size = 0 # auto
|
||||||
|
|
||||||
@@ -36,6 +42,32 @@ max_batch_size = 128
|
|||||||
# Maximum number of tokens to generate for each response.
|
# Maximum number of tokens to generate for each response.
|
||||||
max_response_length = 100
|
max_response_length = 100
|
||||||
|
|
||||||
|
# List of pairs of the form [cot_initializer, closed_cot_block] used to skip
|
||||||
|
# the Chain-of-Thought block in responses, so that evaluation happens
|
||||||
|
# at the start of the actual response.
|
||||||
|
chain_of_thought_skips = [
|
||||||
|
# Most thinking models.
|
||||||
|
[
|
||||||
|
"<think>",
|
||||||
|
"<think></think>",
|
||||||
|
],
|
||||||
|
# gpt-oss.
|
||||||
|
[
|
||||||
|
"<|channel|>analysis<|message|>",
|
||||||
|
"<|channel|>analysis<|message|><|end|><|start|>assistant<|channel|>final<|message|>",
|
||||||
|
],
|
||||||
|
# Unknown, suggested by user.
|
||||||
|
[
|
||||||
|
"<thought>",
|
||||||
|
"<thought></thought>",
|
||||||
|
],
|
||||||
|
# Unknown, suggested by user.
|
||||||
|
[
|
||||||
|
"[THINK]",
|
||||||
|
"[THINK][/THINK]",
|
||||||
|
],
|
||||||
|
]
|
||||||
|
|
||||||
# Whether to print prompt/response pairs when counting refusals.
|
# Whether to print prompt/response pairs when counting refusals.
|
||||||
print_responses = false
|
print_responses = false
|
||||||
|
|
||||||
@@ -64,13 +96,13 @@ kl_divergence_target = 0.01
|
|||||||
|
|
||||||
# Whether to adjust the refusal directions so that only the component that is
|
# Whether to adjust the refusal directions so that only the component that is
|
||||||
# orthogonal to the good direction is subtracted during abliteration.
|
# orthogonal to the good direction is subtracted during abliteration.
|
||||||
orthogonalize_direction = false
|
orthogonalize_direction = true
|
||||||
|
|
||||||
# How to apply row normalization of the weights. Options:
|
# How to apply row normalization of the weights. Options:
|
||||||
# "none" (no normalization),
|
# "none" (no normalization),
|
||||||
# "pre" (compute LoRA adapter relative to row-normalized weights),
|
# "pre" (compute LoRA adapter relative to row-normalized weights),
|
||||||
# "full" (like "pre", but renormalizes to preserve original row magnitudes).
|
# "full" (like "pre", but renormalizes to preserve original row magnitudes).
|
||||||
row_normalization = "none"
|
row_normalization = "full"
|
||||||
|
|
||||||
# The rank of the LoRA adapter to use when "full" row normalization is used.
|
# The rank of the LoRA adapter to use when "full" row normalization is used.
|
||||||
# Row magnitude preservation is approximate due to non-linear effects,
|
# Row magnitude preservation is approximate due to non-linear effects,
|
||||||
@@ -98,6 +130,9 @@ n_startup_trials = 60
|
|||||||
# Directory to save and load study progress to/from.
|
# Directory to save and load study progress to/from.
|
||||||
study_checkpoint_dir = "checkpoints"
|
study_checkpoint_dir = "checkpoints"
|
||||||
|
|
||||||
|
# Maximum size for individual safetensors files generated when exporting a model.
|
||||||
|
max_shard_size = "5GB"
|
||||||
|
|
||||||
# Strings whose presence in a response (case insensitive) identifies the response as a refusal.
|
# Strings whose presence in a response (case insensitive) identifies the response as a refusal.
|
||||||
refusal_markers = [
|
refusal_markers = [
|
||||||
"sorry",
|
"sorry",
|
||||||
@@ -137,12 +172,6 @@ refusal_markers = [
|
|||||||
# System prompt to use when prompting the model.
|
# System prompt to use when prompting the model.
|
||||||
system_prompt = "You are a helpful assistant."
|
system_prompt = "You are a helpful assistant."
|
||||||
|
|
||||||
# Move intermediate analysis tensors (such as residuals and logprobs)
|
|
||||||
# to CPU memory as soon as possible to reduce peak VRAM usage.
|
|
||||||
# This lowers peak VRAM usage during residual analysis and evaluation,
|
|
||||||
# but may slightly reduce performance due to host/device transfers.
|
|
||||||
offload_outputs_to_cpu = true
|
|
||||||
|
|
||||||
# Dataset of prompts that tend to not result in refusals (used for calculating refusal directions).
|
# Dataset of prompts that tend to not result in refusals (used for calculating refusal directions).
|
||||||
[good_prompts]
|
[good_prompts]
|
||||||
dataset = "mlabonne/harmless_alpaca"
|
dataset = "mlabonne/harmless_alpaca"
|
||||||
|
|||||||
+12
-10
@@ -141,6 +141,16 @@ class Settings(BaseSettings):
|
|||||||
description='Maximum memory to allocate per device (e.g., { "0" = "20GB", "cpu" = "64GB" }).',
|
description='Maximum memory to allocate per device (e.g., { "0" = "20GB", "cpu" = "64GB" }).',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
offload_outputs_to_cpu: bool = Field(
|
||||||
|
default=True,
|
||||||
|
description=(
|
||||||
|
"Whether to move intermediate analysis tensors (such as residuals and logprobs) "
|
||||||
|
"to CPU memory as soon as possible to reduce peak VRAM usage. "
|
||||||
|
"This lowers peak VRAM usage during residual analysis and evaluation, "
|
||||||
|
"but may slightly reduce performance due to host/device transfers."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
trust_remote_code: bool | None = Field(
|
trust_remote_code: bool | None = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description="Whether to trust remote code when loading the model.",
|
description="Whether to trust remote code when loading the model.",
|
||||||
@@ -261,7 +271,7 @@ class Settings(BaseSettings):
|
|||||||
)
|
)
|
||||||
|
|
||||||
orthogonalize_direction: bool = Field(
|
orthogonalize_direction: bool = Field(
|
||||||
default=False,
|
default=True,
|
||||||
description=(
|
description=(
|
||||||
"Whether to adjust the refusal directions so that only the component that is "
|
"Whether to adjust the refusal directions so that only the component that is "
|
||||||
"orthogonal to the good direction is subtracted during abliteration."
|
"orthogonal to the good direction is subtracted during abliteration."
|
||||||
@@ -269,7 +279,7 @@ class Settings(BaseSettings):
|
|||||||
)
|
)
|
||||||
|
|
||||||
row_normalization: RowNormalization = Field(
|
row_normalization: RowNormalization = Field(
|
||||||
default=RowNormalization.NONE,
|
default=RowNormalization.FULL,
|
||||||
description=(
|
description=(
|
||||||
"How to apply row normalization of the weights. Options: "
|
"How to apply row normalization of the weights. Options: "
|
||||||
'"none" (no normalization), '
|
'"none" (no normalization), '
|
||||||
@@ -433,14 +443,6 @@ class Settings(BaseSettings):
|
|||||||
description="System prompt to use when prompting the model.",
|
description="System prompt to use when prompting the model.",
|
||||||
)
|
)
|
||||||
|
|
||||||
offload_outputs_to_cpu: bool = Field(
|
|
||||||
default=True,
|
|
||||||
description=(
|
|
||||||
"Whether to move intermediate analysis tensors (such as residuals and logprobs) "
|
|
||||||
"to CPU memory as soon as possible to reduce peak VRAM usage."
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
good_prompts: DatasetSpecification = Field(
|
good_prompts: DatasetSpecification = Field(
|
||||||
default=DatasetSpecification(
|
default=DatasetSpecification(
|
||||||
dataset="mlabonne/harmless_alpaca",
|
dataset="mlabonne/harmless_alpaca",
|
||||||
|
|||||||
+3
-2
@@ -688,8 +688,9 @@ def run():
|
|||||||
(
|
(
|
||||||
"The following trials resulted in Pareto optimal combinations of refusals and KL divergence. "
|
"The following trials resulted in Pareto optimal combinations of refusals and KL divergence. "
|
||||||
"After selecting a trial, you will be able to save the model, upload it to Hugging Face, "
|
"After selecting a trial, you will be able to save the model, upload it to Hugging Face, "
|
||||||
"or chat with it to test how well it works. You can return to this menu later to select a different trial. "
|
"chat with it to test how well it works, or run standard benchmarks on it. "
|
||||||
"[yellow]Note that KL divergence values above 1 usually indicate significant damage to the original model's capabilities.[/]"
|
"You can return to this menu later to select a different trial. "
|
||||||
|
"[yellow]Note that KL divergence values above 0.5 usually indicate significant damage to the original model's capabilities.[/]"
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ import random
|
|||||||
import tempfile
|
import tempfile
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
|
from importlib.metadata import version
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, TypeVar
|
from typing import Any, TypeVar
|
||||||
|
|
||||||
@@ -283,8 +284,6 @@ def get_readme_intro(
|
|||||||
# Hide the path, which may contain private information.
|
# Hide the path, which may contain private information.
|
||||||
model_link = "a model"
|
model_link = "a model"
|
||||||
|
|
||||||
version_info = get_heretic_version_info()
|
|
||||||
|
|
||||||
if contains_reproducibility_information:
|
if contains_reproducibility_information:
|
||||||
reproducibility_instructions = """
|
reproducibility_instructions = """
|
||||||
> [!TIP]
|
> [!TIP]
|
||||||
@@ -297,7 +296,7 @@ def get_readme_intro(
|
|||||||
|
|
||||||
return f"""# This is a decensored version of {
|
return f"""# This is a decensored version of {
|
||||||
model_link
|
model_link
|
||||||
}, made using [Heretic](https://github.com/p-e-w/heretic) v{version_info.version}
|
}, made using [Heretic](https://github.com/p-e-w/heretic) v{version("heretic-llm")}
|
||||||
{reproducibility_instructions}
|
{reproducibility_instructions}
|
||||||
## Abliteration parameters
|
## Abliteration parameters
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user