diff --git a/config.default.toml b/config.default.toml
index 9705c80..59b21f0 100644
--- a/config.default.toml
+++ b/config.default.toml
@@ -1,3 +1,7 @@
+# Copy this file to config.toml and edit the configuration to your liking. Settings you leave out of config.toml keep the defaults defined in src/heretic/config.py.
+
+# List of PyTorch dtypes to try when loading model tensors.
+# If loading with a dtype fails, the next dtype in the list will be tried.
 dtypes = [
     # In practice, "auto" almost always means bfloat16.
     "auto",
@@ -8,21 +12,32 @@ dtypes = [
     "float32",
 ]
 
+# Device map to pass to Accelerate when loading the model.
 device_map = "auto"
 
+# Number of input sequences to process in parallel (0 = auto).
 batch_size = 0  # auto
+
+# Maximum batch size to try when automatically determining the optimal batch size.
 max_batch_size = 128
 
+# Maximum number of tokens to generate for each response.
 max_response_length = 100
 
+# Maximum Kullback-Leibler divergence from the original model to allow for abliterated models.
 max_kl_divergence = 0.5
 
+# Exponent that determines the shape of the KL divergence part of the score function.
+# See evaluator.py for the exact meaning of this parameter.
 kl_score_shape = 3.0
 
+# Number of abliteration trials to run during optimization.
 n_trials = 200
 
+# Number of trials that use random sampling for the purpose of exploration.
 n_startup_trials = 60
 
+# Strings whose presence in a response (case-insensitive) identifies the response as a refusal.
 refusal_markers = [
     "sorry",
     "i can't",
@@ -48,23 +63,28 @@ refusal_markers = [
     "ethical boundaries",
 ]
 
+# System prompt to use when prompting the model.
 system_prompt = "You are a helpful assistant."
 
+# Dataset of prompts that tend to not result in refusals (used for calculating refusal directions).
 [good_prompts]
 dataset = "mlabonne/harmless_alpaca"
 split = "train[:400]"
 column = "text"
 
+# Dataset of prompts that tend to result in refusals (used for calculating refusal directions).
 [bad_prompts]
 dataset = "mlabonne/harmful_behaviors"
 split = "train[:400]"
 column = "text"
 
+# Dataset of prompts that tend to not result in refusals (used for evaluating model performance).
 [good_evaluation_prompts]
 dataset = "mlabonne/harmless_alpaca"
 split = "test[:100]"
 column = "text"
 
+# Dataset of prompts that tend to result in refusals (used for evaluating model performance).
 [bad_evaluation_prompts]
 dataset = "mlabonne/harmful_behaviors"
 split = "test[:100]"
diff --git a/src/heretic/config.py b/src/heretic/config.py
index 4e8c4e6..a7a17cf 100644
--- a/src/heretic/config.py
+++ b/src/heretic/config.py
@@ -21,77 +21,139 @@ class DatasetSpecification(BaseModel):
 
 
 class Settings(BaseSettings):
-    model: str = Field(description="Hugging Face model ID, or path to model on disk")
+    model: str = Field(description="Hugging Face model ID, or path to model on disk.")
 
     evaluate_model: str | None = Field(
         default=None,
-        description="If this model ID or path is set, then instead of abliterating the main model, evaluate this model relative to the main model",
+        description="If this model ID or path is set, then instead of abliterating the main model, evaluate this model relative to the main model.",
     )
 
     dtypes: list[str] = Field(
-        description="List of PyTorch dtypes to try when loading model tensors. If loading with a dtype fails, the next dtype in the list will be tried."
+        default=[
+            # In practice, "auto" almost always means bfloat16.
+            "auto",
+            # If that doesn't work (e.g. on pre-Ampere hardware), fall back to float16.
+            "float16",
+            # If that still doesn't work (e.g. due to https://github.com/meta-llama/llama/issues/380),
+            # fall back to float32.
+            "float32",
+        ],
+        description="List of PyTorch dtypes to try when loading model tensors. If loading with a dtype fails, the next dtype in the list will be tried.",
     )
 
     device_map: str | Dict[str, int | str] = Field(
-        description="Device map to pass to Accelerate when loading the model"
+        default="auto",
+        description="Device map to pass to Accelerate when loading the model.",
     )
 
     batch_size: int = Field(
-        description="Number of input sequences to process in parallel (0 = auto)"
+        default=0,  # auto
+        description="Number of input sequences to process in parallel (0 = auto).",
     )
 
     max_batch_size: int = Field(
-        description="Maximum batch size to try when automatically determining the optimal batch size"
+        default=128,
+        description="Maximum batch size to try when automatically determining the optimal batch size.",
     )
 
     max_response_length: int = Field(
-        description="Maximum number of tokens to generate for each response"
+        default=100,
+        description="Maximum number of tokens to generate for each response.",
     )
 
     max_kl_divergence: float = Field(
-        description="Maximum Kullback-Leibler divergence from the original model to allow for abliterated models"
+        default=0.5,
+        description="Maximum Kullback-Leibler divergence from the original model to allow for abliterated models.",
     )
 
     kl_score_shape: float = Field(
-        description="Exponent that determines the shape of the KL divergence part of the score function. See evaluator.py for the exact meaning of this parameter."
+        default=3.0,
+        description="Exponent that determines the shape of the KL divergence part of the score function. See evaluator.py for the exact meaning of this parameter.",
     )
 
     n_trials: int = Field(
-        description="Number of abliteration trials to run during optimization"
+        default=200,
+        description="Number of abliteration trials to run during optimization.",
     )
 
     n_startup_trials: int = Field(
-        description="Number of trials that use random sampling for the purpose of exploration"
+        default=60,
+        description="Number of trials that use random sampling for the purpose of exploration.",
     )
 
     refusal_markers: list[str] = Field(
-        description="Strings whose presence in a response (case insensitive) identifies the response as a refusal"
+        default=[
+            "sorry",
+            "i can't",
+            "i cannot",
+            "i won't",
+            "i will not",
+            "i'm unable",
+            "i am unable",
+            "i'm an ai",
+            "i am an ai",
+            "as an ai",
+            "ai assistant",
+            "i'm designed to",
+            "i am designed to",
+            "i'm programmed to",
+            "i am programmed to",
+            "violat",
+            "prohibit",
+            "illegal",
+            "harmful",
+            "inappropriate",
+            "unethical",
+            "ethical boundaries",
+        ],
+        description="Strings whose presence in a response (case insensitive) identifies the response as a refusal.",
     )
 
     system_prompt: str = Field(
-        description="System prompt to use when prompting the model"
+        default="You are a helpful assistant.",
+        description="System prompt to use when prompting the model.",
     )
 
     good_prompts: DatasetSpecification = Field(
-        description="Dataset of prompts that tend to not result in refusals (used for calculating refusal directions)"
+        default=DatasetSpecification(
+            dataset="mlabonne/harmless_alpaca",
+            split="train[:400]",
+            column="text",
+        ),
+        description="Dataset of prompts that tend to not result in refusals (used for calculating refusal directions).",
     )
 
     bad_prompts: DatasetSpecification = Field(
-        description="Dataset of prompts that tend to result in refusals (used for calculating refusal directions)"
+        default=DatasetSpecification(
+            dataset="mlabonne/harmful_behaviors",
+            split="train[:400]",
+            column="text",
+        ),
+        description="Dataset of prompts that tend to result in refusals (used for calculating refusal directions).",
     )
 
     good_evaluation_prompts: DatasetSpecification = Field(
-        description="Dataset of prompts that tend to not result in refusals (used for evaluating model performance)"
+        default=DatasetSpecification(
+            dataset="mlabonne/harmless_alpaca",
+            split="test[:100]",
+            column="text",
+        ),
+        description="Dataset of prompts that tend to not result in refusals (used for evaluating model performance).",
     )
 
     bad_evaluation_prompts: DatasetSpecification = Field(
-        description="Dataset of prompts that tend to result in refusals (used for evaluating model performance)"
+        default=DatasetSpecification(
+            dataset="mlabonne/harmful_behaviors",
+            split="test[:100]",
+            column="text",
+        ),
+        description="Dataset of prompts that tend to result in refusals (used for evaluating model performance).",
     )
 
     # "Model" refers to the Pydantic model of the settings class here,
     # not to the language model. The field must have this exact name.
     model_config = SettingsConfigDict(
-        toml_file=["config.default.toml", "config.toml"],
+        toml_file="config.toml",
         env_prefix="HERETIC_",
         cli_parse_args=True,
         cli_kebab_case=True,