Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 6757ada999 | |||
| 2fd163f5e4 | |||
| e735203d56 | |||
| ed14dd14ca | |||
| 1a9d01c002 | |||
| c9ce36ddde | |||
| d68a41fb54 | |||
| a3dbfd21e6 | |||
| 61c59f7227 | |||
| 46b5ced274 |
@@ -1,6 +1,6 @@
|
|||||||
<img width="128" height="128" align="right" alt="Logo" src="https://github.com/user-attachments/assets/df5f2840-2f92-4991-aa57-252747d7182e" />
|
<img width="128" align="right" alt="Logo" src="https://github.com/user-attachments/assets/df5f2840-2f92-4991-aa57-252747d7182e" />
|
||||||
|
|
||||||
# Heretic: Fully automatic censorship removal for language models<br><br>[](https://discord.gg/gdXc48gSyT) [](https://huggingface.co/heretic-org) [](https://codeberg.org/p-e-w/heretic)
|
# Heretic: Fully automatic censorship removal for language models<br><br>[](https://discord.gg/gdXc48gSyT) [](https://matrix.to/#/#heretic:matrix.org) [](https://huggingface.co/heretic-org) [](https://codeberg.org/p-e-w/heretic)
|
||||||
|
|
||||||
[](https://trendshift.io/repositories/20538)
|
[](https://trendshift.io/repositories/20538)
|
||||||
|
|
||||||
@@ -77,7 +77,7 @@ produced by competing abliteration tools:
|
|||||||
[2](https://old.reddit.com/r/LocalLLaMA/comments/1sy18lx/abliterlitics_benchmarks_and_tensor_comparison/).
|
[2](https://old.reddit.com/r/LocalLLaMA/comments/1sy18lx/abliterlitics_benchmarks_and_tensor_comparison/).
|
||||||
|
|
||||||
The community has created and published
|
The community has created and published
|
||||||
[well over 3000](https://huggingface.co/models?other=heretic)
|
[well over 4000](https://huggingface.co/models?other=heretic)
|
||||||
models with Heretic.
|
models with Heretic.
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -123,10 +123,6 @@ n_trials = 200
|
|||||||
# Number of trials that use random sampling for the purpose of exploration.
|
# Number of trials that use random sampling for the purpose of exploration.
|
||||||
n_startup_trials = 60
|
n_startup_trials = 60
|
||||||
|
|
||||||
# Random seed for reproducible optimization. Set to an integer to enable.
|
|
||||||
# Applies to Python's random module, NumPy, PyTorch, and Optuna.
|
|
||||||
# seed = 75
|
|
||||||
|
|
||||||
# Directory to save and load study progress to/from.
|
# Directory to save and load study progress to/from.
|
||||||
study_checkpoint_dir = "checkpoints"
|
study_checkpoint_dir = "checkpoints"
|
||||||
|
|
||||||
|
|||||||
+2
-2
@@ -58,8 +58,8 @@ dev = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
Homepage = "https://github.com/p-e-w/heretic"
|
Homepage = "https://heretic-project.org"
|
||||||
Documentation = "https://github.com/p-e-w/heretic"
|
Documentation = "https://heretic-project.org/tutorial"
|
||||||
Repository = "https://github.com/p-e-w/heretic.git"
|
Repository = "https://github.com/p-e-w/heretic.git"
|
||||||
Issues = "https://github.com/p-e-w/heretic/issues"
|
Issues = "https://github.com/p-e-w/heretic/issues"
|
||||||
Changelog = "https://github.com/p-e-w/heretic/releases"
|
Changelog = "https://github.com/p-e-w/heretic/releases"
|
||||||
|
|||||||
+19
-7
@@ -32,6 +32,11 @@ class RowNormalization(str, Enum):
|
|||||||
FULL = "full"
|
FULL = "full"
|
||||||
|
|
||||||
|
|
||||||
|
class ExportStrategy(str, Enum):
|
||||||
|
MERGE = "merge"
|
||||||
|
ADAPTER = "adapter"
|
||||||
|
|
||||||
|
|
||||||
class DatasetSpecification(BaseModel):
|
class DatasetSpecification(BaseModel):
|
||||||
dataset: str = Field(
|
dataset: str = Field(
|
||||||
description="Hugging Face dataset ID, or path to dataset on disk."
|
description="Hugging Face dataset ID, or path to dataset on disk."
|
||||||
@@ -119,6 +124,15 @@ class Settings(BaseSettings):
|
|||||||
exclude=True,
|
exclude=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
reproduce: str | None = Field(
|
||||||
|
default=None,
|
||||||
|
description=(
|
||||||
|
"If this path or URL to a reproduce.json file is set, load reproduction information "
|
||||||
|
"from that file, and attempt to reproduce the abliterated model it originated from."
|
||||||
|
),
|
||||||
|
exclude=True,
|
||||||
|
)
|
||||||
|
|
||||||
dtypes: list[str] = Field(
|
dtypes: list[str] = Field(
|
||||||
default=[
|
default=[
|
||||||
# In practice, "auto" almost always means bfloat16.
|
# In practice, "auto" almost always means bfloat16.
|
||||||
@@ -167,13 +181,6 @@ class Settings(BaseSettings):
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
trust_remote_code: bool | None = Field(
|
|
||||||
default=None,
|
|
||||||
description="Whether to trust remote code when loading the model.",
|
|
||||||
# For security reasons, we don't store this setting.
|
|
||||||
exclude=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
batch_size: int = Field(
|
batch_size: int = Field(
|
||||||
default=0, # auto
|
default=0, # auto
|
||||||
description="Number of input sequences to process in parallel (0 = auto).",
|
description="Number of input sequences to process in parallel (0 = auto).",
|
||||||
@@ -411,6 +418,11 @@ class Settings(BaseSettings):
|
|||||||
exclude=True,
|
exclude=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
export_strategy: ExportStrategy | None = Field(
|
||||||
|
default=None,
|
||||||
|
description='How to export the model: "merge", "adapter", or unset to prompt the user.',
|
||||||
|
)
|
||||||
|
|
||||||
max_shard_size: int | str = Field(
|
max_shard_size: int | str = Field(
|
||||||
default="5GB",
|
default="5GB",
|
||||||
description="Maximum size for individual safetensors files generated when exporting a model.",
|
description="Maximum size for individual safetensors files generated when exporting a model.",
|
||||||
|
|||||||
+354
-172
@@ -47,7 +47,7 @@ import questionary
|
|||||||
import torch
|
import torch
|
||||||
import torch.nn.functional as F
|
import torch.nn.functional as F
|
||||||
import transformers
|
import transformers
|
||||||
from huggingface_hub import ModelCard, ModelCardData
|
from huggingface_hub import HfApi, ModelCard, ModelCardData
|
||||||
from lm_eval.models.huggingface import HFLM
|
from lm_eval.models.huggingface import HFLM
|
||||||
from optuna import Trial, TrialPruned
|
from optuna import Trial, TrialPruned
|
||||||
from optuna.exceptions import ExperimentalWarning
|
from optuna.exceptions import ExperimentalWarning
|
||||||
@@ -55,20 +55,26 @@ from optuna.samplers import TPESampler
|
|||||||
from optuna.storages import JournalStorage
|
from optuna.storages import JournalStorage
|
||||||
from optuna.storages.journal import JournalFileBackend, JournalFileOpenLock
|
from optuna.storages.journal import JournalFileBackend, JournalFileOpenLock
|
||||||
from optuna.study import StudyDirection
|
from optuna.study import StudyDirection
|
||||||
from optuna.trial import TrialState
|
from optuna.trial import TrialState, create_trial
|
||||||
from pydantic import ValidationError
|
from pydantic import ValidationError
|
||||||
from questionary import Choice, Style
|
from questionary import Choice, Style
|
||||||
from rich.table import Table
|
from rich.table import Table
|
||||||
from rich.traceback import install
|
from rich.traceback import install
|
||||||
|
|
||||||
from .analyzer import Analyzer
|
from .analyzer import Analyzer
|
||||||
from .config import QuantizationMethod
|
from .config import ExportStrategy, QuantizationMethod
|
||||||
from .evaluator import Evaluator
|
from .evaluator import Evaluator
|
||||||
from .model import AbliterationParameters, Model, get_model_class
|
from .model import AbliterationParameters, Model, get_model_class
|
||||||
from .reproduce import collect_reproducibles
|
from .reproduce import (
|
||||||
|
check_environment,
|
||||||
|
collect_reproducibles,
|
||||||
|
load_reproduction_information,
|
||||||
|
)
|
||||||
from .system import empty_cache, get_accelerator_info
|
from .system import empty_cache, get_accelerator_info
|
||||||
from .utils import (
|
from .utils import (
|
||||||
format_duration,
|
format_duration,
|
||||||
|
format_exception,
|
||||||
|
get_file_sha256,
|
||||||
get_readme_intro,
|
get_readme_intro,
|
||||||
get_trial_parameters,
|
get_trial_parameters,
|
||||||
is_hf_path,
|
is_hf_path,
|
||||||
@@ -84,17 +90,23 @@ from .utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def obtain_merge_strategy(settings: Settings, model: Model) -> str | None:
|
def obtain_export_strategy(
|
||||||
|
settings: Settings,
|
||||||
|
model: Model,
|
||||||
|
) -> ExportStrategy | None:
|
||||||
"""
|
"""
|
||||||
Prompts the user for how to proceed with saving the model.
|
Gets the export strategy from settings or prompts the user.
|
||||||
Provides info to the user if the model is quantized on memory use.
|
Provides info to the user if the model is quantized on memory use.
|
||||||
Returns "merge", "adapter", or None (if cancelled/invalid).
|
Returns an export strategy, or None if cancelled.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
if settings.export_strategy is not None:
|
||||||
|
return settings.export_strategy
|
||||||
|
|
||||||
if settings.quantization == QuantizationMethod.BNB_4BIT:
|
if settings.quantization == QuantizationMethod.BNB_4BIT:
|
||||||
print()
|
print()
|
||||||
print(
|
print(
|
||||||
"Model was loaded with quantization. Merging requires reloading the base model."
|
"The model was loaded with quantization. Merging requires reloading the base model."
|
||||||
)
|
)
|
||||||
print(
|
print(
|
||||||
"[yellow]WARNING: CPU merging requires dequantizing the entire model to system RAM.[/]"
|
"[yellow]WARNING: CPU merging requires dequantizing the entire model to system RAM.[/]"
|
||||||
@@ -113,7 +125,9 @@ def obtain_merge_strategy(settings: Settings, model: Model) -> str | None:
|
|||||||
settings.model,
|
settings.model,
|
||||||
device_map="meta",
|
device_map="meta",
|
||||||
torch_dtype=torch.bfloat16,
|
torch_dtype=torch.bfloat16,
|
||||||
trust_remote_code=model.trusted_models.get(settings.model),
|
trust_remote_code=True
|
||||||
|
if settings.model in model.trusted_models
|
||||||
|
else None,
|
||||||
**model.revision_kwargs,
|
**model.revision_kwargs,
|
||||||
)
|
)
|
||||||
footprint_bytes = meta_model.get_memory_footprint()
|
footprint_bytes = meta_model.get_memory_footprint()
|
||||||
@@ -130,23 +144,24 @@ def obtain_merge_strategy(settings: Settings, model: Model) -> str | None:
|
|||||||
print(
|
print(
|
||||||
"[yellow]Example: A 27B model requires ~80GB RAM. A 70B model requires ~200GB RAM.[/]"
|
"[yellow]Example: A 27B model requires ~80GB RAM. A 70B model requires ~200GB RAM.[/]"
|
||||||
)
|
)
|
||||||
|
|
||||||
print()
|
print()
|
||||||
|
|
||||||
strategy = prompt_select(
|
strategy = prompt_select(
|
||||||
"How do you want to proceed?",
|
"How do you want to export the model?",
|
||||||
choices=[
|
choices=[
|
||||||
Choice(
|
Choice(
|
||||||
title="Merge LoRA into full model"
|
title="Merge the abliteration LoRA and export the full model"
|
||||||
+ (
|
+ (
|
||||||
""
|
""
|
||||||
if settings.quantization == QuantizationMethod.NONE
|
if settings.quantization == QuantizationMethod.NONE
|
||||||
else " (requires sufficient RAM)"
|
else " (requires sufficient RAM)"
|
||||||
),
|
),
|
||||||
value="merge",
|
value=ExportStrategy.MERGE,
|
||||||
),
|
),
|
||||||
Choice(
|
Choice(
|
||||||
title="Save LoRA adapter only (can be merged later)",
|
title="Export the abliteration LoRA only (can be merged later)",
|
||||||
value="adapter",
|
value=ExportStrategy.ADAPTER,
|
||||||
),
|
),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
@@ -164,7 +179,9 @@ def run():
|
|||||||
|
|
||||||
# Modified "Pagga" font from https://budavariam.github.io/asciiart-text/
|
# Modified "Pagga" font from https://budavariam.github.io/asciiart-text/
|
||||||
print(f"[cyan]█░█░█▀▀░█▀▄░█▀▀░▀█▀░█░█▀▀[/] v{version('heretic-llm')}")
|
print(f"[cyan]█░█░█▀▀░█▀▄░█▀▀░▀█▀░█░█▀▀[/] v{version('heretic-llm')}")
|
||||||
print("[cyan]█▀█░█▀▀░█▀▄░█▀▀░░█░░█░█░░[/]")
|
print(
|
||||||
|
"[cyan]█▀█░█▀▀░█▀▄░█▀▀░░█░░█░█░░[/] [blue underline]https://heretic-project.org[/]"
|
||||||
|
)
|
||||||
print(
|
print(
|
||||||
"[cyan]▀░▀░▀▀▀░▀░▀░▀▀▀░░▀░░▀░▀▀▀[/] [blue underline]https://github.com/p-e-w/heretic[/]"
|
"[cyan]▀░▀░▀▀▀░▀░▀░▀▀▀░░▀░░▀░▀▀▀[/] [blue underline]https://github.com/p-e-w/heretic[/]"
|
||||||
)
|
)
|
||||||
@@ -175,6 +192,7 @@ def run():
|
|||||||
len(sys.argv) > 1
|
len(sys.argv) > 1
|
||||||
# Heretic is being invoked in standard (model processing) mode.
|
# Heretic is being invoked in standard (model processing) mode.
|
||||||
and "--collect-reproducibles" not in sys.argv
|
and "--collect-reproducibles" not in sys.argv
|
||||||
|
and "--reproduce" not in sys.argv
|
||||||
# No model has been explicitly provided.
|
# No model has been explicitly provided.
|
||||||
and "--model" not in sys.argv
|
and "--model" not in sys.argv
|
||||||
# The last argument is a parameter value rather than a flag (such as "--help").
|
# The last argument is a parameter value rather than a flag (such as "--help").
|
||||||
@@ -185,7 +203,9 @@ def run():
|
|||||||
|
|
||||||
# Work around the "model" argument being required
|
# Work around the "model" argument being required
|
||||||
# when Heretic is invoked in a non-processing mode.
|
# when Heretic is invoked in a non-processing mode.
|
||||||
if "--collect-reproducibles" in sys.argv and "--model" not in sys.argv:
|
if (
|
||||||
|
"--collect-reproducibles" in sys.argv or "--reproduce" in sys.argv
|
||||||
|
) and "--model" not in sys.argv:
|
||||||
sys.argv.extend(["--model", ""])
|
sys.argv.extend(["--model", ""])
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -195,8 +215,10 @@ def run():
|
|||||||
except ValidationError as error:
|
except ValidationError as error:
|
||||||
print(f"[red]Configuration contains [bold]{error.error_count()}[/] errors:[/]")
|
print(f"[red]Configuration contains [bold]{error.error_count()}[/] errors:[/]")
|
||||||
|
|
||||||
for error in error.errors():
|
for error_details in error.errors():
|
||||||
print(f"[bold]{error['loc'][0]}[/]: [yellow]{error['msg']}[/]")
|
print(
|
||||||
|
f"[bold]{error_details['loc'][0]}[/]: [yellow]{error_details['msg']}[/]"
|
||||||
|
)
|
||||||
|
|
||||||
print()
|
print()
|
||||||
print(
|
print(
|
||||||
@@ -208,6 +230,31 @@ def run():
|
|||||||
collect_reproducibles(settings.collect_reproducibles)
|
collect_reproducibles(settings.collect_reproducibles)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
reproduction_mode = settings.reproduce is not None
|
||||||
|
|
||||||
|
if settings.reproduce is not None:
|
||||||
|
print(f"Loading reproduction information from [bold]{settings.reproduce}[/]...")
|
||||||
|
# FIXME: "Reproduction"/"reproducibility" name inconsistency!
|
||||||
|
reproduction_information = load_reproduction_information(settings.reproduce)
|
||||||
|
|
||||||
|
if reproduction_information["version"] not in ["1", "2"]:
|
||||||
|
print(
|
||||||
|
(
|
||||||
|
f"[red]Unsupported file format version: [bold]{reproduction_information['version']}[/].[/] "
|
||||||
|
"Try loading the file with a newer version of Heretic."
|
||||||
|
)
|
||||||
|
)
|
||||||
|
return
|
||||||
|
|
||||||
|
if not check_environment(reproduction_information):
|
||||||
|
return
|
||||||
|
|
||||||
|
print()
|
||||||
|
|
||||||
|
verify_hashes = reproduction_information["version"] != "1"
|
||||||
|
|
||||||
|
settings = Settings.model_validate(reproduction_information["settings"])
|
||||||
|
|
||||||
if settings.seed is None:
|
if settings.seed is None:
|
||||||
settings.seed = random.randint(0, 2**32 - 1)
|
settings.seed = random.randint(0, 2**32 - 1)
|
||||||
|
|
||||||
@@ -257,7 +304,11 @@ def run():
|
|||||||
except IndexError:
|
except IndexError:
|
||||||
existing_study = None
|
existing_study = None
|
||||||
|
|
||||||
if existing_study is not None and settings.evaluate_model is None:
|
if (
|
||||||
|
existing_study is not None
|
||||||
|
and settings.evaluate_model is None
|
||||||
|
and not reproduction_mode
|
||||||
|
):
|
||||||
choices = []
|
choices = []
|
||||||
|
|
||||||
if existing_study.user_attrs["finished"]:
|
if existing_study.user_attrs["finished"]:
|
||||||
@@ -362,7 +413,12 @@ def run():
|
|||||||
# We cannot recover from this.
|
# We cannot recover from this.
|
||||||
raise
|
raise
|
||||||
|
|
||||||
print(f"[red]Failed[/] ({error})")
|
formatted = format_exception(error)
|
||||||
|
if "\n" in formatted:
|
||||||
|
print(f"[red]Failed:\n{formatted}[/]")
|
||||||
|
else:
|
||||||
|
print(f"[red]Failed ({formatted})[/]")
|
||||||
|
|
||||||
break
|
break
|
||||||
|
|
||||||
response_lengths = [
|
response_lengths = [
|
||||||
@@ -597,162 +653,185 @@ def run():
|
|||||||
trial.study.stop()
|
trial.study.stop()
|
||||||
raise TrialPruned()
|
raise TrialPruned()
|
||||||
|
|
||||||
study = optuna.create_study(
|
if not reproduction_mode:
|
||||||
sampler=TPESampler(
|
study = optuna.create_study(
|
||||||
n_startup_trials=settings.n_startup_trials,
|
sampler=TPESampler(
|
||||||
n_ei_candidates=128,
|
n_startup_trials=settings.n_startup_trials,
|
||||||
multivariate=True,
|
n_ei_candidates=128,
|
||||||
seed=settings.seed,
|
multivariate=True,
|
||||||
),
|
seed=settings.seed,
|
||||||
directions=[StudyDirection.MINIMIZE, StudyDirection.MINIMIZE],
|
),
|
||||||
storage=storage,
|
directions=[StudyDirection.MINIMIZE, StudyDirection.MINIMIZE],
|
||||||
study_name="heretic",
|
storage=storage,
|
||||||
load_if_exists=True,
|
study_name="heretic",
|
||||||
)
|
load_if_exists=True,
|
||||||
|
|
||||||
study.set_user_attr("settings", settings.model_dump_json())
|
|
||||||
study.set_user_attr("finished", False)
|
|
||||||
|
|
||||||
def count_completed_trials() -> int:
|
|
||||||
# Count number of complete trials to compute trials to run.
|
|
||||||
return sum([(1 if t.state == TrialState.COMPLETE else 0) for t in study.trials])
|
|
||||||
|
|
||||||
start_index = trial_index = count_completed_trials()
|
|
||||||
if start_index > 0:
|
|
||||||
print()
|
|
||||||
print("Resuming existing study.")
|
|
||||||
|
|
||||||
try:
|
|
||||||
study.optimize(
|
|
||||||
objective_wrapper,
|
|
||||||
n_trials=settings.n_trials - count_completed_trials(),
|
|
||||||
)
|
)
|
||||||
except KeyboardInterrupt:
|
|
||||||
# This additional handler takes care of the small chance that KeyboardInterrupt
|
|
||||||
# is raised just between trials, which wouldn't be caught by the handler
|
|
||||||
# defined in objective_wrapper above.
|
|
||||||
pass
|
|
||||||
|
|
||||||
if count_completed_trials() == settings.n_trials:
|
study.set_user_attr("settings", settings.model_dump_json())
|
||||||
study.set_user_attr("finished", True)
|
study.set_user_attr("finished", False)
|
||||||
|
|
||||||
|
start_index = trial_index = len(study.trials)
|
||||||
|
if start_index > 0:
|
||||||
|
print()
|
||||||
|
print("Resuming existing study.")
|
||||||
|
|
||||||
|
try:
|
||||||
|
study.optimize(
|
||||||
|
objective_wrapper,
|
||||||
|
n_trials=settings.n_trials - len(study.trials),
|
||||||
|
)
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
# This additional handler takes care of the small chance that KeyboardInterrupt
|
||||||
|
# is raised just between trials, which wouldn't be caught by the handler
|
||||||
|
# defined in objective_wrapper above.
|
||||||
|
pass
|
||||||
|
|
||||||
|
if len(study.trials) == settings.n_trials:
|
||||||
|
study.set_user_attr("finished", True)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
# If no trials at all have been evaluated, the study must have been stopped
|
if not reproduction_mode:
|
||||||
# by pressing Ctrl+C while the first trial was running. In this case, we just
|
# If no trials at all have been evaluated, the study must have been stopped
|
||||||
# re-raise the interrupt to invoke the standard handler defined below.
|
# by pressing Ctrl+C while the first trial was running. In this case, we just
|
||||||
completed_trials = [t for t in study.trials if t.state == TrialState.COMPLETE]
|
# re-raise the interrupt to invoke the standard handler defined below.
|
||||||
if not completed_trials:
|
completed_trials = [
|
||||||
raise KeyboardInterrupt
|
t for t in study.trials if t.state == TrialState.COMPLETE
|
||||||
|
]
|
||||||
|
if not completed_trials:
|
||||||
|
raise KeyboardInterrupt
|
||||||
|
|
||||||
# Get the Pareto front of trials. We can't use study.best_trials directly
|
# Get the Pareto front of trials. We can't use study.best_trials directly
|
||||||
# as get_score() doesn't return the pure KL divergence and refusal count.
|
# as get_score() doesn't return the pure KL divergence and refusal count.
|
||||||
# Note: Unlike study.best_trials, this does not handle objective constraints.
|
# Note: Unlike study.best_trials, this does not handle objective constraints.
|
||||||
sorted_trials = sorted(
|
sorted_trials = sorted(
|
||||||
completed_trials,
|
completed_trials,
|
||||||
key=lambda trial: (
|
key=lambda trial: (
|
||||||
trial.user_attrs["refusals"],
|
trial.user_attrs["refusals"],
|
||||||
trial.user_attrs["kl_divergence"],
|
trial.user_attrs["kl_divergence"],
|
||||||
),
|
|
||||||
)
|
|
||||||
min_divergence = math.inf
|
|
||||||
best_trials = []
|
|
||||||
for trial in sorted_trials:
|
|
||||||
kl_divergence = trial.user_attrs["kl_divergence"]
|
|
||||||
if kl_divergence < min_divergence:
|
|
||||||
min_divergence = kl_divergence
|
|
||||||
best_trials.append(trial)
|
|
||||||
|
|
||||||
choices = [
|
|
||||||
Choice(
|
|
||||||
title=(
|
|
||||||
f"[Trial {trial.user_attrs['index']:>3}] "
|
|
||||||
f"Refusals: {trial.user_attrs['refusals']:>2}/{len(evaluator.bad_prompts)}, "
|
|
||||||
f"KL divergence: {trial.user_attrs['kl_divergence']:.4f}"
|
|
||||||
),
|
),
|
||||||
value=trial,
|
|
||||||
)
|
)
|
||||||
for trial in best_trials
|
min_divergence = math.inf
|
||||||
]
|
best_trials = []
|
||||||
|
for trial in sorted_trials:
|
||||||
|
kl_divergence = trial.user_attrs["kl_divergence"]
|
||||||
|
if kl_divergence < min_divergence:
|
||||||
|
min_divergence = kl_divergence
|
||||||
|
best_trials.append(trial)
|
||||||
|
|
||||||
choices.append(
|
choices = [
|
||||||
Choice(
|
Choice(
|
||||||
title="Run additional trials",
|
title=(
|
||||||
value="continue",
|
f"[Trial {trial.user_attrs['index']:>3}] "
|
||||||
)
|
f"Refusals: {trial.user_attrs['refusals']:>2}/{len(evaluator.bad_prompts)}, "
|
||||||
)
|
f"KL divergence: {trial.user_attrs['kl_divergence']:.4f}"
|
||||||
|
),
|
||||||
|
value=trial,
|
||||||
|
)
|
||||||
|
for trial in best_trials
|
||||||
|
]
|
||||||
|
|
||||||
choices.append(
|
choices.append(
|
||||||
Choice(
|
Choice(
|
||||||
title="Exit program",
|
title="Run additional trials",
|
||||||
value="",
|
value="continue",
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
|
||||||
|
|
||||||
print()
|
choices.append(
|
||||||
print("[bold green]Optimization finished![/]")
|
Choice(
|
||||||
print()
|
title="Exit program",
|
||||||
print(
|
value="",
|
||||||
(
|
)
|
||||||
"The following trials resulted in Pareto optimal combinations of refusals and KL divergence. "
|
)
|
||||||
"After selecting a trial, you will be able to save the model, upload it to Hugging Face, "
|
|
||||||
"chat with it to test how well it works, or run standard benchmarks on it. "
|
print()
|
||||||
"You can return to this menu later to select a different trial. "
|
print("[bold green]Optimization finished![/]")
|
||||||
"[yellow]Note that KL divergence values above 0.5 usually indicate significant damage to the original model's capabilities.[/]"
|
print()
|
||||||
|
print(
|
||||||
|
(
|
||||||
|
"The following trials resulted in Pareto optimal combinations of refusals and KL divergence. "
|
||||||
|
"After selecting a trial, you will be able to save the model, upload it to Hugging Face, "
|
||||||
|
"chat with it to test how well it works, or run standard benchmarks on it. "
|
||||||
|
"You can return to this menu later to select a different trial. "
|
||||||
|
"[yellow]Note that KL divergence values above 0.5 usually indicate significant damage to the original model's capabilities.[/]"
|
||||||
|
)
|
||||||
)
|
)
|
||||||
)
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
print()
|
if reproduction_mode:
|
||||||
trial = prompt_select("Which trial do you want to use?", choices)
|
parameters = reproduction_information["parameters"]
|
||||||
|
metrics = reproduction_information["metrics"]
|
||||||
|
|
||||||
|
trial = create_trial(
|
||||||
|
values=[],
|
||||||
|
user_attrs={
|
||||||
|
"direction_index": parameters["direction_index"],
|
||||||
|
"parameters": parameters["abliteration_parameters"],
|
||||||
|
"kl_divergence": metrics["kl_divergence"],
|
||||||
|
"refusals": metrics["refusals"],
|
||||||
|
"base_refusals": metrics["base_refusals"],
|
||||||
|
"n_bad_prompts": metrics["n_bad_prompts"],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
print()
|
||||||
|
print("Restoring model from reproduction information...")
|
||||||
|
else:
|
||||||
|
print()
|
||||||
|
trial = prompt_select("Which trial do you want to use?", choices)
|
||||||
|
|
||||||
|
if trial is None or trial == "":
|
||||||
|
return
|
||||||
|
|
||||||
|
if trial == "continue":
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
n_additional_trials = prompt_text(
|
||||||
|
"How many additional trials do you want to run?"
|
||||||
|
)
|
||||||
|
if n_additional_trials is None or n_additional_trials == "":
|
||||||
|
n_additional_trials = 0
|
||||||
|
break
|
||||||
|
n_additional_trials = int(n_additional_trials)
|
||||||
|
if n_additional_trials > 0:
|
||||||
|
break
|
||||||
|
print("[red]Please enter a number greater than 0.[/]")
|
||||||
|
except ValueError:
|
||||||
|
print("[red]Please enter a number.[/]")
|
||||||
|
|
||||||
|
if n_additional_trials == 0:
|
||||||
|
continue
|
||||||
|
|
||||||
|
settings.n_trials += n_additional_trials
|
||||||
|
study.set_user_attr("settings", settings.model_dump_json())
|
||||||
|
study.set_user_attr("finished", False)
|
||||||
|
|
||||||
if trial == "continue":
|
|
||||||
while True:
|
|
||||||
try:
|
try:
|
||||||
n_additional_trials = prompt_text(
|
study.optimize(
|
||||||
"How many additional trials do you want to run?"
|
objective_wrapper,
|
||||||
|
n_trials=settings.n_trials - len(study.trials),
|
||||||
)
|
)
|
||||||
if n_additional_trials is None or n_additional_trials == "":
|
except KeyboardInterrupt:
|
||||||
n_additional_trials = 0
|
pass
|
||||||
break
|
|
||||||
n_additional_trials = int(n_additional_trials)
|
|
||||||
if n_additional_trials > 0:
|
|
||||||
break
|
|
||||||
print("[red]Please enter a number greater than 0.[/]")
|
|
||||||
except ValueError:
|
|
||||||
print("[red]Please enter a number.[/]")
|
|
||||||
|
|
||||||
if n_additional_trials == 0:
|
if len(study.trials) == settings.n_trials:
|
||||||
continue
|
study.set_user_attr("finished", True)
|
||||||
|
|
||||||
settings.n_trials += n_additional_trials
|
break
|
||||||
study.set_user_attr("settings", settings.model_dump_json())
|
|
||||||
study.set_user_attr("finished", False)
|
|
||||||
|
|
||||||
try:
|
print()
|
||||||
study.optimize(
|
print(
|
||||||
objective_wrapper,
|
f"Restoring model from trial [bold]{trial.user_attrs['index']}[/]..."
|
||||||
n_trials=settings.n_trials - count_completed_trials(),
|
)
|
||||||
)
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
pass
|
|
||||||
|
|
||||||
if count_completed_trials() == settings.n_trials:
|
|
||||||
study.set_user_attr("finished", True)
|
|
||||||
|
|
||||||
break
|
|
||||||
|
|
||||||
elif trial is None or trial == "":
|
|
||||||
return
|
|
||||||
|
|
||||||
print()
|
|
||||||
print(f"Restoring model from trial [bold]{trial.user_attrs['index']}[/]...")
|
|
||||||
print("* Parameters:")
|
print("* Parameters:")
|
||||||
for name, value in get_trial_parameters(trial).items():
|
for name, value in get_trial_parameters(trial).items():
|
||||||
print(f" * {name} = [bold]{value}[/]")
|
print(f" * {name} = [bold]{value}[/]")
|
||||||
|
|
||||||
# Per https://github.com/huggingface/peft/issues/868#issuecomment-1820642893 once a LoRA is merged it's
|
# Per https://github.com/huggingface/peft/issues/868#issuecomment-1820642893
|
||||||
# expected to be empty. Provide a utility function to restore the previous LoRA-ified state.
|
# once a LoRA is merged it's expected to be empty. Provide a utility function
|
||||||
def reset_trial_model() -> None:
|
# to restore the previous LoRA-ified state.
|
||||||
|
def reset_trial_model():
|
||||||
print("* Resetting model...")
|
print("* Resetting model...")
|
||||||
model.reset_model()
|
model.reset_model()
|
||||||
print("* Abliterating...")
|
print("* Abliterating...")
|
||||||
@@ -776,12 +855,20 @@ def run():
|
|||||||
"Upload the model to Hugging Face",
|
"Upload the model to Hugging Face",
|
||||||
"Chat with the model",
|
"Chat with the model",
|
||||||
"Benchmark the model",
|
"Benchmark the model",
|
||||||
"Return to the trial selection menu",
|
Choice(
|
||||||
|
title="Exit program"
|
||||||
|
if reproduction_mode
|
||||||
|
else "Return to the trial selection menu",
|
||||||
|
value="",
|
||||||
|
),
|
||||||
],
|
],
|
||||||
)
|
)
|
||||||
|
|
||||||
if action is None or action == "Return to the trial selection menu":
|
if action is None or action == "":
|
||||||
break
|
if reproduction_mode:
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
# All actions are wrapped in a try/except block so that if an error occurs,
|
# All actions are wrapped in a try/except block so that if an error occurs,
|
||||||
# another action can be tried, instead of the program crashing and losing
|
# another action can be tried, instead of the program crashing and losing
|
||||||
@@ -793,11 +880,11 @@ def run():
|
|||||||
if not save_directory:
|
if not save_directory:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
strategy = obtain_merge_strategy(settings, model)
|
strategy = obtain_export_strategy(settings, model)
|
||||||
if strategy is None:
|
if strategy is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if strategy == "adapter":
|
if strategy == ExportStrategy.ADAPTER:
|
||||||
print("Saving LoRA adapter...")
|
print("Saving LoRA adapter...")
|
||||||
model.model.save_pretrained(
|
model.model.save_pretrained(
|
||||||
save_directory,
|
save_directory,
|
||||||
@@ -813,10 +900,37 @@ def run():
|
|||||||
del merged_model
|
del merged_model
|
||||||
empty_cache()
|
empty_cache()
|
||||||
model.tokenizer.save_pretrained(save_directory)
|
model.tokenizer.save_pretrained(save_directory)
|
||||||
|
if model.processor is not None:
|
||||||
|
model.processor.save_pretrained(save_directory)
|
||||||
reset_trial_model()
|
reset_trial_model()
|
||||||
|
|
||||||
print(f"Model saved to [bold]{save_directory}[/].")
|
print(f"Model saved to [bold]{save_directory}[/].")
|
||||||
|
|
||||||
|
if reproduction_mode and verify_hashes:
|
||||||
|
print("Verifying hashes of weight files...")
|
||||||
|
|
||||||
|
for (
|
||||||
|
filename,
|
||||||
|
original_sha256,
|
||||||
|
) in reproduction_information["hashes"].items():
|
||||||
|
file_path = Path(save_directory) / filename
|
||||||
|
|
||||||
|
if file_path.exists():
|
||||||
|
sha256 = get_file_sha256(file_path)
|
||||||
|
|
||||||
|
if sha256.lower() == original_sha256.lower():
|
||||||
|
print(
|
||||||
|
f"[bold]{filename}:[/] [green]Hash matches[/]"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
print(
|
||||||
|
f"[bold]{filename}:[/] [yellow]Hash doesn't match[/]"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
print(
|
||||||
|
f"[bold]{filename}:[/] [red]File not found[/]"
|
||||||
|
)
|
||||||
|
|
||||||
case "Upload the model to Hugging Face":
|
case "Upload the model to Hugging Face":
|
||||||
# We don't use huggingface_hub.login() because that stores the token on disk,
|
# We don't use huggingface_hub.login() because that stores the token on disk,
|
||||||
# and since this program will often be run on rented or shared GPU servers,
|
# and since this program will often be run on rented or shared GPU servers,
|
||||||
@@ -851,7 +965,7 @@ def run():
|
|||||||
continue
|
continue
|
||||||
private = visibility == "Private"
|
private = visibility == "Private"
|
||||||
|
|
||||||
strategy = obtain_merge_strategy(settings, model)
|
strategy = obtain_export_strategy(settings, model)
|
||||||
if strategy is None:
|
if strategy is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -863,8 +977,10 @@ def run():
|
|||||||
settings.good_evaluation_prompts.dataset,
|
settings.good_evaluation_prompts.dataset,
|
||||||
settings.bad_evaluation_prompts.dataset,
|
settings.bad_evaluation_prompts.dataset,
|
||||||
]
|
]
|
||||||
is_reproducible = is_hf_path(settings.model) and all(
|
is_reproducible = (
|
||||||
is_hf_path(dataset) for dataset in datasets
|
is_hf_path(settings.model)
|
||||||
|
and all(is_hf_path(dataset) for dataset in datasets)
|
||||||
|
and not reproduction_mode
|
||||||
)
|
)
|
||||||
|
|
||||||
if is_reproducible:
|
if is_reproducible:
|
||||||
@@ -899,7 +1015,7 @@ def run():
|
|||||||
else:
|
else:
|
||||||
reproducibility_information = "none"
|
reproducibility_information = "none"
|
||||||
|
|
||||||
if strategy == "adapter":
|
if strategy == ExportStrategy.ADAPTER:
|
||||||
print("Uploading LoRA adapter...")
|
print("Uploading LoRA adapter...")
|
||||||
model.model.push_to_hub(
|
model.model.push_to_hub(
|
||||||
repo_id,
|
repo_id,
|
||||||
@@ -923,6 +1039,12 @@ def run():
|
|||||||
private=private,
|
private=private,
|
||||||
token=token,
|
token=token,
|
||||||
)
|
)
|
||||||
|
if model.processor is not None:
|
||||||
|
model.processor.push_to_hub(
|
||||||
|
repo_id,
|
||||||
|
private=private,
|
||||||
|
token=token,
|
||||||
|
)
|
||||||
reset_trial_model()
|
reset_trial_model()
|
||||||
|
|
||||||
if is_hf_path(settings.model):
|
if is_hf_path(settings.model):
|
||||||
@@ -961,21 +1083,77 @@ def run():
|
|||||||
if reproducibility_information != "none":
|
if reproducibility_information != "none":
|
||||||
# Set the number of trials to the number of actual completed trials
|
# Set the number of trials to the number of actual completed trials
|
||||||
# for the reproduction configuration.
|
# for the reproduction configuration.
|
||||||
settings.n_trials = count_completed_trials()
|
settings.n_trials = len(study.trials)
|
||||||
|
current_export_strategy = settings.export_strategy
|
||||||
|
settings.export_strategy = strategy
|
||||||
|
|
||||||
upload_reproduce_folder(
|
try:
|
||||||
repo_id,
|
upload_reproduce_folder(
|
||||||
settings,
|
repo_id,
|
||||||
token,
|
settings,
|
||||||
checkpoint_path=study_checkpoint_file,
|
token,
|
||||||
trial=trial,
|
checkpoint_path=study_checkpoint_file,
|
||||||
include_system_information=(
|
trial=trial,
|
||||||
reproducibility_information == "full"
|
include_system_information=(
|
||||||
),
|
reproducibility_information == "full"
|
||||||
)
|
),
|
||||||
|
)
|
||||||
|
finally:
|
||||||
|
settings.export_strategy = current_export_strategy
|
||||||
|
|
||||||
print(f"Model uploaded to [bold]{repo_id}[/].")
|
print(f"Model uploaded to [bold]{repo_id}[/].")
|
||||||
|
|
||||||
|
if reproduction_mode and verify_hashes:
|
||||||
|
print("Verifying hashes of weight files...")
|
||||||
|
|
||||||
|
api = HfApi()
|
||||||
|
model_info = api.model_info(
|
||||||
|
repo_id,
|
||||||
|
files_metadata=True,
|
||||||
|
token=token,
|
||||||
|
)
|
||||||
|
|
||||||
|
if not model_info.siblings:
|
||||||
|
raise RuntimeError(
|
||||||
|
"Could not fetch uploaded model hashes."
|
||||||
|
)
|
||||||
|
|
||||||
|
for (
|
||||||
|
filename,
|
||||||
|
original_sha256,
|
||||||
|
) in reproduction_information["hashes"].items():
|
||||||
|
file_found = False
|
||||||
|
|
||||||
|
for file in model_info.siblings:
|
||||||
|
if file.rfilename == filename:
|
||||||
|
sha256 = getattr(file, "lfs", {}).get(
|
||||||
|
"sha256"
|
||||||
|
)
|
||||||
|
if not sha256:
|
||||||
|
raise RuntimeError(
|
||||||
|
"Could not fetch uploaded model hashes."
|
||||||
|
)
|
||||||
|
|
||||||
|
if (
|
||||||
|
sha256.lower()
|
||||||
|
== original_sha256.lower()
|
||||||
|
):
|
||||||
|
print(
|
||||||
|
f"[bold]{filename}:[/] [green]Hash matches[/]"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
print(
|
||||||
|
f"[bold]{filename}:[/] [yellow]Hash doesn't match[/]"
|
||||||
|
)
|
||||||
|
|
||||||
|
file_found = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if not file_found:
|
||||||
|
print(
|
||||||
|
f"[bold]{filename}:[/] [red]File not found[/]"
|
||||||
|
)
|
||||||
|
|
||||||
case "Chat with the model":
|
case "Chat with the model":
|
||||||
print()
|
print()
|
||||||
print(
|
print(
|
||||||
@@ -1114,7 +1292,11 @@ def run():
|
|||||||
print(table)
|
print(table)
|
||||||
|
|
||||||
except Exception as error:
|
except Exception as error:
|
||||||
print(f"[red]Error: {error}[/]")
|
formatted = format_exception(error)
|
||||||
|
if "\n" in formatted:
|
||||||
|
print(f"[red]Error:\n{formatted}[/]")
|
||||||
|
else:
|
||||||
|
print(f"[red]Error: {formatted}[/]")
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|||||||
+57
-22
@@ -17,12 +17,14 @@ from torch.nn import Module, ModuleList
|
|||||||
from transformers import (
|
from transformers import (
|
||||||
AutoModelForCausalLM,
|
AutoModelForCausalLM,
|
||||||
AutoModelForImageTextToText,
|
AutoModelForImageTextToText,
|
||||||
|
AutoProcessor,
|
||||||
AutoTokenizer,
|
AutoTokenizer,
|
||||||
BatchEncoding,
|
BatchEncoding,
|
||||||
BitsAndBytesConfig,
|
BitsAndBytesConfig,
|
||||||
PretrainedConfig,
|
PretrainedConfig,
|
||||||
PreTrainedModel,
|
PreTrainedModel,
|
||||||
PreTrainedTokenizerBase,
|
PreTrainedTokenizerBase,
|
||||||
|
ProcessorMixin,
|
||||||
TextStreamer,
|
TextStreamer,
|
||||||
)
|
)
|
||||||
from transformers.generation import (
|
from transformers.generation import (
|
||||||
@@ -31,7 +33,7 @@ from transformers.generation import (
|
|||||||
|
|
||||||
from .config import QuantizationMethod, RowNormalization, Settings
|
from .config import QuantizationMethod, RowNormalization, Settings
|
||||||
from .system import empty_cache
|
from .system import empty_cache
|
||||||
from .utils import Prompt, batchify, print
|
from .utils import Prompt, batchify, format_exception, print
|
||||||
|
|
||||||
|
|
||||||
def get_model_class(
|
def get_model_class(
|
||||||
@@ -56,7 +58,10 @@ class AbliterationParameters:
|
|||||||
class Model:
|
class Model:
|
||||||
model: PreTrainedModel | PeftModel
|
model: PreTrainedModel | PeftModel
|
||||||
tokenizer: PreTrainedTokenizerBase
|
tokenizer: PreTrainedTokenizerBase
|
||||||
|
# Set for multimodal models, None for text-only ones.
|
||||||
|
processor: ProcessorMixin | None
|
||||||
peft_config: LoraConfig
|
peft_config: LoraConfig
|
||||||
|
dtype: torch.dtype
|
||||||
|
|
||||||
def __init__(self, settings: Settings):
|
def __init__(self, settings: Settings):
|
||||||
self.settings = settings
|
self.settings = settings
|
||||||
@@ -71,10 +76,17 @@ class Model:
|
|||||||
|
|
||||||
self.tokenizer = AutoTokenizer.from_pretrained(
|
self.tokenizer = AutoTokenizer.from_pretrained(
|
||||||
settings.model,
|
settings.model,
|
||||||
trust_remote_code=settings.trust_remote_code,
|
|
||||||
**self.revision_kwargs,
|
**self.revision_kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Multimodal models have a processor we'll want to save.
|
||||||
|
self.processor = None
|
||||||
|
if get_model_class(settings.model) == AutoModelForImageTextToText:
|
||||||
|
self.processor = AutoProcessor.from_pretrained(
|
||||||
|
settings.model,
|
||||||
|
**self.revision_kwargs,
|
||||||
|
)
|
||||||
|
|
||||||
# Fallback for tokenizers that don't declare a special pad token.
|
# Fallback for tokenizers that don't declare a special pad token.
|
||||||
if self.tokenizer.pad_token is None:
|
if self.tokenizer.pad_token is None:
|
||||||
self.tokenizer.pad_token = self.tokenizer.eos_token
|
self.tokenizer.pad_token = self.tokenizer.eos_token
|
||||||
@@ -90,10 +102,8 @@ class Model:
|
|||||||
if settings.max_memory
|
if settings.max_memory
|
||||||
else None
|
else None
|
||||||
)
|
)
|
||||||
self.trusted_models = {settings.model: settings.trust_remote_code}
|
|
||||||
|
|
||||||
if self.settings.evaluate_model is not None:
|
self.trusted_models = set()
|
||||||
self.trusted_models[settings.evaluate_model] = settings.trust_remote_code
|
|
||||||
|
|
||||||
for dtype in settings.dtypes:
|
for dtype in settings.dtypes:
|
||||||
print(f"* Trying dtype [bold]{dtype}[/]...")
|
print(f"* Trying dtype [bold]{dtype}[/]...")
|
||||||
@@ -112,15 +122,19 @@ class Model:
|
|||||||
dtype=dtype,
|
dtype=dtype,
|
||||||
device_map=settings.device_map,
|
device_map=settings.device_map,
|
||||||
max_memory=self.max_memory,
|
max_memory=self.max_memory,
|
||||||
trust_remote_code=self.trusted_models.get(settings.model),
|
trust_remote_code=True
|
||||||
|
if settings.model in self.trusted_models
|
||||||
|
else None,
|
||||||
**self.revision_kwargs,
|
**self.revision_kwargs,
|
||||||
**extra_kwargs,
|
**extra_kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
self.dtype = self.model.dtype
|
||||||
|
|
||||||
# If we reach this point and the model requires trust_remote_code,
|
# If we reach this point and the model requires trust_remote_code,
|
||||||
# either the user accepted, or settings.trust_remote_code is True.
|
# the user must have agreed when prompted to execute remote code,
|
||||||
if self.trusted_models.get(settings.model) is None:
|
# because from_pretrained raises an exception otherwise.
|
||||||
self.trusted_models[settings.model] = True
|
self.trusted_models.add(settings.model)
|
||||||
|
|
||||||
# A test run can reveal dtype-related problems such as the infamous
|
# A test run can reveal dtype-related problems such as the infamous
|
||||||
# "RuntimeError: probability tensor contains either `inf`, `nan` or element < 0"
|
# "RuntimeError: probability tensor contains either `inf`, `nan` or element < 0"
|
||||||
@@ -137,7 +151,13 @@ class Model:
|
|||||||
except Exception as error:
|
except Exception as error:
|
||||||
self.model = None # ty:ignore[invalid-assignment]
|
self.model = None # ty:ignore[invalid-assignment]
|
||||||
empty_cache()
|
empty_cache()
|
||||||
print(f"* [red]Failed[/] ({error})")
|
|
||||||
|
formatted = format_exception(error)
|
||||||
|
if "\n" in formatted:
|
||||||
|
print(f"* [red]Failed:\n{formatted}[/]")
|
||||||
|
else:
|
||||||
|
print(f"* [red]Failed ({formatted})[/]")
|
||||||
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if settings.quantization == QuantizationMethod.BNB_4BIT:
|
if settings.quantization == QuantizationMethod.BNB_4BIT:
|
||||||
@@ -264,7 +284,9 @@ class Model:
|
|||||||
self.settings.model,
|
self.settings.model,
|
||||||
torch_dtype=self.model.dtype,
|
torch_dtype=self.model.dtype,
|
||||||
device_map="cpu",
|
device_map="cpu",
|
||||||
trust_remote_code=self.trusted_models.get(self.settings.model),
|
trust_remote_code=True
|
||||||
|
if self.settings.model in self.trusted_models
|
||||||
|
else None,
|
||||||
**self.revision_kwargs,
|
**self.revision_kwargs,
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -300,33 +322,40 @@ class Model:
|
|||||||
- Slow path: If switching models or after merge_and_unload(),
|
- Slow path: If switching models or after merge_and_unload(),
|
||||||
performs full model reload with quantization config.
|
performs full model reload with quantization config.
|
||||||
"""
|
"""
|
||||||
current_model = getattr(self.model.config, "name_or_path", None)
|
|
||||||
|
# If a prior model load was interrupted/cancelled mid-process, self.model will be None.
|
||||||
|
current_model = None
|
||||||
|
if self.model is not None:
|
||||||
|
current_model = getattr(self.model.config, "name_or_path", None)
|
||||||
|
|
||||||
if current_model == self.settings.model and not self.needs_reload:
|
if current_model == self.settings.model and not self.needs_reload:
|
||||||
# Reset LoRA adapters to zero (identity transformation)
|
# Reset LoRA adapters to zero (identity transformation).
|
||||||
for name, module in self.model.named_modules():
|
for name, module in self.model.named_modules():
|
||||||
if "lora_B" in name and hasattr(module, "weight"):
|
if "lora_B" in name and hasattr(module, "weight"):
|
||||||
torch.nn.init.zeros_(module.weight)
|
torch.nn.init.zeros_(module.weight)
|
||||||
return
|
return
|
||||||
|
|
||||||
dtype = self.model.dtype
|
|
||||||
|
|
||||||
# Purge existing model object from memory to make space.
|
# Purge existing model object from memory to make space.
|
||||||
self.model = None # ty:ignore[invalid-assignment]
|
self.model = None # ty:ignore[invalid-assignment]
|
||||||
empty_cache()
|
empty_cache()
|
||||||
|
|
||||||
quantization_config = self._get_quantization_config(str(dtype).split(".")[-1])
|
quantization_config = self._get_quantization_config(
|
||||||
|
str(self.dtype).split(".")[-1]
|
||||||
|
)
|
||||||
|
|
||||||
# Build kwargs, only include quantization_config if it's not None
|
# Build kwargs, only include quantization_config if it's not None.
|
||||||
extra_kwargs = {}
|
extra_kwargs = {}
|
||||||
if quantization_config is not None:
|
if quantization_config is not None:
|
||||||
extra_kwargs["quantization_config"] = quantization_config
|
extra_kwargs["quantization_config"] = quantization_config
|
||||||
|
|
||||||
self.model = get_model_class(self.settings.model).from_pretrained(
|
self.model = get_model_class(self.settings.model).from_pretrained(
|
||||||
self.settings.model,
|
self.settings.model,
|
||||||
dtype=dtype,
|
dtype=self.dtype,
|
||||||
device_map=self.settings.device_map,
|
device_map=self.settings.device_map,
|
||||||
max_memory=self.max_memory,
|
max_memory=self.max_memory,
|
||||||
trust_remote_code=self.trusted_models.get(self.settings.model),
|
trust_remote_code=True
|
||||||
|
if self.settings.model in self.trusted_models
|
||||||
|
else None,
|
||||||
**self.revision_kwargs,
|
**self.revision_kwargs,
|
||||||
**extra_kwargs,
|
**extra_kwargs,
|
||||||
)
|
)
|
||||||
@@ -551,6 +580,10 @@ class Model:
|
|||||||
W = W - W_org
|
W = W - W_org
|
||||||
# Use a low-rank SVD to get an approximation of the matrix.
|
# Use a low-rank SVD to get an approximation of the matrix.
|
||||||
r = self.peft_config.r
|
r = self.peft_config.r
|
||||||
|
# svd_lowrank is randomized:
|
||||||
|
# https://github.com/pytorch/pytorch/blob/20919052303c0b5ba87f8bf7e19237dc33ab09d3/torch/_lowrank.py#L108-L109
|
||||||
|
# Reseed immediately before the call so restoring a trial is independent of RNG history.
|
||||||
|
torch.manual_seed(self.settings.seed)
|
||||||
U, S, Vh = torch.svd_lowrank(W, q=2 * r + 4, niter=6)
|
U, S, Vh = torch.svd_lowrank(W, q=2 * r + 4, niter=6)
|
||||||
# Truncate it to the part we want to store in the LoRA adapter.
|
# Truncate it to the part we want to store in the LoRA adapter.
|
||||||
# Note: svd_lowrank actually returns V, so transpose it to get Vh.
|
# Note: svd_lowrank actually returns V, so transpose it to get Vh.
|
||||||
@@ -747,7 +780,7 @@ class Model:
|
|||||||
_, outputs = self.generate(
|
_, outputs = self.generate(
|
||||||
prompts,
|
prompts,
|
||||||
max_new_tokens=1,
|
max_new_tokens=1,
|
||||||
output_scores=True,
|
output_logits=True,
|
||||||
return_dict_in_generate=True,
|
return_dict_in_generate=True,
|
||||||
use_cache=False,
|
use_cache=False,
|
||||||
)
|
)
|
||||||
@@ -757,8 +790,10 @@ class Model:
|
|||||||
outputs = cast(GenerateDecoderOnlyOutput, outputs)
|
outputs = cast(GenerateDecoderOnlyOutput, outputs)
|
||||||
|
|
||||||
# Logits for the first (only) generated token.
|
# Logits for the first (only) generated token.
|
||||||
# This cast is valid because we passed output_scores=True above.
|
# Use raw logits, not processed generation scores; processors can insert
|
||||||
logits = cast(tuple[FloatTensor], outputs.scores)[0]
|
# -inf for suppressed tokens, which can make KL divergence evaluate to NaN.
|
||||||
|
# This cast is valid because we passed output_logits=True above.
|
||||||
|
logits = cast(tuple[FloatTensor], outputs.logits)[0]
|
||||||
|
|
||||||
# The returned tensor has shape (prompt, token).
|
# The returned tensor has shape (prompt, token).
|
||||||
logprobs = F.log_softmax(logits, dim=-1)
|
logprobs = F.log_softmax(logits, dim=-1)
|
||||||
|
|||||||
+301
-2
@@ -1,13 +1,33 @@
|
|||||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||||
# Copyright (C) 2025-2026 Philipp Emanuel Weidmann <pew@worldwidemann.com> + contributors
|
# Copyright (C) 2025-2026 Philipp Emanuel Weidmann <pew@worldwidemann.com> + contributors
|
||||||
|
|
||||||
|
import json
|
||||||
|
import platform
|
||||||
|
import random
|
||||||
import shutil
|
import shutil
|
||||||
|
from dataclasses import asdict
|
||||||
|
from enum import IntEnum
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import Any, cast
|
||||||
|
from urllib.request import urlopen
|
||||||
|
|
||||||
|
import cpuinfo
|
||||||
|
import torch
|
||||||
from huggingface_hub import HfApi, hf_hub_download
|
from huggingface_hub import HfApi, hf_hub_download
|
||||||
from huggingface_hub.utils import disable_progress_bars, enable_progress_bars
|
from huggingface_hub.utils import (
|
||||||
|
GatedRepoError,
|
||||||
|
disable_progress_bars,
|
||||||
|
enable_progress_bars,
|
||||||
|
)
|
||||||
|
from questionary import Choice
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
from .utils import print
|
from .system import (
|
||||||
|
get_accelerator_info_dict,
|
||||||
|
get_heretic_version_info,
|
||||||
|
get_requirements_dict,
|
||||||
|
)
|
||||||
|
from .utils import print, prompt_select
|
||||||
|
|
||||||
|
|
||||||
def collect_reproducibles(path: str):
|
def collect_reproducibles(path: str):
|
||||||
@@ -21,6 +41,7 @@ def collect_reproducibles(path: str):
|
|||||||
models = api.list_models(
|
models = api.list_models(
|
||||||
filter=["heretic", "reproducible"],
|
filter=["heretic", "reproducible"],
|
||||||
sort="created_at",
|
sort="created_at",
|
||||||
|
expand=["gated", "tags"],
|
||||||
)
|
)
|
||||||
|
|
||||||
found = 0
|
found = 0
|
||||||
@@ -35,6 +56,12 @@ def collect_reproducibles(path: str):
|
|||||||
if model.tags is not None and "gguf" in model.tags:
|
if model.tags is not None and "gguf" in model.tags:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
if model.gated:
|
||||||
|
try:
|
||||||
|
api.auth_check(model.id, repo_type="model")
|
||||||
|
except GatedRepoError:
|
||||||
|
continue
|
||||||
|
|
||||||
print(f"[bold]{model.id}[/]...", end="")
|
print(f"[bold]{model.id}[/]...", end="")
|
||||||
|
|
||||||
user, repository = model.id.split("/")
|
user, repository = model.id.split("/")
|
||||||
@@ -81,3 +108,275 @@ def collect_reproducibles(path: str):
|
|||||||
print(f"Found: [bold]{found}[/] files")
|
print(f"Found: [bold]{found}[/] files")
|
||||||
print(f"Downloaded: [bold]{downloaded}[/] files")
|
print(f"Downloaded: [bold]{downloaded}[/] files")
|
||||||
print(f"Already stored: [bold]{found - downloaded}[/] files")
|
print(f"Already stored: [bold]{found - downloaded}[/] files")
|
||||||
|
|
||||||
|
|
||||||
|
def load_reproduction_information(path: str) -> dict[str, Any]:
    """Loads reproduction information from a JSON document.

    Args:
        path: Either an ``http://``/``https://`` URL or a local file system
            path. Web page URLs of Hugging Face, GitHub and Codeberg are
            rewritten to their raw-download equivalents before fetching.

    Returns:
        The parsed JSON document.

    Raises:
        OSError: If the file cannot be read or the URL cannot be fetched.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    if path.lower().startswith(("http://", "https://")):
        # The path is a URL on the web.

        # Obtain raw download URL.
        path = path.replace("/blob/", "/raw/")  # Hugging Face, GitHub
        path = path.replace("/src/branch/", "/raw/branch/")  # Codeberg

        # Use the response as a context manager so the underlying connection
        # is closed deterministically, even if reading or decoding fails.
        with urlopen(path) as response:
            json_str = response.read().decode("utf-8")
    else:
        # The path is (assumed to be) a local file system path.
        json_str = Path(path).read_text(encoding="utf-8")

    return json.loads(json_str)
|
||||||
|
|
||||||
|
|
||||||
|
class MismatchSeverity(IntEnum):
    """Severity of a difference between the local and the original environment.

    The members are integers, so they can be compared and combined
    with functions such as max().
    """

    LOW = 1
    MEDIUM = 2
    HIGH = 3
    CRITICAL = 4

    def __rich__(self) -> str:
        """Returns the Rich markup string representing this severity."""
        markup_by_severity = {
            MismatchSeverity.LOW: "[green]low[/]",
            MismatchSeverity.MEDIUM: "[yellow]medium[/]",
            MismatchSeverity.HIGH: "[red]high[/]",
            MismatchSeverity.CRITICAL: "[bold red]critical[/]",
        }

        markup = markup_by_severity.get(self)

        if markup is None:
            raise ValueError(f"unknown MismatchSeverity value: {self}")

        return markup
|
||||||
|
|
||||||
|
|
||||||
|
def get_package_mismatch_severity(package_name: str) -> MismatchSeverity:
    """Returns the severity assigned to a version mismatch of the given package."""
    # Tiers are checked in order, from most to least severe.
    severity_tiers = (
        (
            MismatchSeverity.CRITICAL,
            ("heretic-llm",),
        ),
        (
            MismatchSeverity.HIGH,
            ("torch", "transformers"),
        ),
        (
            MismatchSeverity.MEDIUM,
            (
                "accelerate",
                "bitsandbytes",
                "kernels",
                "optuna",
                "peft",
                "tokenizers",
                "triton",
            ),
        ),
    )

    for severity, tier_packages in severity_tiers:
        if package_name in tier_packages:
            return severity

    # Every package that isn't explicitly listed above.
    return MismatchSeverity.LOW
|
||||||
|
|
||||||
|
|
||||||
|
def format_version_information(version_information: dict[str, Any]) -> str:
    """Builds a comparable version string from a version information dictionary.

    The dictionary must contain the keys "version" and "metadata". The
    result depends on the value of metadata["type"]:

    - "pypi": the plain version.
    - "git": the version followed by the repository URL and commit hash.
    - "local", or no "type" key at all: the version followed by a random
      number, so that two such installations never compare as equal.

    Raises:
        ValueError: If metadata["type"] has any other value.
    """
    version = version_information["version"]
    metadata = version_information["metadata"]

    if "type" not in metadata:
        return f"{version}-unknown-{random.randint(2**16, 2**17)}"

    installation_type = metadata["type"]

    if installation_type == "pypi":
        return version

    if installation_type == "git":
        return f"{version}-git+{metadata['url']}@{metadata['commit_hash']}"

    if installation_type == "local":
        # Append a random number to ensure that two local installations
        # are always considered to be different versions.
        return f"{version}-local-{random.randint(2**16, 2**17)}"

    raise ValueError(
        f"unknown metadata.type value in version information: {metadata['type']}"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def check_environment(reproduction_information: dict[str, Any]) -> bool:
    """Compares the local environment with the one recorded for the original model.

    System properties (only if the reproduction information contains a
    "system" entry) and installed package versions are compared. If anything
    differs, the mismatches are printed as tables and the user is asked
    whether to proceed.

    Args:
        reproduction_information: The parsed reproduction JSON document.
            It is not modified by this function.

    Returns:
        True if there are no mismatches at all. Otherwise, the value selected
        by the user: True to attempt reproduction anyway, False to exit.
    """
    mismatch_severity: MismatchSeverity | None = None

    system_mismatches: list[tuple[str, Any, Any, MismatchSeverity]] = []
    package_mismatches: list[tuple[str, Any, Any, MismatchSeverity]] = []

    def verify(
        mismatch_list: list[tuple[str, Any, Any, MismatchSeverity]],
        name: str,
        this: Any,
        original: Any,
        severity: MismatchSeverity,
    ):
        # Records a mismatch and keeps track of the highest severity seen so far.
        nonlocal mismatch_severity
        if this != original:
            mismatch_list.append((name, this, original, severity))
            if mismatch_severity is None:
                mismatch_severity = severity
            else:
                mismatch_severity = max(severity, mismatch_severity)

    def print_mismatch_table(
        title: str,
        name_column: str,
        mismatches: list[tuple[str, Any, Any, MismatchSeverity]],
    ):
        # Prints one titled table listing the given mismatches.
        table = Table()
        table.add_column(name_column)
        table.add_column("This system", overflow="fold")
        table.add_column("Original system", overflow="fold")
        table.add_column("Severity", width=8)

        for name, this, original, severity in mismatches:
            table.add_row(f"[bold]{name}[/]", this, original, severity)

        print()
        print(f"[bold]{title}[/]")
        print(table)

    if "system" in reproduction_information:
        system = reproduction_information["system"]

        verify(
            system_mismatches,
            "Python version",
            platform.python_version(),
            system["python"]["version"],
            MismatchSeverity.LOW,
        )

        verify(
            system_mismatches,
            "Operating system",
            platform.platform(),
            system["os"]["platform"],
            MismatchSeverity.LOW,
        )

        verify(
            system_mismatches,
            "CPU",
            cpuinfo.get_cpu_info().get("brand_raw"),
            system["cpu"]["brand"],
            MismatchSeverity.LOW,
        )

        accelerators = get_accelerator_info_dict()

        verify(
            system_mismatches,
            "Accelerator type",
            accelerators["type"],
            system["accelerators"]["type"],
            MismatchSeverity.HIGH,
        )

        # The remaining accelerator details are only comparable
        # if both systems use the same type of accelerator.
        if (
            accelerators["type"]
            and accelerators["type"] == system["accelerators"]["type"]
        ):
            verify(
                system_mismatches,
                accelerators["api_name"],
                accelerators["api_version"],
                system["accelerators"]["api_version"],
                MismatchSeverity.MEDIUM,
            )
            verify(
                system_mismatches,
                "Driver version",
                accelerators["driver_version"],
                system["accelerators"]["driver_version"],
                MismatchSeverity.MEDIUM,
            )
            verify(
                system_mismatches,
                "Devices",
                "\n".join([device["name"] for device in accelerators["devices"]]),
                "\n".join(
                    [device["name"] for device in system["accelerators"]["devices"]]
                ),
                MismatchSeverity.MEDIUM,
            )
    else:
        print(
            (
                "[yellow]The provided JSON file does not contain system information. "
                "Some system parameters can affect reproducibility, but due to the lack of system information, "
                "Heretic is unable to verify that those parameters match the original environment. "
                "Reproduction may or may not produce a byte-for-byte identical model.[/]"
            )
        )

    requirements = get_requirements_dict()
    requirements["heretic-llm"] = format_version_information(
        asdict(get_heretic_version_info())
    )
    requirements["torch"] = torch.__version__

    # Work on a copy so that the entries added below don't leak
    # into the caller's reproduction information.
    original_requirements = dict(
        reproduction_information["environment"]["requirements"]
    )
    original_requirements["heretic-llm"] = format_version_information(
        reproduction_information["environment"]["heretic"]
    )
    original_requirements["torch"] = reproduction_information["environment"][
        "pytorch_version"
    ]

    # Include packages that are present in only one of the two environments.
    package_names = sorted(requirements.keys() | original_requirements.keys())

    for package_name in package_names:
        verify(
            package_mismatches,
            package_name,
            requirements.get(package_name),
            original_requirements.get(package_name),
            get_package_mismatch_severity(package_name),
        )

    if not system_mismatches and not package_mismatches:
        # There are no mismatches at all, so there is nothing to confirm.
        return True

    print()
    print(
        (
            "[yellow]Your local environment doesn't perfectly match the environment "
            "used to produce the original model. The following components differ:[/]"
        )
    )

    if system_mismatches:
        print_mismatch_table("System Mismatches", "Component", system_mismatches)

    if package_mismatches:
        print_mismatch_table("Package Mismatches", "Package", package_mismatches)

    print()
    print(
        (
            f"There is a {cast(MismatchSeverity, mismatch_severity).__rich__()} chance "
            "that reproduction won't produce a byte-for-byte identical model. "
            "However, the resulting model will very likely still behave similarly "
            "to the original model."
        )
    )

    print()
    choice = prompt_select(
        "How would you like to proceed?",
        [
            Choice(
                title="Attempt to reproduce the model anyway",
                value=True,
            ),
            Choice(
                title="Exit program",
                value=False,
            ),
        ],
    )

    return choice
|
||||||
|
|||||||
+46
-10
@@ -2,11 +2,13 @@
|
|||||||
# Copyright (C) 2025-2026 Philipp Emanuel Weidmann <pew@worldwidemann.com> + contributors
|
# Copyright (C) 2025-2026 Philipp Emanuel Weidmann <pew@worldwidemann.com> + contributors
|
||||||
|
|
||||||
import getpass
|
import getpass
|
||||||
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import platform
|
import platform
|
||||||
import random
|
import random
|
||||||
import tempfile
|
import tempfile
|
||||||
|
import traceback
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from importlib.metadata import version
|
from importlib.metadata import version
|
||||||
@@ -24,6 +26,7 @@ from datasets.download.download_manager import DownloadMode
|
|||||||
from datasets.utils.info_utils import VerificationMode
|
from datasets.utils.info_utils import VerificationMode
|
||||||
from huggingface_hub.utils import validate_repo_id
|
from huggingface_hub.utils import validate_repo_id
|
||||||
from optuna import Trial
|
from optuna import Trial
|
||||||
|
from optuna.trial import FrozenTrial
|
||||||
from psutil import Process
|
from psutil import Process
|
||||||
from questionary import Choice, Style
|
from questionary import Choice, Style
|
||||||
from rich.console import Console
|
from rich.console import Console
|
||||||
@@ -170,10 +173,23 @@ def format_duration(seconds: float) -> str:
|
|||||||
return f"{seconds}s"
|
return f"{seconds}s"
|
||||||
|
|
||||||
|
|
||||||
|
def format_exception(error: Exception) -> str:
    """Returns a human-readable description of an exception.

    The exception itself and then its chain of causes/contexts are searched
    for the first non-empty message. If no exception in the chain has a
    message, the formatted traceback of the exception is returned instead.

    Args:
        error: The exception to describe.

    Returns:
        The stripped message, or the stripped traceback text as a fallback.
    """
    # Walk causal chain to find a non-empty message.
    current: BaseException | None = error
    # Exception chains can be made cyclic by assigning __cause__ manually,
    # so remember visited exceptions to guarantee termination.
    visited: set[int] = set()

    while current is not None and id(current) not in visited:
        visited.add(id(current))

        message = str(current).strip()
        if message:
            return message

        current = current.__cause__ or current.__context__

    # If there is no message in the entire causal chain, fall back to the complete traceback.
    # Format the given exception explicitly rather than relying on the exception
    # that happens to be handled at the moment, which may be a different one
    # (or none at all if this function is called outside of an except block).
    return "".join(
        traceback.format_exception(type(error), error, error.__traceback__)
    ).strip()
|
||||||
|
|
||||||
|
|
||||||
def is_hf_path(path: str) -> bool:
|
def is_hf_path(path: str) -> bool:
|
||||||
"""Checks whether a path likely refers to a Hugging Face repository."""
|
"""Checks whether a path likely refers to a Hugging Face repository."""
|
||||||
|
|
||||||
# Match Transformers: existing local paths take precedence over Hub lookup,
|
# Match Transformers: Existing local paths take precedence over Hub lookup,
|
||||||
# even if the path string is also a valid repository ID.
|
# even if the path string is also a valid repository ID.
|
||||||
if Path(path).exists():
|
if Path(path).exists():
|
||||||
return False
|
return False
|
||||||
@@ -193,12 +209,15 @@ def get_split_slice(split_str: str, length: int) -> tuple[int, int]:
|
|||||||
|
|
||||||
# The split name is the part before the slice, e.g. "train" in "train[:400]".
|
# The split name is the part before the slice, e.g. "train" in "train[:400]".
|
||||||
split_name = split_str.split("[")[0]
|
split_name = split_str.split("[")[0]
|
||||||
|
|
||||||
# Associate the split with its number of examples (lines).
|
# Associate the split with its number of examples (lines).
|
||||||
name_to_length = {split_name: length}
|
name_to_length = {split_name: length}
|
||||||
|
|
||||||
# Convert the instructions to absolute indices and select the first one.
|
# Convert the instructions to absolute indices and select the first one.
|
||||||
absolute_instruction = ReadInstruction.from_spec(split_str).to_absolute(
|
absolute_instruction = ReadInstruction.from_spec(split_str).to_absolute(
|
||||||
name_to_length
|
name_to_length
|
||||||
)[0]
|
)[0]
|
||||||
|
|
||||||
return absolute_instruction.from_, absolute_instruction.to
|
return absolute_instruction.from_, absolute_instruction.to
|
||||||
|
|
||||||
|
|
||||||
@@ -285,7 +304,7 @@ def batchify(items: list[T], batch_size: int) -> list[list[T]]:
|
|||||||
return [items[i : i + batch_size] for i in range(0, len(items), batch_size)]
|
return [items[i : i + batch_size] for i in range(0, len(items), batch_size)]
|
||||||
|
|
||||||
|
|
||||||
def get_trial_parameters(trial: Trial) -> dict[str, str]:
|
def get_trial_parameters(trial: Trial | FrozenTrial) -> dict[str, str]:
|
||||||
params = {}
|
params = {}
|
||||||
|
|
||||||
direction_index = trial.user_attrs["direction_index"]
|
direction_index = trial.user_attrs["direction_index"]
|
||||||
@@ -302,7 +321,7 @@ def get_trial_parameters(trial: Trial) -> dict[str, str]:
|
|||||||
|
|
||||||
def get_readme_intro(
|
def get_readme_intro(
|
||||||
settings: Settings,
|
settings: Settings,
|
||||||
trial: Trial,
|
trial: Trial | FrozenTrial,
|
||||||
contains_reproducibility_information: bool,
|
contains_reproducibility_information: bool,
|
||||||
) -> str:
|
) -> str:
|
||||||
if is_hf_path(settings.model):
|
if is_hf_path(settings.model):
|
||||||
@@ -323,7 +342,7 @@ def get_readme_intro(
|
|||||||
|
|
||||||
return f"""# This is a decensored version of {
|
return f"""# This is a decensored version of {
|
||||||
model_link
|
model_link
|
||||||
}, made using [Heretic](https://github.com/p-e-w/heretic) v{version("heretic-llm")}
|
}, made using [Heretic](https://heretic-project.org) v{version("heretic-llm")}
|
||||||
{reproducibility_instructions}
|
{reproducibility_instructions}
|
||||||
## Abliteration parameters
|
## Abliteration parameters
|
||||||
|
|
||||||
@@ -394,7 +413,7 @@ def format_hf_link(
|
|||||||
def generate_reproduce_readme(
|
def generate_reproduce_readme(
|
||||||
settings: Settings,
|
settings: Settings,
|
||||||
checkpoint_filename: str,
|
checkpoint_filename: str,
|
||||||
trial: Trial,
|
trial: Trial | FrozenTrial,
|
||||||
include_system_information: bool,
|
include_system_information: bool,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Generates the contents of a README.md for the reproduce/ folder."""
|
"""Generates the contents of a README.md for the reproduce/ folder."""
|
||||||
@@ -546,13 +565,18 @@ This directory contains the necessary information and assets to reproduce the re
|
|||||||
|
|
||||||
## How to reproduce
|
## How to reproduce
|
||||||
|
|
||||||
|
> [!TIP]
|
||||||
|
> You can automate this process, including all verification steps, by downloading the `reproduce.json` file and running
|
||||||
|
> `heretic --reproduce reproduce.json`.
|
||||||
|
|
||||||
{system_instructions}1. Install the exact version of Heretic indicated in the **Environment** section above, from its original source.
|
{system_instructions}1. Install the exact version of Heretic indicated in the **Environment** section above, from its original source.
|
||||||
1. Install the packages listed in `requirements.txt`: `pip install -r requirements.txt`
|
1. Install the packages listed in `requirements.txt`: `pip install -r requirements.txt`
|
||||||
1. Install the correct version of PyTorch: `{pytorch_install_command}`
|
1. Install the correct version of PyTorch: `{pytorch_install_command}`
|
||||||
1. Place the provided `config.toml` in your working directory.
|
1. Place the provided `config.toml` in your working directory.
|
||||||
1. Run Heretic without any additional arguments: `heretic`
|
1. Run Heretic without any additional arguments: `heretic`
|
||||||
1. Wait for the run to finish, then select trial **{trial.user_attrs["index"]}** and export the model.
|
1. Wait for the run to finish, then select trial **{trial.user_attrs["index"]}** and export the model.
|
||||||
1. Verify that the weight files have been exactly reproduced by comparing their SHA-256 hashes against those in `SHA256SUMS`: `sha256sum -c SHA256SUMS` (or look at the hashes online if you uploaded to Hugging Face)
|
1. Verify that the weight files have been exactly reproduced by comparing their SHA-256 hashes against those in `SHA256SUMS`:
|
||||||
|
`sha256sum -c SHA256SUMS` (or look at the hashes online if you uploaded to Hugging Face)
|
||||||
|
|
||||||
> [!TIP]
|
> [!TIP]
|
||||||
> To use the included Optuna study journal `{checkpoint_filename}`, place it in the checkpoints directory (usually `checkpoints/`) before running Heretic.
|
> To use the included Optuna study journal `{checkpoint_filename}`, place it in the checkpoints directory (usually `checkpoints/`) before running Heretic.
|
||||||
@@ -563,7 +587,7 @@ This directory contains the necessary information and assets to reproduce the re
|
|||||||
|
|
||||||
def generate_reproduce_json(
|
def generate_reproduce_json(
|
||||||
settings: Settings,
|
settings: Settings,
|
||||||
trial: Trial,
|
trial: Trial | FrozenTrial,
|
||||||
timestamp: str,
|
timestamp: str,
|
||||||
uploaded_model_hashes: dict[str, str],
|
uploaded_model_hashes: dict[str, str],
|
||||||
include_system_information: bool,
|
include_system_information: bool,
|
||||||
@@ -573,7 +597,7 @@ def generate_reproduce_json(
|
|||||||
version_info = get_heretic_version_info()
|
version_info = get_heretic_version_info()
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
"version": "1", # Version number of the reproduce.json file format, to allow for future changes.
|
"version": "2", # Version number of the reproduce.json file format, to allow for future changes.
|
||||||
"timestamp": timestamp,
|
"timestamp": timestamp,
|
||||||
"system": None, # Defined here to preserve insertion order.
|
"system": None, # Defined here to preserve insertion order.
|
||||||
"environment": {
|
"environment": {
|
||||||
@@ -627,11 +651,23 @@ def generate_sha256sums(hashes: dict[str, str]) -> str:
|
|||||||
return "\n".join(lines) + "\n"
|
return "\n".join(lines) + "\n"
|
||||||
|
|
||||||
|
|
||||||
|
# TODO: Replace this with hashlib.file_digest when we drop support for Python 3.10.
|
||||||
|
def get_file_sha256(file_path: str | Path) -> str:
|
||||||
|
hash = hashlib.sha256()
|
||||||
|
|
||||||
|
with open(file_path, "rb") as file:
|
||||||
|
# Read the file in 64 kB blocks.
|
||||||
|
for block in iter(lambda: file.read(65536), b""):
|
||||||
|
hash.update(block)
|
||||||
|
|
||||||
|
return hash.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
def create_reproduce_folder(
|
def create_reproduce_folder(
|
||||||
path: Path,
|
path: Path,
|
||||||
settings: Settings,
|
settings: Settings,
|
||||||
checkpoint_path: str | Path,
|
checkpoint_path: str | Path,
|
||||||
trial: Trial,
|
trial: Trial | FrozenTrial,
|
||||||
uploaded_model_hashes: dict[str, str],
|
uploaded_model_hashes: dict[str, str],
|
||||||
include_system_information: bool,
|
include_system_information: bool,
|
||||||
):
|
):
|
||||||
@@ -705,7 +741,7 @@ def upload_reproduce_folder(
|
|||||||
settings: Settings,
|
settings: Settings,
|
||||||
token: str,
|
token: str,
|
||||||
checkpoint_path: str | Path,
|
checkpoint_path: str | Path,
|
||||||
trial: Trial,
|
trial: Trial | FrozenTrial,
|
||||||
include_system_information: bool,
|
include_system_information: bool,
|
||||||
):
|
):
|
||||||
api = huggingface_hub.HfApi()
|
api = huggingface_hub.HfApi()
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ resolution-markers = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[options]
|
[options]
|
||||||
exclude-newer = "2026-05-28T06:40:14.509192809Z"
|
exclude-newer = "0001-01-01T00:00:00Z" # This has no effect and is included for backwards compatibility when using relative exclude-newer values.
|
||||||
exclude-newer-span = "P7D"
|
exclude-newer-span = "P7D"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
@@ -2624,59 +2624,59 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pyarrow"
|
name = "pyarrow"
|
||||||
version = "22.0.0"
|
version = "23.0.1"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/30/53/04a7fdc63e6056116c9ddc8b43bc28c12cdd181b85cbeadb79278475f3ae/pyarrow-22.0.0.tar.gz", hash = "sha256:3d600dc583260d845c7d8a6db540339dd883081925da2bd1c5cb808f720b3cd9", size = 1151151, upload-time = "2025-10-24T12:30:00.762Z" }
|
sdist = { url = "https://files.pythonhosted.org/packages/88/22/134986a4cc224d593c1afde5494d18ff629393d74cc2eddb176669f234a4/pyarrow-23.0.1.tar.gz", hash = "sha256:b8c5873e33440b2bc2f4a79d2b47017a89c5a24116c055625e6f2ee50523f019", size = 1167336, upload-time = "2026-02-16T10:14:12.39Z" }
|
||||||
wheels = [
|
wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/d9/9b/cb3f7e0a345353def531ca879053e9ef6b9f38ed91aebcf68b09ba54dec0/pyarrow-22.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:77718810bd3066158db1e95a63c160ad7ce08c6b0710bc656055033e39cdad88", size = 34223968, upload-time = "2025-10-24T10:03:31.21Z" },
|
{ url = "https://files.pythonhosted.org/packages/bc/a8/24e5dc6855f50a62936ceb004e6e9645e4219a8065f304145d7fb8a79d5d/pyarrow-23.0.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:3fab8f82571844eb3c460f90a75583801d14ca0cc32b1acc8c361650e006fd56", size = 34307390, upload-time = "2026-02-16T10:08:08.654Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/6c/41/3184b8192a120306270c5307f105b70320fdaa592c99843c5ef78aaefdcf/pyarrow-22.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:44d2d26cda26d18f7af7db71453b7b783788322d756e81730acb98f24eb90ace", size = 35942085, upload-time = "2025-10-24T10:03:38.146Z" },
|
{ url = "https://files.pythonhosted.org/packages/bc/8e/4be5617b4aaae0287f621ad31c6036e5f63118cfca0dc57d42121ff49b51/pyarrow-23.0.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:3f91c038b95f71ddfc865f11d5876c42f343b4495535bd262c7b321b0b94507c", size = 35853761, upload-time = "2026-02-16T10:08:17.811Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/d9/3d/a1eab2f6f08001f9fb714b8ed5cfb045e2fe3e3e3c0c221f2c9ed1e6d67d/pyarrow-22.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:b9d71701ce97c95480fecb0039ec5bb889e75f110da72005743451339262f4ce", size = 44964613, upload-time = "2025-10-24T10:03:46.516Z" },
|
{ url = "https://files.pythonhosted.org/packages/2e/08/3e56a18819462210432ae37d10f5c8eed3828be1d6c751b6e6a2e93c286a/pyarrow-23.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:d0744403adabef53c985a7f8a082b502a368510c40d184df349a0a8754533258", size = 44493116, upload-time = "2026-02-16T10:08:25.792Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/46/46/a1d9c24baf21cfd9ce994ac820a24608decf2710521b29223d4334985127/pyarrow-22.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:710624ab925dc2b05a6229d47f6f0dac1c1155e6ed559be7109f684eba048a48", size = 47627059, upload-time = "2025-10-24T10:03:55.353Z" },
|
{ url = "https://files.pythonhosted.org/packages/f8/82/c40b68001dbec8a3faa4c08cd8c200798ac732d2854537c5449dc859f55a/pyarrow-23.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:c33b5bf406284fd0bba436ed6f6c3ebe8e311722b441d89397c54f871c6863a2", size = 47564532, upload-time = "2026-02-16T10:08:34.27Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/3a/4c/f711acb13075c1391fd54bc17e078587672c575f8de2a6e62509af026dcf/pyarrow-22.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f963ba8c3b0199f9d6b794c90ec77545e05eadc83973897a4523c9e8d84e9340", size = 47947043, upload-time = "2025-10-24T10:04:05.408Z" },
|
{ url = "https://files.pythonhosted.org/packages/20/bc/73f611989116b6f53347581b02177f9f620efdf3cd3f405d0e83cdf53a83/pyarrow-23.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ddf743e82f69dcd6dbbcb63628895d7161e04e56794ef80550ac6f3315eeb1d5", size = 48183685, upload-time = "2026-02-16T10:08:42.889Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/4e/70/1f3180dd7c2eab35c2aca2b29ace6c519f827dcd4cfeb8e0dca41612cf7a/pyarrow-22.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bd0d42297ace400d8febe55f13fdf46e86754842b860c978dfec16f081e5c653", size = 50206505, upload-time = "2025-10-24T10:04:15.786Z" },
|
{ url = "https://files.pythonhosted.org/packages/b0/cc/6c6b3ecdae2a8c3aced99956187e8302fc954cc2cca2a37cf2111dad16ce/pyarrow-23.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e052a211c5ac9848ae15d5ec875ed0943c0221e2fcfe69eee80b604b4e703222", size = 50605582, upload-time = "2026-02-16T10:08:51.641Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/80/07/fea6578112c8c60ffde55883a571e4c4c6bc7049f119d6b09333b5cc6f73/pyarrow-22.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:00626d9dc0f5ef3a75fe63fd68b9c7c8302d2b5bbc7f74ecaedba83447a24f84", size = 28101641, upload-time = "2025-10-24T10:04:22.57Z" },
|
{ url = "https://files.pythonhosted.org/packages/8d/94/d359e708672878d7638a04a0448edf7c707f9e5606cee11e15aaa5c7535a/pyarrow-23.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:5abde149bb3ce524782d838eb67ac095cd3fd6090eba051130589793f1a7f76d", size = 27521148, upload-time = "2026-02-16T10:08:58.077Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/2e/b7/18f611a8cdc43417f9394a3ccd3eace2f32183c08b9eddc3d17681819f37/pyarrow-22.0.0-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:3e294c5eadfb93d78b0763e859a0c16d4051fc1c5231ae8956d61cb0b5666f5a", size = 34272022, upload-time = "2025-10-24T10:04:28.973Z" },
|
{ url = "https://files.pythonhosted.org/packages/b0/41/8e6b6ef7e225d4ceead8459427a52afdc23379768f54dd3566014d7618c1/pyarrow-23.0.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:6f0147ee9e0386f519c952cc670eb4a8b05caa594eeffe01af0e25f699e4e9bb", size = 34302230, upload-time = "2026-02-16T10:09:03.859Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/26/5c/f259e2526c67eb4b9e511741b19870a02363a47a35edbebc55c3178db22d/pyarrow-22.0.0-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:69763ab2445f632d90b504a815a2a033f74332997052b721002298ed6de40f2e", size = 35995834, upload-time = "2025-10-24T10:04:35.467Z" },
|
{ url = "https://files.pythonhosted.org/packages/bf/4a/1472c00392f521fea03ae93408bf445cc7bfa1ab81683faf9bc188e36629/pyarrow-23.0.1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:0ae6e17c828455b6265d590100c295193f93cc5675eb0af59e49dbd00d2de350", size = 35850050, upload-time = "2026-02-16T10:09:11.877Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/50/8d/281f0f9b9376d4b7f146913b26fac0aa2829cd1ee7e997f53a27411bbb92/pyarrow-22.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:b41f37cabfe2463232684de44bad753d6be08a7a072f6a83447eeaf0e4d2a215", size = 45030348, upload-time = "2025-10-24T10:04:43.366Z" },
|
{ url = "https://files.pythonhosted.org/packages/0c/b2/bd1f2f05ded56af7f54d702c8364c9c43cd6abb91b0e9933f3d77b4f4132/pyarrow-23.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:fed7020203e9ef273360b9e45be52a2a47d3103caf156a30ace5247ffb51bdbd", size = 44491918, upload-time = "2026-02-16T10:09:18.144Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/f5/e5/53c0a1c428f0976bf22f513d79c73000926cb00b9c138d8e02daf2102e18/pyarrow-22.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:35ad0f0378c9359b3f297299c3309778bb03b8612f987399a0333a560b43862d", size = 47699480, upload-time = "2025-10-24T10:04:51.486Z" },
|
{ url = "https://files.pythonhosted.org/packages/0b/62/96459ef5b67957eac38a90f541d1c28833d1b367f014a482cb63f3b7cd2d/pyarrow-23.0.1-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:26d50dee49d741ac0e82185033488d28d35be4d763ae6f321f97d1140eb7a0e9", size = 47562811, upload-time = "2026-02-16T10:09:25.792Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/95/e1/9dbe4c465c3365959d183e6345d0a8d1dc5b02ca3f8db4760b3bc834cf25/pyarrow-22.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8382ad21458075c2e66a82a29d650f963ce51c7708c7c0ff313a8c206c4fd5e8", size = 48011148, upload-time = "2025-10-24T10:04:59.585Z" },
|
{ url = "https://files.pythonhosted.org/packages/7d/94/1170e235add1f5f45a954e26cd0e906e7e74e23392dcb560de471f7366ec/pyarrow-23.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:3c30143b17161310f151f4a2bcfe41b5ff744238c1039338779424e38579d701", size = 48183766, upload-time = "2026-02-16T10:09:34.645Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/c5/b4/7caf5d21930061444c3cf4fa7535c82faf5263e22ce43af7c2759ceb5b8b/pyarrow-22.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:1a812a5b727bc09c3d7ea072c4eebf657c2f7066155506ba31ebf4792f88f016", size = 50276964, upload-time = "2025-10-24T10:05:08.175Z" },
|
{ url = "https://files.pythonhosted.org/packages/0e/2d/39a42af4570377b99774cdb47f63ee6c7da7616bd55b3d5001aa18edfe4f/pyarrow-23.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:db2190fa79c80a23fdd29fef4b8992893f024ae7c17d2f5f4db7171fa30c2c78", size = 50607669, upload-time = "2026-02-16T10:09:44.153Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/ae/f3/cec89bd99fa3abf826f14d4e53d3d11340ce6f6af4d14bdcd54cd83b6576/pyarrow-22.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:ec5d40dd494882704fb876c16fa7261a69791e784ae34e6b5992e977bd2e238c", size = 28106517, upload-time = "2025-10-24T10:05:14.314Z" },
|
{ url = "https://files.pythonhosted.org/packages/00/ca/db94101c187f3df742133ac837e93b1f269ebdac49427f8310ee40b6a58f/pyarrow-23.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:f00f993a8179e0e1c9713bcc0baf6d6c01326a406a9c23495ec1ba9c9ebf2919", size = 27527698, upload-time = "2026-02-16T10:09:50.263Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/af/63/ba23862d69652f85b615ca14ad14f3bcfc5bf1b99ef3f0cd04ff93fdad5a/pyarrow-22.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:bea79263d55c24a32b0d79c00a1c58bb2ee5f0757ed95656b01c0fb310c5af3d", size = 34211578, upload-time = "2025-10-24T10:05:21.583Z" },
|
{ url = "https://files.pythonhosted.org/packages/9a/4b/4166bb5abbfe6f750fc60ad337c43ecf61340fa52ab386da6e8dbf9e63c4/pyarrow-23.0.1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:f4b0dbfa124c0bb161f8b5ebb40f1a680b70279aa0c9901d44a2b5a20806039f", size = 34214575, upload-time = "2026-02-16T10:09:56.225Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/b1/d0/f9ad86fe809efd2bcc8be32032fa72e8b0d112b01ae56a053006376c5930/pyarrow-22.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:12fe549c9b10ac98c91cf791d2945e878875d95508e1a5d14091a7aaa66d9cf8", size = 35989906, upload-time = "2025-10-24T10:05:29.485Z" },
|
{ url = "https://files.pythonhosted.org/packages/e1/da/3f941e3734ac8088ea588b53e860baeddac8323ea40ce22e3d0baa865cc9/pyarrow-23.0.1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:7707d2b6673f7de054e2e83d59f9e805939038eebe1763fe811ee8fa5c0cd1a7", size = 35832540, upload-time = "2026-02-16T10:10:03.428Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/b4/a8/f910afcb14630e64d673f15904ec27dd31f1e009b77033c365c84e8c1e1d/pyarrow-22.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:334f900ff08ce0423407af97e6c26ad5d4e3b0763645559ece6fbf3747d6a8f5", size = 45021677, upload-time = "2025-10-24T10:05:38.274Z" },
|
{ url = "https://files.pythonhosted.org/packages/88/7c/3d841c366620e906d54430817531b877ba646310296df42ef697308c2705/pyarrow-23.0.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:86ff03fb9f1a320266e0de855dee4b17da6794c595d207f89bba40d16b5c78b9", size = 44470940, upload-time = "2026-02-16T10:10:10.704Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/13/95/aec81f781c75cd10554dc17a25849c720d54feafb6f7847690478dcf5ef8/pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:c6c791b09c57ed76a18b03f2631753a4960eefbbca80f846da8baefc6491fcfe", size = 47726315, upload-time = "2025-10-24T10:05:47.314Z" },
|
{ url = "https://files.pythonhosted.org/packages/2c/a5/da83046273d990f256cb79796a190bbf7ec999269705ddc609403f8c6b06/pyarrow-23.0.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:813d99f31275919c383aab17f0f455a04f5a429c261cc411b1e9a8f5e4aaaa05", size = 47586063, upload-time = "2026-02-16T10:10:17.95Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/bb/d4/74ac9f7a54cfde12ee42734ea25d5a3c9a45db78f9def949307a92720d37/pyarrow-22.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c3200cb41cdbc65156e5f8c908d739b0dfed57e890329413da2748d1a2cd1a4e", size = 47990906, upload-time = "2025-10-24T10:05:58.254Z" },
|
{ url = "https://files.pythonhosted.org/packages/5b/3c/b7d2ebcff47a514f47f9da1e74b7949138c58cfeb108cdd4ee62f43f0cf3/pyarrow-23.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bf5842f960cddd2ef757d486041d57c96483efc295a8c4a0e20e704cbbf39c67", size = 48173045, upload-time = "2026-02-16T10:10:25.363Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/2e/71/fedf2499bf7a95062eafc989ace56572f3343432570e1c54e6599d5b88da/pyarrow-22.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ac93252226cf288753d8b46280f4edf3433bf9508b6977f8dd8526b521a1bbb9", size = 50306783, upload-time = "2025-10-24T10:06:08.08Z" },
|
{ url = "https://files.pythonhosted.org/packages/43/b2/b40961262213beaba6acfc88698eb773dfce32ecdf34d19291db94c2bd73/pyarrow-23.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:564baf97c858ecc03ec01a41062e8f4698abc3e6e2acd79c01c2e97880a19730", size = 50621741, upload-time = "2026-02-16T10:10:33.477Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/68/ed/b202abd5a5b78f519722f3d29063dda03c114711093c1995a33b8e2e0f4b/pyarrow-22.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:44729980b6c50a5f2bfcc2668d36c569ce17f8b17bccaf470c4313dcbbf13c9d", size = 27972883, upload-time = "2025-10-24T10:06:14.204Z" },
|
{ url = "https://files.pythonhosted.org/packages/f6/70/1fdda42d65b28b078e93d75d371b2185a61da89dda4def8ba6ba41ebdeb4/pyarrow-23.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:07deae7783782ac7250989a7b2ecde9b3c343a643f82e8a4df03d93b633006f0", size = 27620678, upload-time = "2026-02-16T10:10:39.31Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/a6/d6/d0fac16a2963002fc22c8fa75180a838737203d558f0ed3b564c4a54eef5/pyarrow-22.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e6e95176209257803a8b3d0394f21604e796dadb643d2f7ca21b66c9c0b30c9a", size = 34204629, upload-time = "2025-10-24T10:06:20.274Z" },
|
{ url = "https://files.pythonhosted.org/packages/47/10/2cbe4c6f0fb83d2de37249567373d64327a5e4d8db72f486db42875b08f6/pyarrow-23.0.1-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:6b8fda694640b00e8af3c824f99f789e836720aa8c9379fb435d4c4953a756b8", size = 34210066, upload-time = "2026-02-16T10:10:45.487Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/c6/9c/1d6357347fbae062ad3f17082f9ebc29cc733321e892c0d2085f42a2212b/pyarrow-22.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:001ea83a58024818826a9e3f89bf9310a114f7e26dfe404a4c32686f97bd7901", size = 35985783, upload-time = "2025-10-24T10:06:27.301Z" },
|
{ url = "https://files.pythonhosted.org/packages/cb/4f/679fa7e84dadbaca7a65f7cdba8d6c83febbd93ca12fa4adf40ba3b6362b/pyarrow-23.0.1-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:8ff51b1addc469b9444b7c6f3548e19dc931b172ab234e995a60aea9f6e6025f", size = 35825526, upload-time = "2026-02-16T10:10:52.266Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/ff/c0/782344c2ce58afbea010150df07e3a2f5fdad299cd631697ae7bd3bac6e3/pyarrow-22.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:ce20fe000754f477c8a9125543f1936ea5b8867c5406757c224d745ed033e691", size = 45020999, upload-time = "2025-10-24T10:06:35.387Z" },
|
{ url = "https://files.pythonhosted.org/packages/f9/63/d2747d930882c9d661e9398eefc54f15696547b8983aaaf11d4a2e8b5426/pyarrow-23.0.1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:71c5be5cbf1e1cb6169d2a0980850bccb558ddc9b747b6206435313c47c37677", size = 44473279, upload-time = "2026-02-16T10:11:01.557Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/1b/8b/5362443737a5307a7b67c1017c42cd104213189b4970bf607e05faf9c525/pyarrow-22.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:e0a15757fccb38c410947df156f9749ae4a3c89b2393741a50521f39a8cf202a", size = 47724601, upload-time = "2025-10-24T10:06:43.551Z" },
|
{ url = "https://files.pythonhosted.org/packages/b3/93/10a48b5e238de6d562a411af6467e71e7aedbc9b87f8d3a35f1560ae30fb/pyarrow-23.0.1-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:9b6f4f17b43bc39d56fec96e53fe89d94bac3eb134137964371b45352d40d0c2", size = 47585798, upload-time = "2026-02-16T10:11:09.401Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/69/4d/76e567a4fc2e190ee6072967cb4672b7d9249ac59ae65af2d7e3047afa3b/pyarrow-22.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:cedb9dd9358e4ea1d9bce3665ce0797f6adf97ff142c8e25b46ba9cdd508e9b6", size = 48001050, upload-time = "2025-10-24T10:06:52.284Z" },
|
{ url = "https://files.pythonhosted.org/packages/5c/20/476943001c54ef078dbf9542280e22741219a184a0632862bca4feccd666/pyarrow-23.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:9fc13fc6c403d1337acab46a2c4346ca6c9dec5780c3c697cf8abfd5e19b6b37", size = 48179446, upload-time = "2026-02-16T10:11:17.781Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/01/5e/5653f0535d2a1aef8223cee9d92944cb6bccfee5cf1cd3f462d7cb022790/pyarrow-22.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:252be4a05f9d9185bb8c18e83764ebcfea7185076c07a7a662253af3a8c07941", size = 50307877, upload-time = "2025-10-24T10:07:02.405Z" },
|
{ url = "https://files.pythonhosted.org/packages/4b/b6/5dd0c47b335fcd8edba9bfab78ad961bd0fd55ebe53468cc393f45e0be60/pyarrow-23.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5c16ed4f53247fa3ffb12a14d236de4213a4415d127fe9cebed33d51671113e2", size = 50623972, upload-time = "2026-02-16T10:11:26.185Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/2d/f8/1d0bd75bf9328a3b826e24a16e5517cd7f9fbf8d34a3184a4566ef5a7f29/pyarrow-22.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:a4893d31e5ef780b6edcaf63122df0f8d321088bb0dee4c8c06eccb1ca28d145", size = 27977099, upload-time = "2025-10-24T10:08:07.259Z" },
|
{ url = "https://files.pythonhosted.org/packages/d5/09/a532297c9591a727d67760e2e756b83905dd89adb365a7f6e9c72578bcc1/pyarrow-23.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:cecfb12ef629cf6be0b1887f9f86463b0dd3dc3195ae6224e74006be4736035a", size = 27540749, upload-time = "2026-02-16T10:12:23.297Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/90/81/db56870c997805bf2b0f6eeeb2d68458bf4654652dccdcf1bf7a42d80903/pyarrow-22.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:f7fe3dbe871294ba70d789be16b6e7e52b418311e166e0e3cba9522f0f437fb1", size = 34336685, upload-time = "2025-10-24T10:07:11.47Z" },
|
{ url = "https://files.pythonhosted.org/packages/a5/8e/38749c4b1303e6ae76b3c80618f84861ae0c55dd3c2273842ea6f8258233/pyarrow-23.0.1-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:29f7f7419a0e30264ea261fdc0e5fe63ce5a6095003db2945d7cd78df391a7e1", size = 34471544, upload-time = "2026-02-16T10:11:32.535Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/1c/98/0727947f199aba8a120f47dfc229eeb05df15bcd7a6f1b669e9f882afc58/pyarrow-22.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:ba95112d15fd4f1105fb2402c4eab9068f0554435e9b7085924bcfaac2cc306f", size = 36032158, upload-time = "2025-10-24T10:07:18.626Z" },
|
{ url = "https://files.pythonhosted.org/packages/a3/73/f237b2bc8c669212f842bcfd842b04fc8d936bfc9d471630569132dc920d/pyarrow-23.0.1-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:33d648dc25b51fd8055c19e4261e813dfc4d2427f068bcecc8b53d01b81b0500", size = 35949911, upload-time = "2026-02-16T10:11:39.813Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/96/b4/9babdef9c01720a0785945c7cf550e4acd0ebcd7bdd2e6f0aa7981fa85e2/pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c064e28361c05d72eed8e744c9605cbd6d2bb7481a511c74071fd9b24bc65d7d", size = 44892060, upload-time = "2025-10-24T10:07:26.002Z" },
|
{ url = "https://files.pythonhosted.org/packages/0c/86/b912195eee0903b5611bf596833def7d146ab2d301afeb4b722c57ffc966/pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:cd395abf8f91c673dd3589cadc8cc1ee4e8674fa61b2e923c8dd215d9c7d1f41", size = 44520337, upload-time = "2026-02-16T10:11:47.764Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/f8/ca/2f8804edd6279f78a37062d813de3f16f29183874447ef6d1aadbb4efa0f/pyarrow-22.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:6f9762274496c244d951c819348afbcf212714902742225f649cf02823a6a10f", size = 47504395, upload-time = "2025-10-24T10:07:34.09Z" },
|
{ url = "https://files.pythonhosted.org/packages/69/c2/f2a717fb824f62d0be952ea724b4f6f9372a17eed6f704b5c9526f12f2f1/pyarrow-23.0.1-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:00be9576d970c31defb5c32eb72ef585bf600ef6d0a82d5eccaae96639cf9d07", size = 47548944, upload-time = "2026-02-16T10:11:56.607Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/b9/f0/77aa5198fd3943682b2e4faaf179a674f0edea0d55d326d83cb2277d9363/pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a9d9ffdc2ab696f6b15b4d1f7cec6658e1d788124418cb30030afbae31c64746", size = 48066216, upload-time = "2025-10-24T10:07:43.528Z" },
|
{ url = "https://files.pythonhosted.org/packages/84/a7/90007d476b9f0dc308e3bc57b832d004f848fd6c0da601375d20d92d1519/pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:c2139549494445609f35a5cda4eb94e2c9e4d704ce60a095b342f82460c73a83", size = 48236269, upload-time = "2026-02-16T10:12:04.47Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/79/87/a1937b6e78b2aff18b706d738c9e46ade5bfcf11b294e39c87706a0089ac/pyarrow-22.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ec1a15968a9d80da01e1d30349b2b0d7cc91e96588ee324ce1b5228175043e95", size = 50288552, upload-time = "2025-10-24T10:07:53.519Z" },
|
{ url = "https://files.pythonhosted.org/packages/b0/3f/b16fab3e77709856eb6ac328ce35f57a6d4a18462c7ca5186ef31b45e0e0/pyarrow-23.0.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:7044b442f184d84e2351e5084600f0d7343d6117aabcbc1ac78eb1ae11eb4125", size = 50604794, upload-time = "2026-02-16T10:12:11.797Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/60/ae/b5a5811e11f25788ccfdaa8f26b6791c9807119dffcf80514505527c384c/pyarrow-22.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:bba208d9c7decf9961998edf5c65e3ea4355d5818dd6cd0f6809bec1afb951cc", size = 28262504, upload-time = "2025-10-24T10:08:00.932Z" },
|
{ url = "https://files.pythonhosted.org/packages/e9/a1/22df0620a9fac31d68397a75465c344e83c3dfe521f7612aea33e27ab6c0/pyarrow-23.0.1-cp313-cp313t-win_amd64.whl", hash = "sha256:a35581e856a2fafa12f3f54fce4331862b1cfb0bef5758347a858a4aa9d6bae8", size = 27660642, upload-time = "2026-02-16T10:12:17.746Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/bd/b0/0fa4d28a8edb42b0a7144edd20befd04173ac79819547216f8a9f36f9e50/pyarrow-22.0.0-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:9bddc2cade6561f6820d4cd73f99a0243532ad506bc510a75a5a65a522b2d74d", size = 34224062, upload-time = "2025-10-24T10:08:14.101Z" },
|
{ url = "https://files.pythonhosted.org/packages/8d/1b/6da9a89583ce7b23ac611f183ae4843cd3a6cf54f079549b0e8c14031e73/pyarrow-23.0.1-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:5df1161da23636a70838099d4aaa65142777185cc0cdba4037a18cee7d8db9ca", size = 34238755, upload-time = "2026-02-16T10:12:32.819Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/0f/a8/7a719076b3c1be0acef56a07220c586f25cd24de0e3f3102b438d18ae5df/pyarrow-22.0.0-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:e70ff90c64419709d38c8932ea9fe1cc98415c4f87ea8da81719e43f02534bc9", size = 35990057, upload-time = "2025-10-24T10:08:21.842Z" },
|
{ url = "https://files.pythonhosted.org/packages/ae/b5/d58a241fbe324dbaeb8df07be6af8752c846192d78d2272e551098f74e88/pyarrow-23.0.1-cp314-cp314-macosx_12_0_x86_64.whl", hash = "sha256:fa8e51cb04b9f8c9c5ace6bab63af9a1f88d35c0d6cbf53e8c17c098552285e1", size = 35847826, upload-time = "2026-02-16T10:12:38.949Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/89/3c/359ed54c93b47fb6fe30ed16cdf50e3f0e8b9ccfb11b86218c3619ae50a8/pyarrow-22.0.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:92843c305330aa94a36e706c16209cd4df274693e777ca47112617db7d0ef3d7", size = 45068002, upload-time = "2025-10-24T10:08:29.034Z" },
|
{ url = "https://files.pythonhosted.org/packages/54/a5/8cbc83f04aba433ca7b331b38f39e000efd9f0c7ce47128670e737542996/pyarrow-23.0.1-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:0b95a3994f015be13c63148fef8832e8a23938128c185ee951c98908a696e0eb", size = 44536859, upload-time = "2026-02-16T10:12:45.467Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/55/fc/4945896cc8638536ee787a3bd6ce7cec8ec9acf452d78ec39ab328efa0a1/pyarrow-22.0.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:6dda1ddac033d27421c20d7a7943eec60be44e0db4e079f33cc5af3b8280ccde", size = 47737765, upload-time = "2025-10-24T10:08:38.559Z" },
|
{ url = "https://files.pythonhosted.org/packages/36/2e/c0f017c405fcdc252dbccafbe05e36b0d0eb1ea9a958f081e01c6972927f/pyarrow-23.0.1-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:4982d71350b1a6e5cfe1af742c53dfb759b11ce14141870d05d9e540d13bc5d1", size = 47614443, upload-time = "2026-02-16T10:12:55.525Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/cd/5e/7cb7edeb2abfaa1f79b5d5eb89432356155c8426f75d3753cbcb9592c0fd/pyarrow-22.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:84378110dd9a6c06323b41b56e129c504d157d1a983ce8f5443761eb5256bafc", size = 48048139, upload-time = "2025-10-24T10:08:46.784Z" },
|
{ url = "https://files.pythonhosted.org/packages/af/6b/2314a78057912f5627afa13ba43809d9d653e6630859618b0fd81a4e0759/pyarrow-23.0.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c250248f1fe266db627921c89b47b7c06fee0489ad95b04d50353537d74d6886", size = 48232991, upload-time = "2026-02-16T10:13:04.729Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/88/c6/546baa7c48185f5e9d6e59277c4b19f30f48c94d9dd938c2a80d4d6b067c/pyarrow-22.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:854794239111d2b88b40b6ef92aa478024d1e5074f364033e73e21e3f76b25e0", size = 50314244, upload-time = "2025-10-24T10:08:55.771Z" },
|
{ url = "https://files.pythonhosted.org/packages/40/f2/1bcb1d3be3460832ef3370d621142216e15a2c7c62602a4ea19ec240dd64/pyarrow-23.0.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5f4763b83c11c16e5f4c15601ba6dfa849e20723b46aa2617cb4bffe8768479f", size = 50645077, upload-time = "2026-02-16T10:13:14.147Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/3c/79/755ff2d145aafec8d347bf18f95e4e81c00127f06d080135dfc86aea417c/pyarrow-22.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:b883fe6fd85adad7932b3271c38ac289c65b7337c2c132e9569f9d3940620730", size = 28757501, upload-time = "2025-10-24T10:09:59.891Z" },
|
{ url = "https://files.pythonhosted.org/packages/eb/3f/b1da7b61cd66566a4d4c8383d376c606d1c34a906c3f1cb35c479f59d1aa/pyarrow-23.0.1-cp314-cp314-win_amd64.whl", hash = "sha256:3a4c85ef66c134161987c17b147d6bffdca4566f9a4c1d81a0a01cdf08414ea5", size = 28234271, upload-time = "2026-02-16T10:14:09.397Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/0e/d2/237d75ac28ced3147912954e3c1a174df43a95f4f88e467809118a8165e0/pyarrow-22.0.0-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:7a820d8ae11facf32585507c11f04e3f38343c1e784c9b5a8b1da5c930547fe2", size = 34355506, upload-time = "2025-10-24T10:09:02.953Z" },
|
{ url = "https://files.pythonhosted.org/packages/b5/78/07f67434e910a0f7323269be7bfbf58699bd0c1d080b18a1ab49ba943fe8/pyarrow-23.0.1-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:17cd28e906c18af486a499422740298c52d7c6795344ea5002a7720b4eadf16d", size = 34488692, upload-time = "2026-02-16T10:13:21.541Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/1e/2c/733dfffe6d3069740f98e57ff81007809067d68626c5faef293434d11bd6/pyarrow-22.0.0-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:c6ec3675d98915bf1ec8b3c7986422682f7232ea76cad276f4c8abd5b7319b70", size = 36047312, upload-time = "2025-10-24T10:09:10.334Z" },
|
{ url = "https://files.pythonhosted.org/packages/50/76/34cf7ae93ece1f740a04910d9f7e80ba166b9b4ab9596a953e9e62b90fe1/pyarrow-23.0.1-cp314-cp314t-macosx_12_0_x86_64.whl", hash = "sha256:76e823d0e86b4fb5e1cf4a58d293036e678b5a4b03539be933d3b31f9406859f", size = 35964383, upload-time = "2026-02-16T10:13:28.63Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/7c/2b/29d6e3782dc1f299727462c1543af357a0f2c1d3c160ce199950d9ca51eb/pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:3e739edd001b04f654b166204fc7a9de896cf6007eaff33409ee9e50ceaff754", size = 45081609, upload-time = "2025-10-24T10:09:18.61Z" },
|
{ url = "https://files.pythonhosted.org/packages/46/90/459b827238936d4244214be7c684e1b366a63f8c78c380807ae25ed92199/pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:a62e1899e3078bf65943078b3ad2a6ddcacf2373bc06379aac61b1e548a75814", size = 44538119, upload-time = "2026-02-16T10:13:35.506Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/8d/42/aa9355ecc05997915af1b7b947a7f66c02dcaa927f3203b87871c114ba10/pyarrow-22.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:7388ac685cab5b279a41dfe0a6ccd99e4dbf322edfb63e02fc0443bf24134e91", size = 47703663, upload-time = "2025-10-24T10:09:27.369Z" },
|
{ url = "https://files.pythonhosted.org/packages/28/a1/93a71ae5881e99d1f9de1d4554a87be37da11cd6b152239fb5bd924fdc64/pyarrow-23.0.1-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:df088e8f640c9fae3b1f495b3c64755c4e719091caf250f3a74d095ddf3c836d", size = 47571199, upload-time = "2026-02-16T10:13:42.504Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/ee/62/45abedde480168e83a1de005b7b7043fd553321c1e8c5a9a114425f64842/pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:f633074f36dbc33d5c05b5dc75371e5660f1dbf9c8b1d95669def05e5425989c", size = 48066543, upload-time = "2025-10-24T10:09:34.908Z" },
|
{ url = "https://files.pythonhosted.org/packages/88/a3/d2c462d4ef313521eaf2eff04d204ac60775263f1fb08c374b543f79f610/pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:46718a220d64677c93bc243af1d44b55998255427588e400677d7192671845c7", size = 48259435, upload-time = "2026-02-16T10:13:49.226Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/84/e9/7878940a5b072e4f3bf998770acafeae13b267f9893af5f6d4ab3904b67e/pyarrow-22.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:4c19236ae2402a8663a2c8f21f1870a03cc57f0bef7e4b6eb3238cc82944de80", size = 50288838, upload-time = "2025-10-24T10:09:44.394Z" },
|
{ url = "https://files.pythonhosted.org/packages/cc/f1/11a544b8c3d38a759eb3fbb022039117fd633e9a7b19e4841cc3da091915/pyarrow-23.0.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a09f3876e87f48bc2f13583ab551f0379e5dfb83210391e68ace404181a20690", size = 50629149, upload-time = "2026-02-16T10:13:57.238Z" },
|
||||||
{ url = "https://files.pythonhosted.org/packages/7b/03/f335d6c52b4a4761bcc83499789a1e2e16d9d201a58c327a9b5cc9a41bd9/pyarrow-22.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0c34fe18094686194f204a3b1787a27456897d8a2d62caf84b61e8dfbc0252ae", size = 29185594, upload-time = "2025-10-24T10:09:53.111Z" },
|
{ url = "https://files.pythonhosted.org/packages/50/f2/c0e76a0b451ffdf0cf788932e182758eb7558953f4f27f1aff8e2518b653/pyarrow-23.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:527e8d899f14bd15b740cd5a54ad56b7f98044955373a17179d5956ddb93d9ce", size = 28365807, upload-time = "2026-02-16T10:14:03.892Z" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|||||||
Reference in New Issue
Block a user