feat: add max_memory parameter to limit memory usage (#83)

* add max_memory parameter to limit memory usage

* Added to reload_model also

* forgot to add self

* Process max_memory once in __init__ and store it as an instance variable, then reuse it in both locations
This commit is contained in:
George
2025-12-11 17:27:40 +02:00
committed by GitHub
parent d9f2b0407a
commit 740aab61ba
3 changed files with 15 additions and 0 deletions
+3
View File
@@ -18,6 +18,9 @@ dtypes = [
# Device map to pass to Accelerate when loading the model.
device_map = "auto"
# Optional memory limits to impose; uncomment the example below to enable them.
# Each key names a device and each value is the limit for that device:
# 0 is usually your first graphics card, and "cpu" presumably refers to
# system RAM (confirm against the Accelerate documentation).
# max_memory = {0 = "16GB", "cpu" = "64GB"}
# Number of input sequences to process in parallel (0 = auto).
batch_size = 0 # auto