feat: add max_memory parameter to limit memory usage (#83)
* add max_memory parameter to limit memory usage
* Added to reload_model also
* forgot to add self
* Process max_memory once in __init__ and store it as an instance variable, then reuse it in both locations
This commit is contained in:
@@ -18,6 +18,9 @@ dtypes = [
|
||||
# Device map to pass to Accelerate when loading the model.
|
||||
device_map = "auto"
|
||||
|
||||
# Maximum memory to use per device. Keys are device identifiers: 0 is usually your first graphics card, and "cpu" is system RAM.
|
||||
# max_memory = {0 = "16GB", "cpu" = "64GB"}
|
||||
|
||||
# Number of input sequences to process in parallel (0 = auto).
|
||||
batch_size = 0 # auto
|
||||
|
||||
|
||||
Reference in New Issue
Block a user