feat: add max_memory parameter to limit memory usage (#83)
* add max_memory parameter to limit memory usage * Added to reload_model also * forgot to add self * Process max_memory once in __init__ and store it as an instance variable, then reuse it in both locations
This commit is contained in:
@@ -18,6 +18,9 @@ dtypes = [
|
||||
# Device map to pass to Accelerate when loading the model.
|
||||
device_map = "auto"
|
||||
|
||||
# Memory limits to impose. 0 is usually your first graphics card.
|
||||
# max_memory = {0 = "16GB", "cpu" = "64GB"}
|
||||
|
||||
# Number of input sequences to process in parallel (0 = auto).
|
||||
batch_size = 0 # auto
|
||||
|
||||
|
||||
@@ -61,6 +61,11 @@ class Settings(BaseSettings):
|
||||
description="Device map to pass to Accelerate when loading the model.",
|
||||
)
|
||||
|
||||
max_memory: Dict[str, str] | None = Field(
|
||||
default=None,
|
||||
description="Maximum memory to allocate per device (e.g., {'0': '20GB', 'cpu': '64GB'}).",
|
||||
)
|
||||
|
||||
trust_remote_code: bool | None = Field(
|
||||
default=None,
|
||||
description="Whether to trust remote code when loading the model.",
|
||||
|
||||
@@ -54,6 +54,11 @@ class Model:
|
||||
self.tokenizer.padding_side = "left"
|
||||
|
||||
self.model = None
|
||||
self.max_memory = (
|
||||
{int(k) if k.isdigit() else k: v for k, v in settings.max_memory.items()}
|
||||
if settings.max_memory
|
||||
else None
|
||||
)
|
||||
self.trusted_models = {settings.model: settings.trust_remote_code}
|
||||
|
||||
if self.settings.evaluate_model is not None:
|
||||
@@ -67,6 +72,7 @@ class Model:
|
||||
settings.model,
|
||||
dtype=dtype,
|
||||
device_map=settings.device_map,
|
||||
max_memory=self.max_memory,
|
||||
trust_remote_code=self.trusted_models.get(settings.model),
|
||||
)
|
||||
|
||||
@@ -109,6 +115,7 @@ class Model:
|
||||
self.settings.model,
|
||||
dtype=dtype,
|
||||
device_map=self.settings.device_map,
|
||||
max_memory=self.max_memory,
|
||||
trust_remote_code=self.trusted_models.get(self.settings.model),
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user