feat: add max_memory parameter to limit memory usage (#83)
* Add max_memory parameter to limit memory usage
* Also pass max_memory in reload_model
* Add the missing `self`
* Process max_memory once in __init__, store it as an instance variable, and reuse it in both locations
This commit is contained in:
@@ -18,6 +18,9 @@ dtypes = [
|
|||||||
# Device map to pass to Accelerate when loading the model.
|
# Device map to pass to Accelerate when loading the model.
|
||||||
device_map = "auto"
|
device_map = "auto"
|
||||||
|
|
||||||
|
# Memory limits to impose, per device. Key 0 is usually your first graphics card.
|
||||||
|
# max_memory = {0 = "16GB", "cpu" = "64GB"}
|
||||||
|
|
||||||
# Number of input sequences to process in parallel (0 = auto).
|
# Number of input sequences to process in parallel (0 = auto).
|
||||||
batch_size = 0 # auto
|
batch_size = 0 # auto
|
||||||
|
|
||||||
|
|||||||
@@ -61,6 +61,11 @@ class Settings(BaseSettings):
|
|||||||
description="Device map to pass to Accelerate when loading the model.",
|
description="Device map to pass to Accelerate when loading the model.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
max_memory: Dict[str, str] | None = Field(
|
||||||
|
default=None,
|
||||||
|
description="Maximum memory to allocate per device (e.g., {'0': '20GB', 'cpu': '64GB'}).",
|
||||||
|
)
|
||||||
|
|
||||||
trust_remote_code: bool | None = Field(
|
trust_remote_code: bool | None = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description="Whether to trust remote code when loading the model.",
|
description="Whether to trust remote code when loading the model.",
|
||||||
|
|||||||
@@ -54,6 +54,11 @@ class Model:
|
|||||||
self.tokenizer.padding_side = "left"
|
self.tokenizer.padding_side = "left"
|
||||||
|
|
||||||
self.model = None
|
self.model = None
|
||||||
|
self.max_memory = (
|
||||||
|
{int(k) if k.isdigit() else k: v for k, v in settings.max_memory.items()}
|
||||||
|
if settings.max_memory
|
||||||
|
else None
|
||||||
|
)
|
||||||
self.trusted_models = {settings.model: settings.trust_remote_code}
|
self.trusted_models = {settings.model: settings.trust_remote_code}
|
||||||
|
|
||||||
if self.settings.evaluate_model is not None:
|
if self.settings.evaluate_model is not None:
|
||||||
@@ -67,6 +72,7 @@ class Model:
|
|||||||
settings.model,
|
settings.model,
|
||||||
dtype=dtype,
|
dtype=dtype,
|
||||||
device_map=settings.device_map,
|
device_map=settings.device_map,
|
||||||
|
max_memory=self.max_memory,
|
||||||
trust_remote_code=self.trusted_models.get(settings.model),
|
trust_remote_code=self.trusted_models.get(settings.model),
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -109,6 +115,7 @@ class Model:
|
|||||||
self.settings.model,
|
self.settings.model,
|
||||||
dtype=dtype,
|
dtype=dtype,
|
||||||
device_map=self.settings.device_map,
|
device_map=self.settings.device_map,
|
||||||
|
max_memory=self.max_memory,
|
||||||
trust_remote_code=self.trusted_models.get(self.settings.model),
|
trust_remote_code=self.trusted_models.get(self.settings.model),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user