feat(ml): round-robin device assignment (#13237)

* round-robin device assignment

* docs and tests

clarify doc
This commit is contained in:
Mert
2024-10-07 17:37:45 -04:00
committed by GitHub
parent 063969ca05
commit bd826b0b9b
8 changed files with 62 additions and 7 deletions

View File

@@ -104,7 +104,7 @@ RUN echo "hard core 0" >> /etc/security/limits.conf && \
COPY --from=builder /opt/venv /opt/venv
COPY ann/ann.py /usr/src/ann/ann.py
COPY start.sh log_conf.json ./
COPY start.sh log_conf.json gunicorn_conf.py ./
COPY app .
ENTRYPOINT ["tini", "--"]
CMD ["./start.sh"]

View File

@@ -39,6 +39,10 @@ class Settings(BaseSettings):
case_sensitive = False
env_nested_delimiter = "__"
@property
def device_id(self) -> str:
return os.environ.get("MACHINE_LEARNING_DEVICE_ID", "0")
class LogSettings(BaseSettings):
immich_log_level: str = "info"

View File

@@ -86,11 +86,13 @@ class OrtSession:
provider_options = []
for provider in self.providers:
match provider:
case "CPUExecutionProvider" | "CUDAExecutionProvider":
case "CPUExecutionProvider":
options = {"arena_extend_strategy": "kSameAsRequested"}
case "CUDAExecutionProvider":
options = {"arena_extend_strategy": "kSameAsRequested", "device_id": settings.device_id}
case "OpenVINOExecutionProvider":
options = {
"device_type": "GPU",
"device_type": f"GPU.{settings.device_id}",
"precision": "FP32",
"cache_dir": (self.model_path.parent / "openvino").as_posix(),
}

View File

@@ -210,10 +210,24 @@ class TestOrtSession:
session = OrtSession(model_path, providers=["OpenVINOExecutionProvider", "CPUExecutionProvider"])
assert session.provider_options == [
{"device_type": "GPU", "precision": "FP32", "cache_dir": "/cache/ViT-B-32__openai/openvino"},
{"device_type": "GPU.0", "precision": "FP32", "cache_dir": "/cache/ViT-B-32__openai/openvino"},
{"arena_extend_strategy": "kSameAsRequested"},
]
def test_sets_device_id_for_openvino(self) -> None:
os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1"
session = OrtSession("ViT-B-32__openai", providers=["OpenVINOExecutionProvider"])
assert session.provider_options[0]["device_type"] == "GPU.1"
def test_sets_device_id_for_cuda(self) -> None:
os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1"
session = OrtSession("ViT-B-32__openai", providers=["CUDAExecutionProvider"])
assert session.provider_options[0]["device_id"] == "1"
def test_sets_provider_options_kwarg(self) -> None:
session = OrtSession(
"ViT-B-32__openai",

View File

@@ -0,0 +1,12 @@
import os
from gunicorn.arbiter import Arbiter
from gunicorn.workers.base import Worker
device_ids = os.environ.get("MACHINE_LEARNING_DEVICE_IDS", "0").replace(" ", "").split(",")
env = os.environ
# Round-robin device assignment for each worker
def pre_fork(arbiter: Arbiter, _: Worker) -> None:
env["MACHINE_LEARNING_DEVICE_ID"] = device_ids[len(arbiter.WORKERS) % len(device_ids)]

View File

@@ -17,6 +17,7 @@ fi
gunicorn app.main:app \
-k app.config.CustomUvicornWorker \
-c gunicorn_conf.py \
-b "$IMMICH_HOST":"$IMMICH_PORT" \
-w "$MACHINE_LEARNING_WORKERS" \
-t "$MACHINE_LEARNING_WORKER_TIMEOUT" \