refactor(ml): model sessions (#10559)

This commit is contained in:
Mert
2024-06-25 12:00:24 -04:00
committed by GitHub
parent 6538ad8de7
commit 6356c28f64
11 changed files with 529 additions and 375 deletions

View File

@@ -1,68 +0,0 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, NamedTuple
import numpy as np
from numpy.typing import NDArray
from ann.ann import Ann
from ..config import log, settings
class AnnSession:
"""
Wrapper for ANN to be drop-in replacement for ONNX session.
"""
def __init__(self, model_path: Path):
tuning_file = Path(settings.cache_folder) / "gpu-tuning.ann"
with tuning_file.open(mode="a"):
# make sure tuning file exists (without clearing contents)
# once filled, the tuning file reduces the cost/time of the first
# inference after model load by 10s of seconds
pass
self.ann = Ann(tuning_level=3, tuning_file=tuning_file.as_posix())
log.info("Loading ANN model %s ...", model_path)
cache_file = model_path.with_suffix(".anncache")
save = False
if not cache_file.is_file():
save = True
with cache_file.open(mode="a"):
# create empty model cache file
pass
self.model = self.ann.load(
model_path.as_posix(),
save_cached_network=save,
cached_network_path=cache_file.as_posix(),
)
log.info("Loaded ANN model with ID %d", self.model)
def __del__(self) -> None:
self.ann.unload(self.model)
log.info("Unloaded ANN model %d", self.model)
self.ann.destroy()
def get_inputs(self) -> list[AnnNode]:
shapes = self.ann.input_shapes[self.model]
return [AnnNode(None, s) for s in shapes]
def get_outputs(self) -> list[AnnNode]:
shapes = self.ann.output_shapes[self.model]
return [AnnNode(None, s) for s in shapes]
def run(
self,
output_names: list[str] | None,
input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]],
run_options: Any = None,
) -> list[NDArray[np.float32]]:
inputs: list[NDArray[np.float32]] = [np.ascontiguousarray(v) for v in input_feed.values()]
return self.ann.execute(self.model, inputs)
class AnnNode(NamedTuple):
name: str | None
shape: tuple[int, ...]

View File

@@ -5,15 +5,14 @@ from pathlib import Path
from shutil import rmtree
from typing import Any, ClassVar
import onnxruntime as ort
from huggingface_hub import snapshot_download
import ann.ann
from app.models.constants import SUPPORTED_PROVIDERS
from app.sessions.ort import OrtSession
from ..config import clean_name, log, settings
from ..schemas import ModelFormat, ModelIdentity, ModelSession, ModelTask, ModelType
from .ann import AnnSession
from ..sessions.ann import AnnSession
class InferenceModel(ABC):
@@ -24,20 +23,17 @@ class InferenceModel(ABC):
self,
model_name: str,
cache_dir: Path | str | None = None,
providers: list[str] | None = None,
provider_options: list[dict[str, Any]] | None = None,
sess_options: ort.SessionOptions | None = None,
preferred_format: ModelFormat | None = None,
session: ModelSession | None = None,
**model_kwargs: Any,
) -> None:
self.loaded = False
self.loaded = session is not None
self.load_attempts = 0
self.model_name = clean_name(model_name)
self.cache_dir = Path(cache_dir) if cache_dir is not None else self.cache_dir_default
self.providers = providers if providers is not None else self.providers_default
self.provider_options = provider_options if provider_options is not None else self.provider_options_default
self.sess_options = sess_options if sess_options is not None else self.sess_options_default
self.preferred_format = preferred_format if preferred_format is not None else self.preferred_format_default
self.cache_dir = Path(cache_dir) if cache_dir is not None else self._cache_dir_default
self.model_format = preferred_format if preferred_format is not None else self._model_format_default
if session is not None:
self.session = session
def download(self) -> None:
if not self.cached:
@@ -70,7 +66,7 @@ class InferenceModel(ABC):
pass
def _download(self) -> None:
ignore_patterns = [] if self.preferred_format == ModelFormat.ARMNN else ["*.armnn"]
ignore_patterns = [] if self.model_format == ModelFormat.ARMNN else ["*.armnn"]
snapshot_download(
f"immich-app/{clean_name(self.model_name)}",
cache_dir=self.cache_dir,
@@ -105,26 +101,11 @@ class InferenceModel(ABC):
self.cache_dir.mkdir(parents=True, exist_ok=True)
def _make_session(self, model_path: Path) -> ModelSession:
if not model_path.is_file():
onnx_path = model_path.with_suffix(".onnx")
if not onnx_path.is_file():
raise ValueError(f"Model path '{model_path}' does not exist")
log.warning(
f"Could not find model path '{model_path}'. " f"Falling back to ONNX model path '{onnx_path}' instead.",
)
model_path = onnx_path
match model_path.suffix:
case ".armnn":
session = AnnSession(model_path)
session: ModelSession = AnnSession(model_path)
case ".onnx":
session = ort.InferenceSession(
model_path.as_posix(),
sess_options=self.sess_options,
providers=self.providers,
provider_options=self.provider_options,
)
session = OrtSession(model_path)
case _:
raise ValueError(f"Unsupported model file type: {model_path.suffix}")
return session
@@ -135,7 +116,7 @@ class InferenceModel(ABC):
@property
def model_path(self) -> Path:
return self.model_dir / f"model.{self.preferred_format}"
return self.model_dir / f"model.{self.model_format}"
@property
def model_task(self) -> ModelTask:
@@ -154,7 +135,7 @@ class InferenceModel(ABC):
self._cache_dir = cache_dir
@property
def cache_dir_default(self) -> Path:
def _cache_dir_default(self) -> Path:
return settings.cache_folder / self.model_task.value / self.model_name
@property
@@ -162,95 +143,18 @@ class InferenceModel(ABC):
return self.model_path.is_file()
@property
def providers(self) -> list[str]:
return self._providers
@providers.setter
def providers(self, providers: list[str]) -> None:
log.info(
(f"Setting '{self.model_name}' execution providers to {providers}, " "in descending order of preference"),
)
self._providers = providers
@property
def providers_default(self) -> list[str]:
available_providers = set(ort.get_available_providers())
log.debug(f"Available ORT providers: {available_providers}")
if (openvino := "OpenVINOExecutionProvider") in available_providers:
device_ids: list[str] = ort.capi._pybind_state.get_available_openvino_device_ids()
log.debug(f"Available OpenVINO devices: {device_ids}")
gpu_devices = [device_id for device_id in device_ids if device_id.startswith("GPU")]
if not gpu_devices:
log.warning("No GPU device found in OpenVINO. Falling back to CPU.")
available_providers.remove(openvino)
return [provider for provider in SUPPORTED_PROVIDERS if provider in available_providers]
@property
def provider_options(self) -> list[dict[str, Any]]:
return self._provider_options
@provider_options.setter
def provider_options(self, provider_options: list[dict[str, Any]]) -> None:
log.debug(f"Setting execution provider options to {provider_options}")
self._provider_options = provider_options
@property
def provider_options_default(self) -> list[dict[str, Any]]:
options = []
for provider in self.providers:
match provider:
case "CPUExecutionProvider" | "CUDAExecutionProvider":
option = {"arena_extend_strategy": "kSameAsRequested"}
case "OpenVINOExecutionProvider":
option = {"device_type": "GPU_FP32", "cache_dir": (self.cache_dir / "openvino").as_posix()}
case _:
option = {}
options.append(option)
return options
@property
def sess_options(self) -> ort.SessionOptions:
return self._sess_options
@sess_options.setter
def sess_options(self, sess_options: ort.SessionOptions) -> None:
log.debug(f"Setting execution_mode to {sess_options.execution_mode.name}")
log.debug(f"Setting inter_op_num_threads to {sess_options.inter_op_num_threads}")
log.debug(f"Setting intra_op_num_threads to {sess_options.intra_op_num_threads}")
self._sess_options = sess_options
@property
def sess_options_default(self) -> ort.SessionOptions:
sess_options = ort.SessionOptions()
sess_options.enable_cpu_mem_arena = False
# avoid thread contention between models
if settings.model_inter_op_threads > 0:
sess_options.inter_op_num_threads = settings.model_inter_op_threads
# these defaults work well for CPU, but bottleneck GPU
elif settings.model_inter_op_threads == 0 and self.providers == ["CPUExecutionProvider"]:
sess_options.inter_op_num_threads = 1
if settings.model_intra_op_threads > 0:
sess_options.intra_op_num_threads = settings.model_intra_op_threads
elif settings.model_intra_op_threads == 0 and self.providers == ["CPUExecutionProvider"]:
sess_options.intra_op_num_threads = 2
if sess_options.inter_op_num_threads > 1:
sess_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL
return sess_options
@property
def preferred_format(self) -> ModelFormat:
def model_format(self) -> ModelFormat:
return self._preferred_format
@preferred_format.setter
def preferred_format(self, preferred_format: ModelFormat) -> None:
@model_format.setter
def model_format(self, preferred_format: ModelFormat) -> None:
log.debug(f"Setting preferred format to {preferred_format}")
self._preferred_format = preferred_format
@property
def preferred_format_default(self) -> ModelFormat:
return ModelFormat.ARMNN if ann.ann.is_available and settings.ann else ModelFormat.ONNX
def _model_format_default(self) -> ModelFormat:
prefer_ann = ann.ann.is_available and settings.ann
ann_exists = (self.model_dir / "model.armnn").is_file()
if prefer_ann and not ann_exists:
log.warning(f"ARM NN is available, but '{self.model_name}' does not support ARM NN. Falling back to ONNX.")
return ModelFormat.ARMNN if prefer_ann and ann_exists else ModelFormat.ONNX

View File

@@ -3,7 +3,6 @@ from typing import Any
import numpy as np
import onnx
import onnxruntime as ort
from insightface.model_zoo import ArcFaceONNX
from insightface.utils.face_align import norm_crop
from numpy.typing import NDArray
@@ -13,7 +12,8 @@ from PIL import Image
from app.config import clean_name, log
from app.models.base import InferenceModel
from app.models.transforms import decode_cv2
from app.schemas import FaceDetectionOutput, FacialRecognitionOutput, ModelSession, ModelTask, ModelType
from app.schemas import FaceDetectionOutput, FacialRecognitionOutput, ModelFormat, ModelSession, ModelTask, ModelType
from app.sessions import has_batch_axis
class FaceRecognizer(InferenceModel):
@@ -27,13 +27,14 @@ class FaceRecognizer(InferenceModel):
cache_dir: Path | str | None = None,
**model_kwargs: Any,
) -> None:
self.min_score = model_kwargs.pop("minScore", min_score)
super().__init__(clean_name(model_name), cache_dir, **model_kwargs)
self.min_score = model_kwargs.pop("minScore", min_score)
self.batch = self.model_format == ModelFormat.ONNX
def _load(self) -> ModelSession:
session = self._make_session(self.model_path)
if not self._has_batch_dim(session):
self._add_batch_dim(self.model_path)
if self.model_format == ModelFormat.ONNX and not has_batch_axis(session):
self._add_batch_axis(self.model_path)
session = self._make_session(self.model_path)
self.model = ArcFaceONNX(
self.model_path.with_suffix(".onnx").as_posix(),
@@ -47,9 +48,20 @@ class FaceRecognizer(InferenceModel):
if faces["boxes"].shape[0] == 0:
return []
inputs = decode_cv2(inputs)
embeddings: NDArray[np.float32] = self.model.get_feat(self._crop(inputs, faces))
cropped_faces = self._crop(inputs, faces)
embeddings = self._predict_batch(cropped_faces) if self.batch else self._predict_single(cropped_faces)
return self.postprocess(faces, embeddings)
def _predict_batch(self, cropped_faces: list[NDArray[np.uint8]]) -> NDArray[np.float32]:
embeddings: NDArray[np.float32] = self.model.get_feat(cropped_faces)
return embeddings
def _predict_single(self, cropped_faces: list[NDArray[np.uint8]]) -> NDArray[np.float32]:
embeddings: list[NDArray[np.float32]] = []
for face in cropped_faces:
embeddings.append(self.model.get_feat(face))
return np.concatenate(embeddings, axis=0)
def postprocess(self, faces: FaceDetectionOutput, embeddings: NDArray[np.float32]) -> FacialRecognitionOutput:
return [
{
@@ -63,11 +75,8 @@ class FaceRecognizer(InferenceModel):
def _crop(self, image: NDArray[np.uint8], faces: FaceDetectionOutput) -> list[NDArray[np.uint8]]:
return [norm_crop(image, landmark) for landmark in faces["landmarks"]]
def _has_batch_dim(self, session: ort.InferenceSession) -> bool:
return not isinstance(session, ort.InferenceSession) or session.get_inputs()[0].shape[0] == "batch"
def _add_batch_dim(self, model_path: Path) -> None:
log.debug(f"Adding batch dimension to model {model_path}")
def _add_batch_axis(self, model_path: Path) -> None:
log.debug(f"Adding batch axis to model {model_path}")
proto = onnx.load(model_path)
static_input_dims = [shape.dim_value for shape in proto.graph.input[0].type.tensor_type.shape.dim[1:]]
static_output_dims = [shape.dim_value for shape in proto.graph.output[0].type.tensor_type.shape.dim[1:]]