Merge remote-tracking branch 'origin/main' into rknn-toolkit-lite2

This commit is contained in:
mertalev
2025-03-11 12:42:25 -04:00
1075 changed files with 44876 additions and 25256 deletions
+27 -15
View File
@@ -1,6 +1,6 @@
ARG DEVICE=cpu
FROM python:3.11-bookworm@sha256:adb581d8ed80edd03efd4dcad66db115b9ce8de8522b01720b9f3e6146f0884c AS builder-cpu
FROM python:3.11-bookworm@sha256:68a8863d0625f42d47e0684f33ca02f19d6094ef859a8af237aaf645195ed477 AS builder-cpu
FROM builder-cpu AS builder-openvino
@@ -21,22 +21,18 @@ FROM builder-${DEVICE} AS builder
ARG DEVICE
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
PIP_NO_CACHE_DIR=true \
VIRTUAL_ENV="/opt/venv" \
PATH="/opt/venv/bin:${PATH}"
PYTHONUNBUFFERED=1
WORKDIR /usr/src/app
RUN apt-get update && apt-get install -y --no-install-recommends g++
RUN pip install --upgrade pip && pip install poetry
RUN poetry config installer.max-workers 10 && \
poetry config virtualenvs.create false
RUN python3 -m venv /opt/venv
COPY --from=ghcr.io/astral-sh/uv:latest@sha256:562193a4a9d398f8aedddcb223e583da394ee735de36b5815f8f1d22cb49be15 /uv /uvx /bin/
RUN --mount=type=cache,target=/root/.cache/uv \
--mount=type=bind,source=uv.lock,target=uv.lock \
--mount=type=bind,source=pyproject.toml,target=pyproject.toml \
uv sync --frozen --extra ${DEVICE} --no-dev --no-editable --no-install-project --compile-bytecode --no-progress
COPY poetry.lock pyproject.toml ./
RUN poetry install --sync --no-interaction --no-ansi --no-root --with ${DEVICE} --without dev
FROM python:3.11-slim-bookworm@sha256:6ed5bff4d7d377e2a27d9285553b8c21cfccc4f00881de1b24c9bc8d90016e82 AS prod-cpu
FROM python:3.11-slim-bookworm@sha256:614c8691ab74150465ec9123378cd4dde7a6e57be9e558c3108df40664667a4c AS prod-cpu
FROM prod-cpu AS prod-openvino
@@ -99,7 +95,7 @@ WORKDIR /usr/src/app
ENV TRANSFORMERS_CACHE=/cache \
PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
PATH="/opt/venv/bin:$PATH" \
PATH="/usr/src/app/.venv/bin:$PATH" \
PYTHONPATH=/usr/src \
DEVICE=${DEVICE}
@@ -108,11 +104,27 @@ RUN echo "hard core 0" >> /etc/security/limits.conf && \
echo "fs.suid_dumpable 0" >> /etc/sysctl.conf && \
echo 'ulimit -S -c 0 > /dev/null 2>&1' >> /etc/profile
COPY --from=builder /opt/venv /opt/venv
COPY --from=builder /usr/src/app/.venv /usr/src/app/.venv
COPY ann/ann.py /usr/src/ann/ann.py
COPY rknn/rknnpool.py /usr/src/rknn/rknnpool.py
COPY start.sh log_conf.json gunicorn_conf.py ./
COPY app .
ARG BUILD_ID
ARG BUILD_IMAGE
ARG BUILD_SOURCE_REF
ARG BUILD_SOURCE_COMMIT
ENV IMMICH_BUILD=${BUILD_ID}
ENV IMMICH_BUILD_URL=https://github.com/immich-app/immich/actions/runs/${BUILD_ID}
ENV IMMICH_BUILD_IMAGE=${BUILD_IMAGE}
ENV IMMICH_BUILD_IMAGE_URL=https://github.com/immich-app/immich/pkgs/container/immich-machine-learning
ENV IMMICH_REPOSITORY=immich-app/immich
ENV IMMICH_REPOSITORY_URL=https://github.com/immich-app/immich
ENV IMMICH_SOURCE_REF=${BUILD_SOURCE_REF}
ENV IMMICH_SOURCE_COMMIT=${BUILD_SOURCE_COMMIT}
ENV IMMICH_SOURCE_URL=https://github.com/immich-app/immich/commit/${BUILD_SOURCE_COMMIT}
ENTRYPOINT ["tini", "--"]
CMD ["./start.sh"]
+14 -12
View File
@@ -5,13 +5,12 @@
# Setup
This project uses [Poetry](https://python-poetry.org/docs/#installation), so be sure to install it first.
Running `poetry install --no-root --with dev --with cpu` will install everything you need in an isolated virtual environment.
CUDA and OpenVINO are supported as acceleration APIs. To use them, you can replace `--with cpu` with either of `--with cuda` or `--with openvino`. In the case of CUDA, a [compute capability](https://developer.nvidia.com/cuda-gpus) of 5.2 or higher is required.
To add or remove dependencies, you can use the commands `poetry add $PACKAGE_NAME` and `poetry remove $PACKAGE_NAME`, respectively.
Be sure to commit the `poetry.lock` and `pyproject.toml` files with `poetry lock --no-update` to reflect any changes in dependencies.
This project uses [uv](https://docs.astral.sh/uv/getting-started/installation/), so be sure to install it first.
Running `uv sync --extra cpu` will install everything you need in an isolated virtual environment.
CUDA and OpenVINO are supported as acceleration APIs. To use them, you can replace `--group cpu` with either of `--group cuda` or `--group openvino`. In the case of CUDA, a [compute capability](https://developer.nvidia.com/cuda-gpus) of 5.2 or higher is required.
To add or remove dependencies, you can use the commands `uv add $PACKAGE_NAME` and `uv remove $PACKAGE_NAME`, respectively.
Be sure to commit the `uv.lock` and `pyproject.toml` files with `uv lock` to reflect any changes in dependencies.
# Load Testing
@@ -19,22 +18,25 @@ To measure inference throughput and latency, you can use [Locust](https://locust
Locust works by querying the model endpoints and aggregating their statistics, meaning the app must be deployed.
You can change the models or adjust options like score thresholds through the Locust UI.
To get started, you can simply run `locust --web-host 127.0.0.1` and open `localhost:8089` in a browser to access the UI. See the [Locust documentation](https://docs.locust.io/en/stable/index.html) for more info on running Locust.
To get started, you can simply run `locust --web-host 127.0.0.1` and open `localhost:8089` in a browser to access the UI. See the [Locust documentation](https://docs.locust.io/en/stable/index.html) for more info on running Locust.
Note that in Locust's jargon, concurrency is measured in `users`, and each user runs one task at a time. To achieve a particular per-endpoint concurrency, multiply that number by the number of endpoints to be queried. For example, if there are 3 endpoints and you want each of them to receive 8 requests at a time, you should set the number of users to 24.
# Facial Recognition
## Acknowledgements
This project utilizes facial recognition models from the [InsightFace](https://github.com/deepinsight/insightface/tree/master/model_zoo) project. We appreciate the work put into developing these models, which have been beneficial to the machine learning part of this project.
### Used Models
* antelopev2
* buffalo_l
* buffalo_m
* buffalo_s
- antelopev2
- buffalo_l
- buffalo_m
- buffalo_s
## License and Use Restrictions
We have received permission to use the InsightFace facial recognition models in our project, as granted via email by Jia Guo (guojia@insightface.ai) on 18th March 2023. However, it's important to note that this permission does not extend to the redistribution or commercial use of their models by third parties. Users and developers interested in using these models should review the licensing terms provided in the InsightFace GitHub repository.
For more information on the capabilities of the InsightFace models and to ensure compliance with their license, please refer to their [official repository](https://github.com/deepinsight/insightface). Adhering to the specified licensing terms is crucial for the respectful and lawful use of their work.
For more information on the capabilities of the InsightFace models and to ensure compliance with their license, please refer to their [official repository](https://github.com/deepinsight/insightface). Adhering to the specified licensing terms is crucial for the respectful and lawful use of their work.
@@ -20,9 +20,8 @@ class FaceRecognizer(InferenceModel):
depends = [(ModelType.DETECTION, ModelTask.FACIAL_RECOGNITION)]
identity = (ModelType.RECOGNITION, ModelTask.FACIAL_RECOGNITION)
def __init__(self, model_name: str, min_score: float = 0.7, **model_kwargs: Any) -> None:
def __init__(self, model_name: str, **model_kwargs: Any) -> None:
super().__init__(model_name, **model_kwargs)
self.min_score = model_kwargs.pop("minScore", min_score)
max_batch_size = settings.max_batch_size.facial_recognition if settings.max_batch_size else None
self.batch_size = max_batch_size if max_batch_size else self._batch_size_default
+8 -5
View File
@@ -344,7 +344,7 @@ class TestAnnSession:
session.run(None, input_feed)
ann_session.return_value.execute.assert_called_once_with(123, [input1, input2])
np_spy.call_count == 2
assert np_spy.call_count == 2
np_spy.assert_has_calls([mock.call(input1), mock.call(input2)])
@@ -507,11 +507,14 @@ class TestCLIP:
class TestFaceRecognition:
def test_set_min_score(self, mocker: MockerFixture) -> None:
mocker.patch.object(FaceRecognizer, "load")
face_recognizer = FaceRecognizer("buffalo_s", cache_dir="test_cache", min_score=0.5)
def test_set_min_score(self, snapshot_download: mock.Mock, ort_session: mock.Mock, path: mock.Mock) -> None:
path.return_value.__truediv__.return_value.__truediv__.return_value.suffix = ".onnx"
assert face_recognizer.min_score == 0.5
face_detector = FaceDetector("buffalo_s", min_score=0.5, cache_dir="test_cache")
face_detector.load()
assert face_detector.min_score == 0.5
assert face_detector.model.det_thresh == 0.5
def test_detection(self, cv_image: cv2.Mat, mocker: MockerFixture) -> None:
mocker.patch.object(FaceDetector, "load")
+1 -7
View File
@@ -14,12 +14,6 @@ byte_image = BytesIO()
def _(parser: ArgumentParser) -> None:
parser.add_argument("--clip-model", type=str, default="ViT-B-32::openai")
parser.add_argument("--face-model", type=str, default="buffalo_l")
parser.add_argument(
"--tag-min-score",
type=int,
default=0.0,
help="Returns all tags at or above this score. The default returns all tags.",
)
parser.add_argument(
"--face-min-score",
type=int,
@@ -74,10 +68,10 @@ class RecognitionFormDataLoadTest(InferenceLoadTest):
"facial-recognition": {
"recognition": {
"modelName": self.environment.parsed_options.face_model,
"options": {"minScore": self.environment.parsed_options.face_min_score},
},
"detection": {
"modelName": self.environment.parsed_options.face_model,
"options": {"minScore": self.environment.parsed_options.face_min_score},
},
}
}
-3828
View File
File diff suppressed because it is too large Load Diff
+64 -65
View File
@@ -1,79 +1,78 @@
[tool.poetry]
[project]
name = "machine-learning"
version = "1.125.1"
version = "1.129.0"
description = ""
authors = ["Hau Tran <alex.tran1502@gmail.com>"]
authors = [{ name = "Hau Tran", email = "alex.tran1502@gmail.com" }]
requires-python = ">=3.10,<4.0"
readme = "README.md"
packages = [{include = "app"}]
dependencies = [
"aiocache>=0.12.1,<1.0",
"fastapi>=0.95.2,<1.0",
"ftfy>=6.1.1",
"gunicorn>=21.1.0",
"huggingface-hub>=0.20.1,<1.0",
"insightface>=0.7.3,<1.0",
"opencv-python-headless>=4.7.0.72,<5.0",
"orjson>=3.9.5",
"pillow>=9.5.0,<11.0",
"pydantic>=2.0.0,<3",
"pydantic-settings>=2.5.2,<3",
"python-multipart>=0.0.6,<1.0",
"rich>=13.4.2",
"tokenizers>=0.15.0,<1.0",
"uvicorn[standard]>=0.22.0,<1.0",
]
[tool.poetry.dependencies]
python = ">=3.10,<4.0"
insightface = ">=0.7.3,<1.0"
opencv-python-headless = ">=4.7.0.72,<5.0"
pillow = ">=9.5.0,<11.0"
fastapi = ">=0.95.2,<1.0"
uvicorn = {extras = ["standard"], version = ">=0.22.0,<1.0"}
pydantic = "^2.0.0"
pydantic-settings = "^2.5.2"
aiocache = ">=0.12.1,<1.0"
rich = ">=13.4.2"
ftfy = ">=6.1.1"
python-multipart = ">=0.0.6,<1.0"
orjson = ">=3.9.5"
gunicorn = ">=21.1.0"
huggingface-hub = ">=0.20.1,<1.0"
tokenizers = ">=0.15.0,<1.0"
[dependency-groups]
test = [
"httpx>=0.24.1",
"pytest>=7.3.1",
"pytest-asyncio>=0.21.0",
"pytest-cov>=4.1.0",
"pytest-mock>=3.11.1",
]
types = [
"types-pyyaml>=6.0.12.20241230",
"types-requests>=2.32.0.20250306",
"types-setuptools>=75.8.2.20250305",
"types-simplejson>=3.20.0.20250218",
"types-ujson>=5.10.0.20240515",
]
lint = [
"black>=23.3.0",
"mypy>=1.3.0",
"ruff>=0.0.272",
{ include-group = "types" },
]
dev = ["locust>=2.15.1", { include-group = "test" }, { include-group = "lint" }]
[tool.poetry.group.dev.dependencies]
mypy = ">=1.3.0"
black = ">=23.3.0"
pytest = ">=7.3.1"
locust = ">=2.15.1"
httpx = ">=0.24.1"
pytest-asyncio = ">=0.21.0"
pytest-cov = ">=4.1.0"
ruff = ">=0.0.272"
pytest-mock = ">=3.11.1"
[project.optional-dependencies]
cpu = ["onnxruntime>=1.15.0,<2"]
cuda = ["onnxruntime-gpu>=1.17.0,<2"]
openvino = ["onnxruntime-openvino>=1.17.1,<1.19.0"]
armnn = ["onnxruntime>=1.15.0,<2"]
rknn = ["onnxruntime>=1.15.0,<2", "rknn-toolkit-lite2>=2.3.0,<3"]
[tool.poetry.group.cpu]
optional = true
[tool.uv]
compile-bytecode = true
[tool.poetry.group.cpu.dependencies]
onnxruntime = "^1.15.0"
[tool.poetry.group.cuda]
optional = true
[tool.poetry.group.cuda.dependencies]
onnxruntime-gpu = {version = "^1.17.0", source = "cuda12"}
[tool.poetry.group.openvino]
optional = true
[tool.poetry.group.openvino.dependencies]
onnxruntime-openvino = ">=1.17.1,<1.19.0"
[tool.poetry.group.armnn]
optional = true
[tool.poetry.group.armnn.dependencies]
onnxruntime = "^1.15.0"
[tool.poetry.group.rknn]
optional = true
[tool.poetry.group.rknn.dependencies]
rknn-toolkit-lite2 = "^2.3.0"
onnxruntime = "^1.15.0"
[[tool.poetry.source]]
[[tool.uv.index]]
name = "cuda12"
url = "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/"
priority = "explicit"
explicit = true
[tool.uv.sources]
onnxruntime-gpu = { index = "cuda12" }
[tool.hatch.build.targets.sdist]
include = ["app"]
[tool.hatch.build.targets.wheel]
include = ["app"]
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.mypy]
python_version = "3.11"
+2
View File
@@ -1,5 +1,7 @@
#!/usr/bin/env sh
echo "Initializing Immich ML $IMMICH_SOURCE_REF"
lib_path="/usr/lib/$(arch)-linux-gnu/libmimalloc.so.2"
# mimalloc seems to increase memory usage dramatically with openvino, need to investigate
if ! [ "$DEVICE" = "openvino" ]; then
+2725
View File
File diff suppressed because it is too large Load Diff