Merge remote-tracking branch 'origin/main' into rknn-toolkit-lite2

2025-03-11 12:42:25 -04:00
parent 6e08f1f371 9c825e15de
commit f5e44f12e1
1075 changed files with 44876 additions and 25256 deletions
@@ -1,6 +1,6 @@
 ARG DEVICE=cpu

-FROM python:3.11-bookworm@sha256:adb581d8ed80edd03efd4dcad66db115b9ce8de8522b01720b9f3e6146f0884c AS builder-cpu
+FROM python:3.11-bookworm@sha256:68a8863d0625f42d47e0684f33ca02f19d6094ef859a8af237aaf645195ed477 AS builder-cpu

 FROM builder-cpu AS builder-openvino

@@ -21,22 +21,18 @@ FROM builder-${DEVICE} AS builder

 ARG DEVICE
 ENV PYTHONDONTWRITEBYTECODE=1 \
-    PYTHONUNBUFFERED=1 \
-    PIP_NO_CACHE_DIR=true \
-    VIRTUAL_ENV="/opt/venv" \
-    PATH="/opt/venv/bin:${PATH}"
+    PYTHONUNBUFFERED=1
+WORKDIR /usr/src/app

 RUN apt-get update && apt-get install -y --no-install-recommends g++

-RUN pip install --upgrade pip && pip install poetry
-RUN poetry config installer.max-workers 10 && \
-    poetry config virtualenvs.create false
-RUN python3 -m venv /opt/venv
+COPY --from=ghcr.io/astral-sh/uv:latest@sha256:562193a4a9d398f8aedddcb223e583da394ee735de36b5815f8f1d22cb49be15 /uv /uvx /bin/
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    uv sync --frozen --extra ${DEVICE} --no-dev --no-editable --no-install-project --compile-bytecode --no-progress

-COPY poetry.lock pyproject.toml ./
-RUN poetry install --sync --no-interaction --no-ansi --no-root --with ${DEVICE} --without dev
-
-FROM python:3.11-slim-bookworm@sha256:6ed5bff4d7d377e2a27d9285553b8c21cfccc4f00881de1b24c9bc8d90016e82 AS prod-cpu
+FROM python:3.11-slim-bookworm@sha256:614c8691ab74150465ec9123378cd4dde7a6e57be9e558c3108df40664667a4c AS prod-cpu

 FROM prod-cpu AS prod-openvino

@@ -99,7 +95,7 @@ WORKDIR /usr/src/app
 ENV TRANSFORMERS_CACHE=/cache \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
-    PATH="/opt/venv/bin:$PATH" \
+    PATH="/usr/src/app/.venv/bin:$PATH" \
    PYTHONPATH=/usr/src \
    DEVICE=${DEVICE}

@@ -108,11 +104,27 @@ RUN echo "hard core 0" >> /etc/security/limits.conf && \
    echo "fs.suid_dumpable 0" >> /etc/sysctl.conf && \
    echo 'ulimit -S -c 0 > /dev/null 2>&1' >> /etc/profile

-COPY --from=builder /opt/venv /opt/venv
+COPY --from=builder /usr/src/app/.venv /usr/src/app/.venv
 COPY ann/ann.py /usr/src/ann/ann.py
 COPY rknn/rknnpool.py /usr/src/rknn/rknnpool.py
 COPY start.sh log_conf.json gunicorn_conf.py ./
 COPY app .
+
+ARG BUILD_ID
+ARG BUILD_IMAGE
+ARG BUILD_SOURCE_REF
+ARG BUILD_SOURCE_COMMIT
+
+ENV IMMICH_BUILD=${BUILD_ID}
+ENV IMMICH_BUILD_URL=https://github.com/immich-app/immich/actions/runs/${BUILD_ID}
+ENV IMMICH_BUILD_IMAGE=${BUILD_IMAGE}
+ENV IMMICH_BUILD_IMAGE_URL=https://github.com/immich-app/immich/pkgs/container/immich-machine-learning
+ENV IMMICH_REPOSITORY=immich-app/immich
+ENV IMMICH_REPOSITORY_URL=https://github.com/immich-app/immich
+ENV IMMICH_SOURCE_REF=${BUILD_SOURCE_REF}
+ENV IMMICH_SOURCE_COMMIT=${BUILD_SOURCE_COMMIT}
+ENV IMMICH_SOURCE_URL=https://github.com/immich-app/immich/commit/${BUILD_SOURCE_COMMIT}
+
 ENTRYPOINT ["tini", "--"]
 CMD ["./start.sh"]

@@ -5,13 +5,12 @@

 # Setup

-This project uses [Poetry](https://python-poetry.org/docs/#installation), so be sure to install it first.
-Running `poetry install --no-root --with dev --with cpu` will install everything you need in an isolated virtual environment.
-CUDA and OpenVINO are supported as acceleration APIs. To use them, you can replace `--with cpu` with either of `--with cuda` or `--with openvino`. In the case of CUDA, a [compute capability](https://developer.nvidia.com/cuda-gpus) of 5.2 or higher is required.
-
-To add or remove dependencies, you can use the commands `poetry add $PACKAGE_NAME` and `poetry remove $PACKAGE_NAME`, respectively.
-Be sure to commit the `poetry.lock` and `pyproject.toml` files with `poetry lock --no-update` to reflect any changes in dependencies.
+This project uses [uv](https://docs.astral.sh/uv/getting-started/installation/), so be sure to install it first.
+Running `uv sync --extra cpu` will install everything you need in an isolated virtual environment.
+CUDA and OpenVINO are supported as acceleration APIs. To use them, you can replace `--extra cpu` with either of `--extra cuda` or `--extra openvino`. In the case of CUDA, a [compute capability](https://developer.nvidia.com/cuda-gpus) of 5.2 or higher is required.

+To add or remove dependencies, you can use the commands `uv add $PACKAGE_NAME` and `uv remove $PACKAGE_NAME`, respectively.
+Be sure to run `uv lock` and commit the updated `uv.lock` and `pyproject.toml` files to reflect any changes in dependencies.

 # Load Testing

@@ -19,22 +18,25 @@ To measure inference throughput and latency, you can use [Locust](https://locust
 Locust works by querying the model endpoints and aggregating their statistics, meaning the app must be deployed.
 You can change the models or adjust options like score thresholds through the Locust UI.

-To get started, you can simply run `locust --web-host 127.0.0.1` and open `localhost:8089` in a browser to access the UI. See the [Locust documentation](https://docs.locust.io/en/stable/index.html) for more info on running Locust. 
+To get started, you can simply run `locust --web-host 127.0.0.1` and open `localhost:8089` in a browser to access the UI. See the [Locust documentation](https://docs.locust.io/en/stable/index.html) for more info on running Locust.

 Note that in Locust's jargon, concurrency is measured in `users`, and each user runs one task at a time. To achieve a particular per-endpoint concurrency, multiply that number by the number of endpoints to be queried. For example, if there are 3 endpoints and you want each of them to receive 8 requests at a time, you should set the number of users to 24.

 # Facial Recognition

 ## Acknowledgements
+
 This project utilizes facial recognition models from the [InsightFace](https://github.com/deepinsight/insightface/tree/master/model_zoo) project. We appreciate the work put into developing these models, which have been beneficial to the machine learning part of this project.

 ### Used Models
-* antelopev2
-* buffalo_l
-* buffalo_m
-* buffalo_s
+
+- antelopev2
+- buffalo_l
+- buffalo_m
+- buffalo_s

 ## License and Use Restrictions
+
 We have received permission to use the InsightFace facial recognition models in our project, as granted via email by Jia Guo (guojia@insightface.ai) on 18th March 2023. However, it's important to note that this permission does not extend to the redistribution or commercial use of their models by third parties. Users and developers interested in using these models should review the licensing terms provided in the InsightFace GitHub repository.

-For more information on the capabilities of the InsightFace models and to ensure compliance with their license, please refer to their [official repository](https://github.com/deepinsight/insightface). Adhering to the specified licensing terms is crucial for the respectful and lawful use of their work.
+For more information on the capabilities of the InsightFace models and to ensure compliance with their license, please refer to their [official repository](https://github.com/deepinsight/insightface). Adhering to the specified licensing terms is crucial for the respectful and lawful use of their work.
@@ -20,9 +20,8 @@ class FaceRecognizer(InferenceModel):
    depends = [(ModelType.DETECTION, ModelTask.FACIAL_RECOGNITION)]
    identity = (ModelType.RECOGNITION, ModelTask.FACIAL_RECOGNITION)

-    def __init__(self, model_name: str, min_score: float = 0.7, **model_kwargs: Any) -> None:
+    def __init__(self, model_name: str, **model_kwargs: Any) -> None:
        super().__init__(model_name, **model_kwargs)
-        self.min_score = model_kwargs.pop("minScore", min_score)
        max_batch_size = settings.max_batch_size.facial_recognition if settings.max_batch_size else None
        self.batch_size = max_batch_size if max_batch_size else self._batch_size_default

@@ -344,7 +344,7 @@ class TestAnnSession:
        session.run(None, input_feed)

        ann_session.return_value.execute.assert_called_once_with(123, [input1, input2])
-        np_spy.call_count == 2
+        assert np_spy.call_count == 2
        np_spy.assert_has_calls([mock.call(input1), mock.call(input2)])


@@ -507,11 +507,14 @@ class TestCLIP:


 class TestFaceRecognition:
-    def test_set_min_score(self, mocker: MockerFixture) -> None:
-        mocker.patch.object(FaceRecognizer, "load")
-        face_recognizer = FaceRecognizer("buffalo_s", cache_dir="test_cache", min_score=0.5)
+    def test_set_min_score(self, snapshot_download: mock.Mock, ort_session: mock.Mock, path: mock.Mock) -> None:
+        path.return_value.__truediv__.return_value.__truediv__.return_value.suffix = ".onnx"

-        assert face_recognizer.min_score == 0.5
+        face_detector = FaceDetector("buffalo_s", min_score=0.5, cache_dir="test_cache")
+        face_detector.load()
+
+        assert face_detector.min_score == 0.5
+        assert face_detector.model.det_thresh == 0.5

    def test_detection(self, cv_image: cv2.Mat, mocker: MockerFixture) -> None:
        mocker.patch.object(FaceDetector, "load")
@@ -14,12 +14,6 @@ byte_image = BytesIO()
 def _(parser: ArgumentParser) -> None:
    parser.add_argument("--clip-model", type=str, default="ViT-B-32::openai")
    parser.add_argument("--face-model", type=str, default="buffalo_l")
-    parser.add_argument(
-        "--tag-min-score",
-        type=int,
-        default=0.0,
-        help="Returns all tags at or above this score. The default returns all tags.",
-    )
    parser.add_argument(
        "--face-min-score",
        type=int,
@@ -74,10 +68,10 @@ class RecognitionFormDataLoadTest(InferenceLoadTest):
            "facial-recognition": {
                "recognition": {
                    "modelName": self.environment.parsed_options.face_model,
-                    "options": {"minScore": self.environment.parsed_options.face_min_score},
                },
                "detection": {
                    "modelName": self.environment.parsed_options.face_model,
+                    "options": {"minScore": self.environment.parsed_options.face_min_score},
                },
            }
        }
@@ -1,79 +1,78 @@
-[tool.poetry]
+[project]
 name = "machine-learning"
-version = "1.125.1"
+version = "1.129.0"
 description = ""
-authors = ["Hau Tran <alex.tran1502@gmail.com>"]
+authors = [{ name = "Hau Tran", email = "alex.tran1502@gmail.com" }]
+requires-python = ">=3.10,<4.0"
 readme = "README.md"
-packages = [{include = "app"}]
+dependencies = [
+    "aiocache>=0.12.1,<1.0",
+    "fastapi>=0.95.2,<1.0",
+    "ftfy>=6.1.1",
+    "gunicorn>=21.1.0",
+    "huggingface-hub>=0.20.1,<1.0",
+    "insightface>=0.7.3,<1.0",
+    "opencv-python-headless>=4.7.0.72,<5.0",
+    "orjson>=3.9.5",
+    "pillow>=9.5.0,<11.0",
+    "pydantic>=2.0.0,<3",
+    "pydantic-settings>=2.5.2,<3",
+    "python-multipart>=0.0.6,<1.0",
+    "rich>=13.4.2",
+    "tokenizers>=0.15.0,<1.0",
+    "uvicorn[standard]>=0.22.0,<1.0",
+]

-[tool.poetry.dependencies]
-python = ">=3.10,<4.0"
-insightface = ">=0.7.3,<1.0"
-opencv-python-headless = ">=4.7.0.72,<5.0"
-pillow = ">=9.5.0,<11.0"
-fastapi = ">=0.95.2,<1.0"
-uvicorn = {extras = ["standard"], version = ">=0.22.0,<1.0"}
-pydantic = "^2.0.0"
-pydantic-settings = "^2.5.2"
-aiocache = ">=0.12.1,<1.0"
-rich = ">=13.4.2"
-ftfy = ">=6.1.1"
-python-multipart = ">=0.0.6,<1.0"
-orjson = ">=3.9.5"
-gunicorn = ">=21.1.0"
-huggingface-hub = ">=0.20.1,<1.0"
-tokenizers = ">=0.15.0,<1.0"
+[dependency-groups]
+test = [
+    "httpx>=0.24.1",
+    "pytest>=7.3.1",
+    "pytest-asyncio>=0.21.0",
+    "pytest-cov>=4.1.0",
+    "pytest-mock>=3.11.1",
+]
+types = [
+    "types-pyyaml>=6.0.12.20241230",
+    "types-requests>=2.32.0.20250306",
+    "types-setuptools>=75.8.2.20250305",
+    "types-simplejson>=3.20.0.20250218",
+    "types-ujson>=5.10.0.20240515",
+]
+lint = [
+    "black>=23.3.0",
+    "mypy>=1.3.0",
+    "ruff>=0.0.272",
+    { include-group = "types" },
+]
+dev = ["locust>=2.15.1", { include-group = "test" }, { include-group = "lint" }]

-[tool.poetry.group.dev.dependencies]
-mypy = ">=1.3.0"
-black = ">=23.3.0"
-pytest = ">=7.3.1"
-locust = ">=2.15.1"
-httpx = ">=0.24.1"
-pytest-asyncio = ">=0.21.0"
-pytest-cov = ">=4.1.0"
-ruff = ">=0.0.272"
-pytest-mock = ">=3.11.1"
+[project.optional-dependencies]
+cpu = ["onnxruntime>=1.15.0,<2"]
+cuda = ["onnxruntime-gpu>=1.17.0,<2"]
+openvino = ["onnxruntime-openvino>=1.17.1,<1.19.0"]
+armnn = ["onnxruntime>=1.15.0,<2"]
+rknn = ["onnxruntime>=1.15.0,<2", "rknn-toolkit-lite2>=2.3.0,<3"]

-[tool.poetry.group.cpu]
-optional = true
+[tool.uv]
+compile-bytecode = true

-[tool.poetry.group.cpu.dependencies]
-onnxruntime = "^1.15.0"
-
-[tool.poetry.group.cuda]
-optional = true
-
-[tool.poetry.group.cuda.dependencies]
-onnxruntime-gpu = {version = "^1.17.0", source = "cuda12"}
-
-[tool.poetry.group.openvino]
-optional = true
-
-[tool.poetry.group.openvino.dependencies]
-onnxruntime-openvino = ">=1.17.1,<1.19.0"
-
-[tool.poetry.group.armnn]
-optional = true
-
-[tool.poetry.group.armnn.dependencies]
-onnxruntime = "^1.15.0"
-
-[tool.poetry.group.rknn]
-optional = true
-
-[tool.poetry.group.rknn.dependencies]
-rknn-toolkit-lite2 = "^2.3.0"
-onnxruntime = "^1.15.0"
-
-[[tool.poetry.source]]
+[[tool.uv.index]]
 name = "cuda12"
 url = "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/"
-priority = "explicit"
+explicit = true
+
+[tool.uv.sources]
+onnxruntime-gpu = { index = "cuda12" }
+
+[tool.hatch.build.targets.sdist]
+include = ["app"]
+
+[tool.hatch.build.targets.wheel]
+include = ["app"]

 [build-system]
-requires = ["poetry-core"]
-build-backend = "poetry.core.masonry.api"
+requires = ["hatchling"]
+build-backend = "hatchling.build"

 [tool.mypy]
 python_version = "3.11"
@@ -1,5 +1,7 @@
 #!/usr/bin/env sh

+echo "Initializing Immich ML $IMMICH_SOURCE_REF"
+
 lib_path="/usr/lib/$(arch)-linux-gnu/libmimalloc.so.2"
 # mimalloc seems to increase memory usage dramatically with openvino, need to investigate
 if ! [ "$DEVICE" = "openvino" ]; then