feat(ml)!: cuda and openvino acceleration (#5619)

* cuda and openvino ep, refactor, update dockerfile * updated workflow * typing fixes * added tests * updated ml test gh action * updated README * updated docker-compose * added compute to hwaccel.yml * updated gh matrix updated gh matrix updated gh matrix updated gh matrix updated gh matrix give up * remove cuda/arm64 build * add hwaccel image tags to docker-compose * remove unnecessary quotes * add suffix to git tag * fixed kwargs in base model * armnn ld_library_path * update pyproject.toml * add armnn workflow * formatting * consolidate hwaccel files, update docker compose * update hw transcoding docs * add ml hwaccel docs * update dev and prod docker-compose * added armnn prerequisite docs * support 3.10 * updated docker-compose comments * formatting * test coverage * don't set arena extend strategy for openvino * working openvino * formatting * fix dockerfile * added type annotation * add wsl configuration for openvino * updated lock file * copy python3 * comment out extends section * fix platforms * simplify workflow suffix tagging * simplify aio transcoding doc * update docs and workflow for `hwaccel.yml` change * revert docs
2024-01-21 18:22:39 -05:00
parent 6b419a984c
commit 95cfe22866
23 changed files with 962 additions and 460 deletions
@@ -44,8 +44,8 @@ services:
    command: [ "/usr/src/app/bin/immich-dev", "microservices" ]
    <<: *server-common
    # extends:
-    #   file: hwaccel.yml
-    #   service: hwaccel
+    #   file: hwaccel.transcoding.yml
+    #   service: cpu # set to one of [nvenc, quicksync, rkmpp, vaapi, vaapi-wsl] for accelerated transcoding
    ports:
      - 9231:9230
    depends_on:
@@ -79,9 +79,14 @@ services:
  immich-machine-learning:
    container_name: immich_machine_learning
    image: immich-machine-learning-dev:latest
+    # extends:
+    #   file: hwaccel.ml.yml
+    #   service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
    build:
      context: ../machine-learning
      dockerfile: Dockerfile
+      args:
+        - DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
    ports:
      - 3003:3003
    volumes:
@@ -30,8 +30,8 @@ services:
    command: [ "./start-microservices.sh" ]
    <<: *server-common
    # extends:
-    #   file: hwaccel.yml
-    #   service: hwaccel
+    #   file: hwaccel.transcoding.yml
+    #   service: cpu # set to one of [nvenc, quicksync, rkmpp, vaapi, vaapi-wsl] for accelerated transcoding
    depends_on:
      - redis
      - database
@@ -40,9 +40,14 @@ services:
  immich-machine-learning:
    container_name: immich_machine_learning
    image: immich-machine-learning:latest
+    # extends:
+    #   file: hwaccel.ml.yml
+    #   service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
    build:
      context: ../machine-learning
      dockerfile: Dockerfile
+      args:
+        - DEVICE=cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference
    volumes:
      - model-cache:/cache
    env_file:
@@ -30,9 +30,9 @@ services:
  immich-microservices:
    container_name: immich_microservices
    image: ghcr.io/immich-app/immich-server:${IMMICH_VERSION:-release}
-    # extends:
-    #   file: hwaccel.yml
-    #   service: hwaccel
+    # extends: # uncomment this section for hardware acceleration - see https://immich.app/docs/features/hardware-transcoding
+    #   file: hwaccel.transcoding.yml
+    #   service: cpu # set to one of [nvenc, quicksync, rkmpp, vaapi, vaapi-wsl] for accelerated transcoding
    command: [ "start.sh", "microservices" ]
    volumes:
      - ${UPLOAD_LOCATION}:/usr/src/app/upload
@@ -46,7 +46,12 @@ services:

  immich-machine-learning:
    container_name: immich_machine_learning
+    # For hardware acceleration, add one of -[armnn, cuda, openvino] to the image tag.
+    # Example tag: ${IMMICH_VERSION:-release}-cuda
    image: ghcr.io/immich-app/immich-machine-learning:${IMMICH_VERSION:-release}
+    # extends: # uncomment this section for hardware acceleration - see https://immich.app/docs/features/ml-hardware-acceleration
+    #   file: hwaccel.ml.yml
+    #   service: cpu # set to one of [armnn, cuda, openvino, openvino-wsl] for accelerated inference - use the `-wsl` version for WSL2 where applicable
    volumes:
      - model-cache:/cache
    env_file:
@@ -1,24 +0,0 @@
-version: "3.8"
-
-# Hardware acceleration for transcoding using RKMPP for Rockchip SOCs
-# This is only needed if you want to use hardware acceleration for transcoding.
-# Supported host OS is Ubuntu Jammy 22.04 with custom ffmpeg from ppa:liujianfeng1994/rockchip-multimedia
-
-services:
-  hwaccel:
-    security_opt: # enables full access to /sys and /proc, still far better than privileged: true
-      - systempaths=unconfined
-      - apparmor=unconfined
-    group_add:
-      - video
-    devices:
-      - /dev/rga:/dev/rga
-      - /dev/dri:/dev/dri
-      - /dev/dma_heap:/dev/dma_heap
-      - /dev/mpp_service:/dev/mpp_service
-    volumes:
-      - /usr/bin/ffmpeg:/usr/bin/ffmpeg_mpp:ro
-      - /lib/aarch64-linux-gnu:/lib/ffmpeg-mpp:ro
-      - /lib/aarch64-linux-gnu/libblas.so.3:/lib/ffmpeg-mpp/libblas.so.3:ro # symlink is resolved by mounting
-      - /lib/aarch64-linux-gnu/liblapack.so.3:/lib/ffmpeg-mpp/liblapack.so.3:ro # symlink is resolved by mounting
-      - /lib/aarch64-linux-gnu/pulseaudio/libpulsecommon-15.99.so:/lib/ffmpeg-mpp/libpulsecommon-15.99.so:ro
@@ -0,0 +1,47 @@
+version: "3.8"
+
+# Configurations for hardware-accelerated machine learning
+
+# If using Unraid or another platform that doesn't allow multiple Compose files,
+# you can inline the config for a backend by copying its contents
+# into the immich-machine-learning service in the docker-compose.yml file.
+
+# See https://immich.app/docs/features/ml-hardware-acceleration for info on usage.
+
+services:
+  armnn: # passes the Mali GPU device node and its firmware/driver files into the container
+    devices:
+      - /dev/mali0:/dev/mali0
+    volumes:
+      - /lib/firmware/mali_csffw.bin:/lib/firmware/mali_csffw.bin:ro # Mali firmware for your chipset (not always required depending on the driver)
+      - /usr/lib/libmali.so:/usr/lib/libmali.so:ro # Mali driver for your chipset (always required)
+
+  cpu: {} # no acceleration; explicit empty mapping so the service is not parsed as a null value
+
+  cuda: # reserves one NVIDIA GPU for the container
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities:
+                - gpu
+                - compute
+                - video
+
+  openvino: # maps /dev/dri and the USB bus into the container
+    device_cgroup_rules:
+      - "c 189:* rmw" # character devices with major number 189 (presumably USB devices - confirm)
+    devices:
+      - /dev/dri:/dev/dri
+    volumes:
+      - /dev/bus/usb:/dev/bus/usb
+
+  openvino-wsl: # openvino variant for WSL2; additionally maps /dev/dxg and /usr/lib/wsl
+    devices:
+      - /dev/dri:/dev/dri
+      - /dev/dxg:/dev/dxg
+    volumes:
+      - /dev/bus/usb:/dev/bus/usb
+      - /usr/lib/wsl:/usr/lib/wsl
@@ -0,0 +1,59 @@
+version: "3.8"
+
+# Configurations for hardware-accelerated transcoding
+
+# If using Unraid or another platform that doesn't allow multiple Compose files,
+# you can inline the config for a backend by copying its contents
+# into the immich-microservices service in the docker-compose.yml file.
+
+# See https://immich.app/docs/features/hardware-transcoding for more info on using hardware transcoding.
+
+services:
+  cpu: {} # no acceleration; explicit empty mapping so the service is not parsed as a null value
+
+  nvenc: # reserves one NVIDIA GPU for the container
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities:
+                - gpu
+                - compute
+                - video
+
+  quicksync: # maps the host /dev/dri device nodes into the container
+    devices:
+      - /dev/dri:/dev/dri
+
+  rkmpp: # maps the Rockchip device nodes plus the host ffmpeg binary and its libraries (read-only)
+    security_opt: # enables full access to /sys and /proc, still far better than privileged: true
+      - systempaths=unconfined
+      - apparmor=unconfined
+    group_add:
+      - video
+    devices:
+      - /dev/rga:/dev/rga
+      - /dev/dri:/dev/dri
+      - /dev/dma_heap:/dev/dma_heap
+      - /dev/mpp_service:/dev/mpp_service
+    volumes:
+      - /usr/bin/ffmpeg:/usr/bin/ffmpeg_mpp:ro
+      - /lib/aarch64-linux-gnu:/lib/ffmpeg-mpp:ro
+      - /lib/aarch64-linux-gnu/libblas.so.3:/lib/ffmpeg-mpp/libblas.so.3:ro # symlink is resolved by mounting
+      - /lib/aarch64-linux-gnu/liblapack.so.3:/lib/ffmpeg-mpp/liblapack.so.3:ro # symlink is resolved by mounting
+      - /lib/aarch64-linux-gnu/pulseaudio/libpulsecommon-15.99.so:/lib/ffmpeg-mpp/libpulsecommon-15.99.so:ro
+
+  vaapi: # maps the host /dev/dri device nodes into the container
+    devices:
+      - /dev/dri:/dev/dri
+
+  vaapi-wsl: # use this for VAAPI if you're running Immich in WSL2
+    devices:
+      - /dev/dri:/dev/dri
+    volumes:
+      - /usr/lib/wsl:/usr/lib/wsl
+    environment:
+      - LD_LIBRARY_PATH=/usr/lib/wsl/lib
+      - LIBVA_DRIVER_NAME=d3d12
@@ -1,22 +0,0 @@
-version: "3.8"
-
-# Hardware acceleration for transcoding - Optional
-# This is only needed if you want to use hardware acceleration for transcoding.
-# Depending on your hardware, you should uncomment the relevant lines below.
-
-services:
-  hwaccel:
-    # devices:
-    #   - /dev/dri:/dev/dri  # If using Intel QuickSync or VAAPI
-    # volumes:
-    #   - /usr/lib/wsl:/usr/lib/wsl # If using VAAPI in WSL2
-    # environment:
-    #   - LD_LIBRARY_PATH=/usr/lib/wsl/lib # If using VAAPI in WSL2
-    #   - LIBVA_DRIVER_NAME=d3d12 # If using VAAPI in WSL2
-    # deploy: # Uncomment this section if using NVIDIA GPU
-    #   resources:
-    #     reservations:
-    #       devices:
-    #         - driver: nvidia
-    #           count: 1
-    #           capabilities: [gpu,video]
@@ -1,11 +0,0 @@
-version: "3.8"
-
-# ML acceleration on supported Mali ARM GPUs using ARM-NN
-
-services:
-  mlaccel:
-    devices:
-      - /dev/mali0:/dev/mali0
-    volumes:
-      - /lib/firmware/mali_csffw.bin:/lib/firmware/mali_csffw.bin:ro # Mali firmware for your chipset (not always required depending on the driver)
-      - /usr/lib/libmali.so:/usr/lib/libmali.so:ro # Mali driver for your chipset (always required)