This commit is contained in:
Yoni Yang
2024-11-29 07:42:09 +00:00
parent 56d2309122
commit 8ef3e49f74
10 changed files with 583 additions and 81 deletions

View File

@@ -44,6 +44,7 @@ class Settings(BaseSettings):
ann: bool = True
ann_fp16_turbo: bool = False
ann_tuning_level: int = 2
rknn: bool = True
preload: PreloadModelData | None = None
max_batch_size: MaxBatchSize | None = None

View File

@@ -9,6 +9,7 @@ from huggingface_hub import snapshot_download
import ann.ann
from app.sessions.ort import OrtSession
from app.sessions.rknn import RknnSession
from ..config import clean_name, log, settings
from ..schemas import ModelFormat, ModelIdentity, ModelSession, ModelTask, ModelType
@@ -108,6 +109,8 @@ class InferenceModel(ABC):
session: ModelSession = AnnSession(model_path)
case ".onnx":
session = OrtSession(model_path)
case ".rknn":
session = RknnSession(model_path)
case _:
raise ValueError(f"Unsupported model file type: {model_path.suffix}")
return session
@@ -155,4 +158,9 @@ class InferenceModel(ABC):
@property
def _model_format_default(self) -> ModelFormat:
    """Pick the model format to use when none was requested explicitly.

    Preference order: RKNN when ``settings.rknn`` is enabled, then ARM NN
    when the ``ann`` backend reports itself available and ``settings.ann``
    is enabled, otherwise ONNX.

    The previous body began with an unconditional ``return`` of the old
    ARMNN/ONNX expression, which made the RKNN branch unreachable.
    """
    # NOTE(review): unlike the ARM NN branch, this only checks the setting and
    # not whether an RKNN runtime is actually present — confirm that is intended.
    if settings.rknn:
        return ModelFormat.RKNN
    if ann.ann.is_available and settings.ann:
        return ModelFormat.ARMNN
    return ModelFormat.ONNX

View File

@@ -35,6 +35,7 @@ class ModelType(StrEnum):
class ModelFormat(StrEnum):
ARMNN = "armnn"
ONNX = "onnx"
RKNN = "RKNN"
class ModelSource(StrEnum):

View File

@@ -0,0 +1,63 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, List
import numpy as np
from numpy.typing import NDArray
from rknn.api import RKNN # Importing RKNN API
from app.models.constants import SUPPORTED_PROVIDERS
from app.schemas import SessionNode
from ..config import log, settings
class RknnSession:
    """Inference session that runs a ``.rknn`` model through the RKNN API.

    The session owns an ``RKNN`` handle for its whole lifetime; call
    :meth:`release` when the session is no longer needed.
    """

    def __init__(self, model_path: Path | str):
        """Load the model at *model_path* and initialise the RKNN runtime.

        Raises:
            RuntimeError: If the model cannot be loaded or the runtime cannot
                be initialised.
        """
        self.model_path = Path(model_path)
        self.rknn = RKNN()
        log.info(f"Loading RKNN model from {self.model_path}")
        try:
            self._load_model()
        except Exception:
            # Construction failed part-way: free the native handle instead of
            # leaking it, then let the original error propagate.
            self.rknn.release()
            raise

    def _load_model(self) -> None:
        """Load the model file and start the runtime, raising on a non-zero status."""
        ret = self.rknn.load_rknn(self.model_path.as_posix())
        if ret != 0:
            raise RuntimeError("Failed to load RKNN model")
        ret = self.rknn.init_runtime()
        if ret != 0:
            raise RuntimeError("Failed to initialize RKNN runtime")

    def get_inputs(self) -> List[SessionNode]:
        """Return the input descriptions reported by the RKNN handle.

        The value is passed through unchanged; it is not converted into
        ``SessionNode`` objects.
        """
        # NOTE(review): confirm `query_inputs` exists on the installed RKNN API
        # and that its result is usable where a SessionNode is expected.
        return self.rknn.query_inputs()

    def get_outputs(self) -> List[SessionNode]:
        """Return the output descriptions reported by the RKNN handle.

        The value is passed through unchanged; it is not converted into
        ``SessionNode`` objects.
        """
        # NOTE(review): confirm `query_outputs` exists on the installed RKNN API
        # and that its result is usable where a SessionNode is expected.
        return self.rknn.query_outputs()

    def run(
        self,
        input_feed: dict[str, NDArray[np.float32] | NDArray[np.int32]],
    ) -> List[NDArray[np.float32]]:
        """Run one inference pass and return the model outputs.

        Args:
            input_feed: Mapping of input name to array. Only the values are
                forwarded, in the mapping's iteration order.

        Raises:
            RuntimeError: If the runtime returns no result.
        """
        # NOTE(review): inputs are matched positionally, so the dict order is
        # presumably expected to follow the model's input order — confirm.
        inputs = list(input_feed.values())
        log.debug("Running inference on RKNN model")
        # `inference` hands back the output list itself rather than a
        # (status, outputs) pair, so failure is detected by a missing result.
        outputs = self.rknn.inference(inputs=inputs)
        if outputs is None:
            raise RuntimeError("Inference failed")
        return outputs

    def release(self) -> None:
        """Release the resources held by the underlying RKNN handle."""
        log.info("Releasing RKNN resources")
        self.rknn.release()
# Example Usage:
# session = RknnSession(model_path="path/to/model.rknn")
# outputs = session.run(input_feed={"input_name": input_data})
# session.release()