docs: model benchmarks (#17036)

* model benchmarks

* minor fixes

* formatting

* docs build

* maybe fix reference

* clarify optimal

* use emojis

* wording

* wording

* clarify optimal wording

* bolding

* more detailed instructions

* clarify edge case fix

* early exit in dim loop
This commit is contained in:
Mert
2025-03-24 12:02:33 -04:00
committed by GitHub
parent ad151130f9
commit 4bfef2460a
16 changed files with 2209 additions and 255 deletions
@@ -8,6 +8,11 @@ class ModelSource(StrEnum):
OPENCLIP = "openclip"
# String-valued enumeration of model tasks (StrEnum, so each member is also its string value).
# Note that SEARCH carries the value "clip", not "search".
class ModelTask(StrEnum):
FACIAL_RECOGNITION = "facial-recognition"
SEARCH = "clip"
class SourceMetadata(NamedTuple):
name: str
link: str
@@ -22,6 +27,13 @@ SOURCE_TO_METADATA = {
),
}
# Lookup from a ModelSource member to the ModelTask associated with it:
# both CLIP sources (MCLIP, OPENCLIP) map to SEARCH, INSIGHTFACE maps to FACIAL_RECOGNITION.
SOURCE_TO_TASK = {
ModelSource.MCLIP: ModelTask.SEARCH,
ModelSource.OPENCLIP: ModelTask.SEARCH,
ModelSource.INSIGHTFACE: ModelTask.FACIAL_RECOGNITION,
}
# SoC identifiers iterated by the RKNN export code (`for soc in RKNN_SOCS`), one export attempt per entry.
RKNN_SOCS = ["rk3566", "rk3568", "rk3576", "rk3588"]
@@ -5,16 +5,16 @@ from .models import mclip, openclip
def export(
model_name: str, model_source: ModelSource, output_dir: Path, opset_version: int = 19, no_cache: bool = False
model_name: str, model_source: ModelSource, output_dir: Path, opset_version: int = 19, cache: bool = True
) -> None:
visual_dir = output_dir / "visual"
textual_dir = output_dir / "textual"
match model_source:
case ModelSource.MCLIP:
mclip.to_onnx(model_name, opset_version, visual_dir, textual_dir, no_cache=no_cache)
mclip.to_onnx(model_name, opset_version, visual_dir, textual_dir, cache=cache)
case ModelSource.OPENCLIP:
name, _, pretrained = model_name.partition("__")
config = openclip.OpenCLIPModelConfig(name, pretrained)
openclip.to_onnx(config, opset_version, visual_dir, textual_dir, no_cache=no_cache)
openclip.to_onnx(config, opset_version, visual_dir, textual_dir, cache=cache)
case _:
raise ValueError(f"Unsupported model source {model_source}")
@@ -19,10 +19,10 @@ def to_onnx(
opset_version: int,
output_dir_visual: Path | str,
output_dir_textual: Path | str,
no_cache: bool = False,
cache: bool = True,
) -> tuple[Path, Path]:
textual_path = get_model_path(output_dir_textual)
if no_cache or not textual_path.exists():
if not cache or not textual_path.exists():
import torch
from multilingual_clip.pt_multilingual_clip import MultilingualCLIP
from transformers import AutoTokenizer
@@ -39,9 +39,7 @@ def to_onnx(
_export_text_encoder(model, textual_path, opset_version)
else:
print(f"Model {textual_path} already exists, skipping")
visual_path, _ = openclip_to_onnx(
_MCLIP_TO_OPENCLIP[model_name], opset_version, output_dir_visual, no_cache=no_cache
)
visual_path, _ = openclip_to_onnx(_MCLIP_TO_OPENCLIP[model_name], opset_version, output_dir_visual, cache=cache)
assert visual_path is not None, "Visual model export failed"
return visual_path, textual_path
@@ -37,7 +37,7 @@ def to_onnx(
opset_version: int,
output_dir_visual: Path | str | None = None,
output_dir_textual: Path | str | None = None,
no_cache: bool = False,
cache: bool = True,
) -> tuple[Path | None, Path | None]:
visual_path = None
textual_path = None
@@ -49,9 +49,7 @@ def to_onnx(
output_dir_textual = Path(output_dir_textual)
textual_path = get_model_path(output_dir_textual)
if not no_cache and (
(textual_path is None or textual_path.exists()) and (visual_path is None or visual_path.exists())
):
if cache and ((textual_path is None or textual_path.exists()) and (visual_path is None or visual_path.exists())):
print(f"Models {textual_path} and {visual_path} already exist, skipping")
return visual_path, textual_path
@@ -75,7 +73,7 @@ def to_onnx(
param.requires_grad_(False)
if visual_path is not None and output_dir_visual is not None:
if no_cache or not visual_path.exists():
if not cache or not visual_path.exists():
save_config(
open_clip.get_model_preprocess_cfg(model),
output_dir_visual / "preprocess_cfg.json",
@@ -86,7 +84,7 @@ def to_onnx(
print(f"Model {visual_path} already exists, skipping")
if textual_path is not None and output_dir_textual is not None:
if no_cache or not textual_path.exists():
if not cache or not textual_path.exists():
tokenizer_name = text_vision_cfg["text_cfg"].get("hf_tokenizer_name", "openai/clip-vit-base-patch32")
AutoTokenizer.from_pretrained(tokenizer_name).save_pretrained(output_dir_textual)
_export_text_encoder(model, model_cfg, textual_path, opset_version)
@@ -9,13 +9,13 @@ def _export_platform(
inputs: list[str] | None = None,
input_size_list: list[list[int]] | None = None,
fuse_matmul_softmax_matmul_to_sdpa: bool = True,
no_cache: bool = False,
cache: bool = True,
) -> None:
from rknn.api import RKNN
input_path = model_dir / "model.onnx"
output_path = model_dir / "rknpu" / target_platform / "model.rknn"
if not no_cache and output_path.exists():
if cache and output_path.exists():
print(f"Model {input_path} already exists at {output_path}, skipping")
return
@@ -49,7 +49,7 @@ def _export_platforms(
model_dir: Path,
inputs: list[str] | None = None,
input_size_list: list[list[int]] | None = None,
no_cache: bool = False,
cache: bool = True,
) -> None:
fuse_matmul_softmax_matmul_to_sdpa = True
for soc in RKNN_SOCS:
@@ -60,7 +60,7 @@ def _export_platforms(
inputs=inputs,
input_size_list=input_size_list,
fuse_matmul_softmax_matmul_to_sdpa=fuse_matmul_softmax_matmul_to_sdpa,
no_cache=no_cache,
cache=cache,
)
except Exception as e:
print(f"Failed to export model for {soc}: {e}")
@@ -73,24 +73,24 @@ def _export_platforms(
inputs=inputs,
input_size_list=input_size_list,
fuse_matmul_softmax_matmul_to_sdpa=fuse_matmul_softmax_matmul_to_sdpa,
no_cache=no_cache,
cache=cache,
)
def export(model_dir: Path, cache: bool = True) -> None:
    """Run the per-platform export for every model subdirectory present in ``model_dir``.

    The subdirectories "textual", "visual", "detection" and "recognition" are checked in
    that order; each one that exists as a directory is passed to ``_export_platforms``.
    Missing subdirectories are silently skipped.

    Args:
        model_dir: Directory that may contain the model subdirectories listed above.
        cache: Forwarded unchanged to ``_export_platforms``.
    """
    # (subdirectory, inputs, input_size_list). None keeps _export_platforms' own defaults;
    # detection and recognition pin the input name and a fixed NCHW-looking size list.
    targets = (
        ("textual", None, None),
        ("visual", None, None),
        ("detection", ["input.1"], [[1, 3, 640, 640]]),
        ("recognition", ["input.1"], [[1, 3, 112, 112]]),
    )
    for subdir, inputs, input_size_list in targets:
        candidate = model_dir / subdir
        if candidate.is_dir():
            _export_platforms(candidate, inputs=inputs, input_size_list=input_size_list, cache=cache)