docs: model benchmarks (#17036)
* model benchmarks
* minor fixes
* formatting
* docs build
* maybe fix reference
* clarify optimal
* use emojis
* wording
* wording
* clarify optimal wording
* bolding
* more detailed instructions
* clarify edge case fix
* early exit in dim loop
This commit is contained in:
@@ -8,6 +8,11 @@ class ModelSource(StrEnum):
|
||||
OPENCLIP = "openclip"
|
||||
|
||||
|
||||
class ModelTask(StrEnum):
|
||||
FACIAL_RECOGNITION = "facial-recognition"
|
||||
SEARCH = "clip"
|
||||
|
||||
|
||||
class SourceMetadata(NamedTuple):
|
||||
name: str
|
||||
link: str
|
||||
@@ -22,6 +27,13 @@ SOURCE_TO_METADATA = {
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
# Task associated with each model source.
SOURCE_TO_TASK = {
    ModelSource.MCLIP: ModelTask.SEARCH,
    ModelSource.OPENCLIP: ModelTask.SEARCH,
    ModelSource.INSIGHTFACE: ModelTask.FACIAL_RECOGNITION,
}
|
||||
|
||||
# SoC identifiers that the RKNN export code iterates over (one export attempt per entry).
RKNN_SOCS = ["rk3566", "rk3568", "rk3576", "rk3588"]
|
||||
|
||||
|
||||
|
||||
@@ -5,16 +5,16 @@ from .models import mclip, openclip
|
||||
|
||||
|
||||
def export(
|
||||
model_name: str, model_source: ModelSource, output_dir: Path, opset_version: int = 19, no_cache: bool = False
|
||||
model_name: str, model_source: ModelSource, output_dir: Path, opset_version: int = 19, cache: bool = True
|
||||
) -> None:
|
||||
visual_dir = output_dir / "visual"
|
||||
textual_dir = output_dir / "textual"
|
||||
match model_source:
|
||||
case ModelSource.MCLIP:
|
||||
mclip.to_onnx(model_name, opset_version, visual_dir, textual_dir, no_cache=no_cache)
|
||||
mclip.to_onnx(model_name, opset_version, visual_dir, textual_dir, cache=cache)
|
||||
case ModelSource.OPENCLIP:
|
||||
name, _, pretrained = model_name.partition("__")
|
||||
config = openclip.OpenCLIPModelConfig(name, pretrained)
|
||||
openclip.to_onnx(config, opset_version, visual_dir, textual_dir, no_cache=no_cache)
|
||||
openclip.to_onnx(config, opset_version, visual_dir, textual_dir, cache=cache)
|
||||
case _:
|
||||
raise ValueError(f"Unsupported model source {model_source}")
|
||||
|
||||
@@ -19,10 +19,10 @@ def to_onnx(
|
||||
opset_version: int,
|
||||
output_dir_visual: Path | str,
|
||||
output_dir_textual: Path | str,
|
||||
no_cache: bool = False,
|
||||
cache: bool = True,
|
||||
) -> tuple[Path, Path]:
|
||||
textual_path = get_model_path(output_dir_textual)
|
||||
if no_cache or not textual_path.exists():
|
||||
if not cache or not textual_path.exists():
|
||||
import torch
|
||||
from multilingual_clip.pt_multilingual_clip import MultilingualCLIP
|
||||
from transformers import AutoTokenizer
|
||||
@@ -39,9 +39,7 @@ def to_onnx(
|
||||
_export_text_encoder(model, textual_path, opset_version)
|
||||
else:
|
||||
print(f"Model {textual_path} already exists, skipping")
|
||||
visual_path, _ = openclip_to_onnx(
|
||||
_MCLIP_TO_OPENCLIP[model_name], opset_version, output_dir_visual, no_cache=no_cache
|
||||
)
|
||||
visual_path, _ = openclip_to_onnx(_MCLIP_TO_OPENCLIP[model_name], opset_version, output_dir_visual, cache=cache)
|
||||
assert visual_path is not None, "Visual model export failed"
|
||||
return visual_path, textual_path
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ def to_onnx(
|
||||
opset_version: int,
|
||||
output_dir_visual: Path | str | None = None,
|
||||
output_dir_textual: Path | str | None = None,
|
||||
no_cache: bool = False,
|
||||
cache: bool = True,
|
||||
) -> tuple[Path | None, Path | None]:
|
||||
visual_path = None
|
||||
textual_path = None
|
||||
@@ -49,9 +49,7 @@ def to_onnx(
|
||||
output_dir_textual = Path(output_dir_textual)
|
||||
textual_path = get_model_path(output_dir_textual)
|
||||
|
||||
if not no_cache and (
|
||||
(textual_path is None or textual_path.exists()) and (visual_path is None or visual_path.exists())
|
||||
):
|
||||
if cache and ((textual_path is None or textual_path.exists()) and (visual_path is None or visual_path.exists())):
|
||||
print(f"Models {textual_path} and {visual_path} already exist, skipping")
|
||||
return visual_path, textual_path
|
||||
|
||||
@@ -75,7 +73,7 @@ def to_onnx(
|
||||
param.requires_grad_(False)
|
||||
|
||||
if visual_path is not None and output_dir_visual is not None:
|
||||
if no_cache or not visual_path.exists():
|
||||
if not cache or not visual_path.exists():
|
||||
save_config(
|
||||
open_clip.get_model_preprocess_cfg(model),
|
||||
output_dir_visual / "preprocess_cfg.json",
|
||||
@@ -86,7 +84,7 @@ def to_onnx(
|
||||
print(f"Model {visual_path} already exists, skipping")
|
||||
|
||||
if textual_path is not None and output_dir_textual is not None:
|
||||
if no_cache or not textual_path.exists():
|
||||
if not cache or not textual_path.exists():
|
||||
tokenizer_name = text_vision_cfg["text_cfg"].get("hf_tokenizer_name", "openai/clip-vit-base-patch32")
|
||||
AutoTokenizer.from_pretrained(tokenizer_name).save_pretrained(output_dir_textual)
|
||||
_export_text_encoder(model, model_cfg, textual_path, opset_version)
|
||||
|
||||
@@ -9,13 +9,13 @@ def _export_platform(
|
||||
inputs: list[str] | None = None,
|
||||
input_size_list: list[list[int]] | None = None,
|
||||
fuse_matmul_softmax_matmul_to_sdpa: bool = True,
|
||||
no_cache: bool = False,
|
||||
cache: bool = True,
|
||||
) -> None:
|
||||
from rknn.api import RKNN
|
||||
|
||||
input_path = model_dir / "model.onnx"
|
||||
output_path = model_dir / "rknpu" / target_platform / "model.rknn"
|
||||
if not no_cache and output_path.exists():
|
||||
if cache and output_path.exists():
|
||||
print(f"Model {input_path} already exists at {output_path}, skipping")
|
||||
return
|
||||
|
||||
@@ -49,7 +49,7 @@ def _export_platforms(
|
||||
model_dir: Path,
|
||||
inputs: list[str] | None = None,
|
||||
input_size_list: list[list[int]] | None = None,
|
||||
no_cache: bool = False,
|
||||
cache: bool = True,
|
||||
) -> None:
|
||||
fuse_matmul_softmax_matmul_to_sdpa = True
|
||||
for soc in RKNN_SOCS:
|
||||
@@ -60,7 +60,7 @@ def _export_platforms(
|
||||
inputs=inputs,
|
||||
input_size_list=input_size_list,
|
||||
fuse_matmul_softmax_matmul_to_sdpa=fuse_matmul_softmax_matmul_to_sdpa,
|
||||
no_cache=no_cache,
|
||||
cache=cache,
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"Failed to export model for {soc}: {e}")
|
||||
@@ -73,24 +73,24 @@ def _export_platforms(
|
||||
inputs=inputs,
|
||||
input_size_list=input_size_list,
|
||||
fuse_matmul_softmax_matmul_to_sdpa=fuse_matmul_softmax_matmul_to_sdpa,
|
||||
no_cache=no_cache,
|
||||
cache=cache,
|
||||
)
|
||||
|
||||
|
||||
def export(model_dir: Path, cache: bool = True) -> None:
    """Run the per-platform export for each model subdirectory that exists.

    The subdirectories ``textual``, ``visual``, ``detection`` and
    ``recognition`` of ``model_dir`` are checked in that order; any that is
    not a directory is skipped.

    Args:
        model_dir: Directory that may contain the model subdirectories.
        cache: Forwarded unchanged as the ``cache`` argument of
            ``_export_platforms``.
    """
    textual = model_dir / "textual"
    visual = model_dir / "visual"
    detection = model_dir / "detection"
    recognition = model_dir / "recognition"

    if textual.is_dir():
        _export_platforms(textual, cache=cache)

    if visual.is_dir():
        _export_platforms(visual, cache=cache)

    # Detection and recognition pass an explicit input name and input size list.
    if detection.is_dir():
        _export_platforms(detection, inputs=["input.1"], input_size_list=[[1, 3, 640, 640]], cache=cache)

    if recognition.is_dir():
        _export_platforms(recognition, inputs=["input.1"], input_size_list=[[1, 3, 112, 112]], cache=cache)
|
||||
|
||||
Reference in New Issue
Block a user