fix(server): more robust person thumbnail generation (#17974)

* more robust person thumbnail generation

* clamp bounding boxes

* update sql

* no need to process invalid images after decoding

* cursed knowledge

* new line
This commit is contained in:
Mert
2025-05-06 14:18:22 -04:00
committed by GitHub
parent d33ce13561
commit 2a80251dc3
9 changed files with 491 additions and 223 deletions
+100 -1
View File
@@ -1,5 +1,5 @@
import { Injectable } from '@nestjs/common';
import { JOBS_ASSET_PAGINATION_SIZE } from 'src/constants';
import { FACE_THUMBNAIL_SIZE, JOBS_ASSET_PAGINATION_SIZE } from 'src/constants';
import { StorageCore, ThumbnailPathEntity } from 'src/cores/storage.core';
import { Exif } from 'src/database';
import { OnEvent, OnJob } from 'src/decorators';
@@ -11,6 +11,7 @@ import {
AssetVisibility,
AudioCodec,
Colorspace,
ImageFormat,
JobName,
JobStatus,
LogLevel,
@@ -24,10 +25,13 @@ import {
VideoContainer,
} from 'src/enum';
import { UpsertFileOptions } from 'src/repositories/asset.repository';
import { BoundingBox } from 'src/repositories/machine-learning.repository';
import { BaseService } from 'src/services/base.service';
import {
AudioStreamInfo,
CropOptions,
DecodeToBufferOptions,
ImageDimensions,
JobItem,
JobOf,
VideoFormat,
@@ -37,6 +41,7 @@ import {
import { getAssetFiles } from 'src/utils/asset.util';
import { BaseConfig, ThumbnailConfig } from 'src/utils/media';
import { mimeTypes } from 'src/utils/mime-types';
import { clamp, isFaceImportEnabled, isFacialRecognitionEnabled } from 'src/utils/misc';
@Injectable()
export class MediaService extends BaseService {
@@ -308,6 +313,100 @@ export class MediaService extends BaseService {
return { previewPath, thumbnailPath, fullsizePath, thumbhash: outputs[0] as Buffer };
}
/**
 * Generates the square face thumbnail for a person.
 *
 * Source image selection: a video's pre-generated preview frame, an embedded
 * preview extracted from a RAW file (when enabled), or the original file.
 * The image is decoded once up front so the face crop can be computed against
 * the actual decoded dimensions rather than the stored EXIF dimensions.
 *
 * @returns SKIPPED when face features are disabled, FAILED when required data
 *          is missing, SUCCESS after the thumbnail is written and persisted.
 */
@OnJob({ name: JobName.GENERATE_PERSON_THUMBNAIL, queue: QueueName.THUMBNAIL_GENERATION })
async handleGeneratePersonThumbnail({ id }: JobOf<JobName.GENERATE_PERSON_THUMBNAIL>): Promise<JobStatus> {
  const { machineLearning, metadata, image } = await this.getConfig({ withCache: true });
  if (!isFacialRecognitionEnabled(machineLearning) && !isFaceImportEnabled(metadata)) {
    return JobStatus.SKIPPED;
  }

  const data = await this.personRepository.getDataForThumbnailGenerationJob(id);
  if (!data) {
    this.logger.error(`Could not generate person thumbnail for ${id}: missing data`);
    return JobStatus.FAILED;
  }

  const { ownerId, x1, y1, x2, y2, oldWidth, oldHeight, exifOrientation, previewPath, originalPath } = data;

  let inputImage: string | Buffer;
  if (mimeTypes.isVideo(originalPath)) {
    if (!previewPath) {
      this.logger.error(`Could not generate person thumbnail for video ${id}: missing preview path`);
      return JobStatus.FAILED;
    }
    inputImage = previewPath;
  } else if (image.extractEmbedded && mimeTypes.isRaw(originalPath)) {
    // fix: this must be chained with `else if` — as a standalone `if`, the
    // trailing `else` below overwrote the video preview path selected above
    // with the original video file, which cannot be decoded as an image.
    const extracted = await this.extractImage(originalPath, image.preview.size);
    inputImage = extracted ? extracted.buffer : originalPath;
  } else {
    inputImage = originalPath;
  }

  const { data: decodedImage, info } = await this.mediaRepository.decodeImage(inputImage, {
    colorspace: image.colorspace,
    processInvalidImages: process.env.IMMICH_PROCESS_INVALID_IMAGES === 'true',
    // if this is an extracted image, it may not have orientation metadata
    orientation: Buffer.isBuffer(inputImage) && exifOrientation ? Number(exifOrientation) : undefined,
  });

  const thumbnailPath = StorageCore.getPersonThumbnailPath({ id, ownerId });
  this.storageCore.ensureFolders(thumbnailPath);

  const thumbnailOptions = {
    colorspace: image.colorspace,
    format: ImageFormat.JPEG,
    raw: info,
    quality: image.thumbnail.quality,
    // the face box was detected against (oldWidth, oldHeight); rescale it to
    // the decoded image's dimensions before cropping
    crop: this.getCrop(
      { old: { width: oldWidth, height: oldHeight }, new: { width: info.width, height: info.height } },
      { x1, y1, x2, y2 },
    ),
    // the image was already decoded (and validated) above, so there is no need
    // to process invalid images a second time
    processInvalidImages: false,
    size: FACE_THUMBNAIL_SIZE,
  };

  await this.mediaRepository.generateThumbnail(decodedImage, thumbnailOptions, thumbnailPath);
  await this.personRepository.update({ id, thumbnailPath });
  return JobStatus.SUCCESS;
}
/**
 * Computes a square crop region, in the decoded image's coordinate space, that
 * is centered on a detected face box and zoomed out by 10%, shrunk as needed
 * so the square never extends past the image edges.
 */
private getCrop(dims: { old: ImageDimensions; new: ImageDimensions }, { x1, y1, x2, y2 }: BoundingBox): CropOptions {
  // Face bounding boxes can spill outside the image dimensions, so pin each
  // edge to the detection-time dimensions first.
  const boxLeft = clamp(x1, 0, dims.old.width);
  const boxTop = clamp(y1, 0, dims.old.height);
  const boxRight = clamp(x2, 0, dims.old.width);
  const boxBottom = clamp(y2, 0, dims.old.height);

  // Rescale the box from the detection-time dimensions to the decoded image's
  // dimensions and locate its center.
  const scaleX = dims.new.width / dims.old.width;
  const scaleY = dims.new.height / dims.old.height;
  const halfWidth = (scaleX * (boxRight - boxLeft)) / 2;
  const halfHeight = (scaleY * (boxBottom - boxTop)) / 2;
  const centerX = Math.round(scaleX * boxLeft + halfWidth);
  const centerY = Math.round(scaleY * boxTop + halfHeight);

  // Take the longer half-extent for a square crop and zoom out 10%.
  const targetHalfSize = Math.floor(Math.max(halfWidth, halfHeight) * 1.1);

  // Largest half-size that keeps the square inside the image on all four
  // sides, using x - max(0, x - t) === min(x, t) and
  // min(limit, x + t) - x === min(limit - x, t).
  const halfSize = Math.min(
    targetHalfSize,
    centerX,
    centerY,
    dims.new.width - 1 - centerX,
    dims.new.height - 1 - centerY,
  );

  return {
    left: centerX - halfSize,
    top: centerY - halfSize,
    width: halfSize * 2,
    height: halfSize * 2,
  };
}
private async generateVideoThumbnails(asset: ThumbnailPathEntity & { originalPath: string }) {
const { image, ffmpeg } = await this.getConfig({ withCache: true });
const previewPath = StorageCore.getImagePath(asset, AssetPathType.PREVIEW, image.preview.format);