feat(server): separate face clustering job (#5598)

* separate facial clustering job

* update api

* fixed some tests

* invert clustering

* hdbscan

* update api

* remove commented code

* wip dbscan

* cleanup

removed cluster endpoint

remove commented code

* fixes

updated tests

minor fixes and formatting

fixed queuing

refinements

* scale search range based on library size

* defer non-core faces

* optimizations

removed unused query option

* assign faces individually for correctness

fixed unit tests

remove unused method

* don't select face embedding

update sql

linting

fixed ml typing

* updated job mock

* paginate people query

* select face embeddings because typeorm

* fix setting face detection concurrency

* update sql

formatting

linting

* simplify logic

remove unused imports

* more specific delete signature

* more accurate typing for face stubs

* add migration

formatting

* chore: better typing

* don't select embedding by default

remove unused import

* updated sql

* use normal try/catch

* stricter concurrency typing and enforcement

* update api

* update job concurrency panel to show disabled queues

formatting

* check jobId in queueAll

fix tests

* remove outdated comment

* better facial recognition icon

* wording

wording

formatting

* fixed tests

* fix

* formatting & sql

* try to fix sql check

* more detailed description

* update sql

* formatting

* wording

* update `minFaces` description

---------

Co-authored-by: Jason Rasmussen <jrasm91@gmail.com>
Co-authored-by: Alex Tran <alex.tran1502@gmail.com>
This commit is contained in:
Mert
2024-01-18 00:08:48 -05:00
committed by GitHub
parent 44873b4224
commit 68f52818ae
57 changed files with 1081 additions and 631 deletions
@@ -25,7 +25,18 @@ import { InjectRepository } from '@nestjs/typeorm';
import _ from 'lodash';
import { DateTime } from 'luxon';
import path from 'path';
import { And, Brackets, FindOptionsRelations, FindOptionsWhere, In, IsNull, LessThan, Not, Repository } from 'typeorm';
import {
And,
Brackets,
FindOptionsRelations,
FindOptionsSelect,
FindOptionsWhere,
In,
IsNull,
LessThan,
Not,
Repository,
} from 'typeorm';
import { AssetEntity, AssetJobStatusEntity, AssetType, ExifEntity, SmartInfoEntity } from '../entities';
import { DummyValue, GenerateSql } from '../infra.util';
import { Chunked, ChunkedArray, OptionalBetween, paginate } from '../infra.utils';
@@ -103,6 +114,7 @@ export class AssetRepository implements IAssetRepository {
withExif: _withExif,
withStacked,
withPeople,
withSmartInfo,
order,
} = options;
@@ -174,6 +186,10 @@ export class AssetRepository implements IAssetRepository {
builder.leftJoinAndSelect('asset.stack', 'stack');
}
if (withSmartInfo) {
builder.leftJoinAndSelect('asset.smartInfo', 'smartInfo');
}
if (withDeleted) {
builder.withDeleted();
}
@@ -250,7 +266,11 @@ export class AssetRepository implements IAssetRepository {
@GenerateSql({ params: [[DummyValue.UUID]] })
@ChunkedArray()
getByIds(ids: string[], relations?: FindOptionsRelations<AssetEntity>): Promise<AssetEntity[]> {
getByIds(
ids: string[],
relations?: FindOptionsRelations<AssetEntity>,
select?: FindOptionsSelect<AssetEntity>,
): Promise<AssetEntity[]> {
if (!relations) {
relations = {
exifInfo: true,
@@ -262,9 +282,11 @@ export class AssetRepository implements IAssetRepository {
stack: true,
};
}
return this.repository.find({
where: { id: In(ids) },
relations,
select,
withDeleted: true,
});
}
@@ -325,12 +347,11 @@ export class AssetRepository implements IAssetRepository {
deletedAt: options.trashedBefore ? And(Not(IsNull()), LessThan(options.trashedBefore)) : undefined,
},
relations: {
exifInfo: true,
smartInfo: true,
tags: true,
faces: {
person: true,
},
exifInfo: options.withExif !== false,
smartInfo: options.withSmartInfo !== false,
tags: options.withSmartInfo !== false,
faces: options.withFaces !== false,
smartSearch: options.withSmartInfo === true,
},
withDeleted: options.withDeleted ?? !!options.trashedBefore,
order: {
@@ -519,6 +540,20 @@ export class AssetRepository implements IAssetRepository {
};
break;
case WithoutProperty.PERSON:
relations = {
faces: true,
};
where = {
resizePath: Not(IsNull()),
isVisible: true,
faces: {
assetId: Not(IsNull()),
personId: IsNull(),
},
};
break;
case WithoutProperty.SIDECAR:
where = [
{ sidecarPath: IsNull(), isVisible: true },
@@ -64,7 +64,15 @@ export class FilesystemProvider implements IStorageRepository {
}
async unlink(file: string) {
await fs.unlink(file);
try {
await fs.unlink(file);
} catch (err) {
if ((err as NodeJS.ErrnoException)?.code === 'ENOENT') {
this.logger.warn(`File ${file} does not exist.`);
} else {
throw err;
}
}
}
stat = fs.stat;
+37 -13
View File
@@ -15,6 +15,7 @@ import { ModuleRef } from '@nestjs/core';
import { SchedulerRegistry } from '@nestjs/schedule';
import { Job, JobsOptions, Processor, Queue, Worker, WorkerOptions } from 'bullmq';
import { CronJob, CronTime } from 'cron';
import { setTimeout } from 'timers/promises';
import { bullConfig } from '../infra.config';
@Injectable()
@@ -121,26 +122,47 @@ export class JobRepository implements IJobRepository {
return;
}
const itemsByQueue = items.reduce<Record<string, JobItem[]>>((acc, item) => {
const promises = [];
const itemsByQueue = {} as Record<string, (JobItem & { data: any; options: JobsOptions | undefined })[]>;
for (const item of items) {
const queueName = JOBS_TO_QUEUE[item.name];
acc[queueName] = acc[queueName] || [];
acc[queueName].push(item);
return acc;
}, {});
for (const [queueName, items] of Object.entries(itemsByQueue)) {
const queue = this.getQueue(queueName as QueueName);
const jobs = items.map((item) => ({
const job = {
name: item.name,
data: (item as { data?: any })?.data || {},
data: item.data || {},
options: this.getJobOptions(item) || undefined,
}));
await queue.addBulk(jobs);
} as JobItem & { data: any; options: JobsOptions | undefined };
if (job.options?.jobId) {
// need to use add() instead of addBulk() for jobId deduplication
promises.push(this.getQueue(queueName).add(item.name, item.data, job.options));
} else {
itemsByQueue[queueName] = itemsByQueue[queueName] || [];
itemsByQueue[queueName].push(job);
}
}
for (const [queueName, jobs] of Object.entries(itemsByQueue)) {
const queue = this.getQueue(queueName as QueueName);
promises.push(queue.addBulk(jobs));
}
await Promise.all(promises);
}
async queue(item: JobItem): Promise<void> {
await this.queueAll([item]);
return this.queueAll([item]);
}
async waitForQueueCompletion(...queues: QueueName[]): Promise<void> {
let activeQueue: QueueStatus | undefined;
do {
const statuses = await Promise.all(queues.map((name) => this.getQueueStatus(name)));
activeQueue = statuses.find((status) => status.isActive);
} while (activeQueue);
{
this.logger.verbose(`Waiting for ${activeQueue} queue to stop...`);
await setTimeout(1000);
}
}
private getJobOptions(item: JobItem): JobsOptions | null {
@@ -149,6 +171,8 @@ export class JobRepository implements IJobRepository {
return { jobId: item.data.id };
case JobName.GENERATE_PERSON_THUMBNAIL:
return { priority: 1 };
case JobName.QUEUE_FACIAL_RECOGNITION:
return { jobId: JobName.QUEUE_FACIAL_RECOGNITION };
default:
return null;
@@ -16,7 +16,7 @@ const errorPrefix = 'Machine learning request';
@Injectable()
export class MachineLearningRepository implements IMachineLearningRepository {
private async post<T>(url: string, input: TextModelInput | VisionModelInput, config: ModelConfig): Promise<T> {
private async predict<T>(url: string, input: TextModelInput | VisionModelInput, config: ModelConfig): Promise<T> {
const formData = await this.getFormData(input, config);
const res = await fetch(`${url}/predict`, { method: 'POST', body: formData }).catch((error: Error | any) => {
@@ -31,11 +31,11 @@ export class MachineLearningRepository implements IMachineLearningRepository {
}
detectFaces(url: string, input: VisionModelInput, config: RecognitionConfig): Promise<DetectFaceResult[]> {
return this.post<DetectFaceResult[]>(url, input, { ...config, modelType: ModelType.FACIAL_RECOGNITION });
return this.predict<DetectFaceResult[]>(url, input, { ...config, modelType: ModelType.FACIAL_RECOGNITION });
}
encodeImage(url: string, input: VisionModelInput, config: CLIPConfig): Promise<number[]> {
return this.post<number[]>(url, input, {
return this.predict<number[]>(url, input, {
...config,
modelType: ModelType.CLIP,
mode: CLIPMode.VISION,
@@ -43,7 +43,11 @@ export class MachineLearningRepository implements IMachineLearningRepository {
}
encodeText(url: string, input: TextModelInput, config: CLIPConfig): Promise<number[]> {
return this.post<number[]>(url, input, { ...config, modelType: ModelType.CLIP, mode: CLIPMode.TEXT } as CLIPConfig);
return this.predict<number[]>(url, input, {
...config,
modelType: ModelType.CLIP,
mode: CLIPMode.TEXT,
} as CLIPConfig);
}
async getFormData(input: TextModelInput | VisionModelInput, config: ModelConfig): Promise<FormData> {
@@ -1,16 +1,19 @@
import {
AssetFaceId,
IPersonRepository,
Paginated,
PaginationOptions,
PersonNameSearchOptions,
PersonSearchOptions,
PersonStatistics,
UpdateFacesData,
} from '@app/domain';
import { InjectRepository } from '@nestjs/typeorm';
import { In, Repository } from 'typeorm';
import _ from 'lodash';
import { FindManyOptions, FindOptionsRelations, FindOptionsSelect, In, Repository } from 'typeorm';
import { AssetEntity, AssetFaceEntity, PersonEntity } from '../entities';
import { DummyValue, GenerateSql } from '../infra.util';
import { Chunked, ChunkedArray, asVector } from '../infra.utils';
import { ChunkedArray, asVector, paginate } from '../infra.utils';
export class PersonRepository implements IPersonRepository {
constructor(
@@ -19,64 +22,44 @@ export class PersonRepository implements IPersonRepository {
@InjectRepository(AssetFaceEntity) private assetFaceRepository: Repository<AssetFaceEntity>,
) {}
/**
* Before reassigning faces, delete potential key violations
*/
async prepareReassignFaces({ oldPersonId, newPersonId }: UpdateFacesData): Promise<string[]> {
const results = await this.assetFaceRepository
.createQueryBuilder('face')
.select('face."assetId"')
.where(`face."personId" IN (:...ids)`, { ids: [oldPersonId, newPersonId] })
.groupBy('face."assetId"')
.having('COUNT(face."personId") > 1')
.getRawMany();
const assetIds = results.map(({ assetId }) => assetId);
await this.deletePersonFromAssets(oldPersonId, assetIds);
return assetIds;
}
@Chunked({ paramIndex: 1 })
async deletePersonFromAssets(personId: string, assetIds: string[]): Promise<void> {
await this.assetFaceRepository.delete({ personId: personId, assetId: In(assetIds) });
}
@GenerateSql({ params: [{ oldPersonId: DummyValue.UUID, newPersonId: DummyValue.UUID }] })
async reassignFaces({ oldPersonId, newPersonId }: UpdateFacesData): Promise<number> {
async reassignFaces({ oldPersonId, faceIds, newPersonId }: UpdateFacesData): Promise<number> {
const result = await this.assetFaceRepository
.createQueryBuilder()
.update()
.set({ personId: newPersonId })
.where({ personId: oldPersonId })
.where(
_.omitBy(
{ personId: oldPersonId ? oldPersonId : undefined, id: faceIds ? In(faceIds) : undefined },
_.isUndefined,
),
)
.execute();
return result.affected ?? 0;
}
delete(entity: PersonEntity): Promise<PersonEntity | null> {
return this.personRepository.remove(entity);
async delete(entities: PersonEntity[]): Promise<void> {
await this.personRepository.remove(entities);
}
async deleteAll(): Promise<number> {
const people = await this.personRepository.find();
await this.personRepository.remove(people);
return people.length;
async deleteAll(): Promise<void> {
await this.personRepository.delete({});
}
@GenerateSql()
getAllFaces(): Promise<AssetFaceEntity[]> {
return this.assetFaceRepository.find({ relations: { asset: true }, withDeleted: true });
async deleteAllFaces(): Promise<void> {
await this.assetFaceRepository.delete({});
}
@GenerateSql()
getAll(): Promise<PersonEntity[]> {
return this.personRepository.find();
getAllFaces(
pagination: PaginationOptions,
options: FindManyOptions<AssetFaceEntity> = {},
): Paginated<AssetFaceEntity> {
return paginate(this.assetFaceRepository, pagination, options);
}
@GenerateSql()
getAllWithoutThumbnail(): Promise<PersonEntity[]> {
return this.personRepository.findBy({ thumbnailPath: '' });
getAll(pagination: PaginationOptions, options: FindManyOptions<PersonEntity> = {}): Paginated<PersonEntity> {
return paginate(this.personRepository, pagination, options);
}
@GenerateSql({ params: [DummyValue.UUID] })
@@ -133,14 +116,25 @@ export class PersonRepository implements IPersonRepository {
}
@GenerateSql({ params: [DummyValue.UUID] })
getFaceByIdWithAssets(id: string): Promise<AssetFaceEntity | null> {
return this.assetFaceRepository.findOne({
where: { id },
relations: {
person: true,
asset: true,
},
});
getFaceByIdWithAssets(
id: string,
relations: FindOptionsRelations<AssetFaceEntity>,
select: FindOptionsSelect<AssetFaceEntity>,
): Promise<AssetFaceEntity | null> {
return this.assetFaceRepository.findOne(
_.omitBy(
{
where: { id },
relations: {
...relations,
person: true,
asset: true,
},
select,
},
_.isUndefined,
),
);
}
@GenerateSql({ params: [DummyValue.UUID, DummyValue.UUID] })
@@ -221,15 +215,11 @@ export class PersonRepository implements IPersonRepository {
return this.personRepository.save(entity);
}
async createFace(entity: AssetFaceEntity): Promise<AssetFaceEntity> {
if (!entity.personId) {
throw new Error('Person ID is required to create a face');
}
async createFace(entity: AssetFaceEntity): Promise<void> {
if (!entity.embedding) {
throw new Error('Embedding is required to create a face');
}
await this.assetFaceRepository.insert({ ...entity, embedding: () => asVector(entity.embedding, true) });
return this.assetFaceRepository.findOneByOrFail({ assetId: entity.assetId, personId: entity.personId });
}
async update(entity: Partial<PersonEntity>): Promise<PersonEntity> {
@@ -1,4 +1,4 @@
import { Embedding, EmbeddingSearch, ISmartInfoRepository } from '@app/domain';
import { Embedding, EmbeddingSearch, FaceEmbeddingSearch, FaceSearchResult, ISmartInfoRepository } from '@app/domain';
import { getCLIPModelInfo } from '@app/domain/smart-info/smart-info.constant';
import { AssetEntity, AssetFaceEntity, SmartInfoEntity, SmartSearchEntity } from '@app/infra/entities';
import { ImmichLogger } from '@app/infra/logger';
@@ -44,32 +44,33 @@ export class SmartInfoRepository implements ISmartInfoRepository {
params: [{ userIds: [DummyValue.UUID], embedding: Array.from({ length: 512 }, Math.random), numResults: 100 }],
})
async searchCLIP({ userIds, embedding, numResults, withArchived }: EmbeddingSearch): Promise<AssetEntity[]> {
if (!isValidInteger(numResults, { min: 1 })) {
throw new Error(`Invalid value for 'numResults': ${numResults}`);
}
let results: AssetEntity[] = [];
await this.assetRepository.manager.transaction(async (manager) => {
await manager.query(`SET LOCAL vectors.k = '${numResults}'`);
await manager.query(`SET LOCAL vectors.enable_prefilter = on`);
const query = manager
let query = manager
.createQueryBuilder(AssetEntity, 'a')
.innerJoin('a.smartSearch', 's')
.leftJoinAndSelect('a.exifInfo', 'e')
.where('a.ownerId IN (:...userIds )')
.andWhere('a.isVisible = true');
.orderBy('s.embedding <=> :embedding')
.setParameters({ userIds, embedding: asVector(embedding) });
if (!withArchived) {
query.andWhere('a.isArchived = false');
}
query.andWhere('a.isVisible = true').andWhere('a.fileCreatedAt < NOW()');
results = await query
.andWhere('a.fileCreatedAt < NOW()')
.leftJoinAndSelect('a.exifInfo', 'e')
.orderBy('s.embedding <=> :embedding')
.setParameters({ userIds, embedding: asVector(embedding) })
.limit(numResults)
.getMany();
if (numResults) {
if (!isValidInteger(numResults, { min: 1 })) {
throw new Error(`Invalid value for 'numResults': ${numResults}`);
}
query = query.limit(numResults);
await manager.query(`SET LOCAL vectors.k = '${numResults}'`);
}
results = await query.getMany();
});
return results;
@@ -85,22 +86,38 @@ export class SmartInfoRepository implements ISmartInfoRepository {
},
],
})
async searchFaces({ userIds, embedding, numResults, maxDistance }: EmbeddingSearch): Promise<AssetFaceEntity[]> {
if (!isValidInteger(numResults, { min: 1 })) {
throw new Error(`Invalid value for 'numResults': ${numResults}`);
}
let results: AssetFaceEntity[] = [];
async searchFaces({
userIds,
embedding,
numResults,
maxDistance,
hasPerson,
}: FaceEmbeddingSearch): Promise<FaceSearchResult[]> {
let results: Array<AssetFaceEntity & { distance: number }> = [];
await this.assetRepository.manager.transaction(async (manager) => {
await manager.query(`SET LOCAL vectors.k = '${numResults}'`);
const cte = manager
await manager.query(`SET LOCAL vectors.enable_prefilter = on`);
let cte = manager
.createQueryBuilder(AssetFaceEntity, 'faces')
.select('1 + (faces.embedding <=> :embedding)', 'distance')
.innerJoin('faces.asset', 'asset')
.where('asset.ownerId IN (:...userIds )')
.orderBy('1 + (faces.embedding <=> :embedding)')
.setParameters({ userIds, embedding: asVector(embedding) })
.limit(numResults);
.setParameters({ userIds, embedding: asVector(embedding) });
if (numResults) {
if (!isValidInteger(numResults, { min: 1 })) {
throw new Error(`Invalid value for 'numResults': ${numResults}`);
}
cte = cte.limit(numResults);
if (numResults > 64) {
// setting k too low messes with prefilter recall
await manager.query(`SET LOCAL vectors.k = '${numResults}'`);
}
}
if (hasPerson) {
cte = cte.andWhere('faces."personId" IS NOT NULL');
}
this.faceColumns.forEach((col) => cte.addSelect(`faces.${col}`, col));
@@ -113,7 +130,10 @@ export class SmartInfoRepository implements ISmartInfoRepository {
.getRawMany();
});
return this.assetFaceRepository.create(results);
return results.map((row) => ({
face: this.assetFaceRepository.create(row),
distance: row.distance,
}));
}
async upsert(smartInfo: Partial<SmartInfoEntity>, embedding?: Embedding): Promise<void> {