feat(server): near-duplicate detection (#8228)
* duplicate detection job, entity, config * queueing * job panel, update api * use embedding in db instead of fetching * disable concurrency * only queue visible assets * handle multiple duplicateIds * update concurrent queue check * add provider * add web placeholder, server endpoint, migration, various fixes * update sql * select embedding by default * rename variable * simplify * remove separate entity, handle re-running with different threshold, set default back to 0.02 * fix tests * add tests * add index to entity * formatting * update asset mock * fix `upsertJobStatus` signature * update sql * formatting * default to 0.03 * optimize clustering * use asset's `duplicateId` if present * update sql * update tests * expose admin setting * refactor * formatting * skip if ml is disabled * debug trash e2e * remove from web * remove from sidebar * test if ml is disabled * update sql * separate duplicate detection from clip in config, disable by default for now * fix doc * lower minimum `maxDistance` * update api * Add and Use Duplicate Detection Feature Flag (#9364) * Add Duplicate Detection Flag * Use Duplicate Detection Flag * Attempt Fixes for Failing Checks * lower minimum `maxDistance` * fix tests --------- Co-authored-by: mertalev <101130780+mertalev@users.noreply.github.com> * chore: fixes and additions after rebase * chore: update api (remove new Role enum) * fix: left join smart search so getAll works without machine learning * test: trash e2e go back to checking length of assets is zero * chore: regen api after rebase * test: fix tests after rebase * redundant join --------- Co-authored-by: Nicholas Flamy <30300649+NicholasFlamy@users.noreply.github.com> Co-authored-by: Zack Pollard <zackpollard@ymail.com> Co-authored-by: Zack Pollard <zack@futo.org>
This commit is contained in:
@@ -286,6 +286,11 @@ export class AssetService {
|
||||
return data;
|
||||
}
|
||||
|
||||
async getDuplicates(auth: AuthDto): Promise<AssetResponseDto[]> {
|
||||
const res = await this.assetRepository.getDuplicates({ userIds: [auth.user.id] });
|
||||
return res.map((a) => mapAsset(a, { auth }));
|
||||
}
|
||||
|
||||
async update(auth: AuthDto, id: string, dto: UpdateAssetDto): Promise<AssetResponseDto> {
|
||||
await this.access.requirePermission(auth, Permission.ASSET_UPDATE, id);
|
||||
|
||||
|
||||
@@ -109,6 +109,7 @@ describe(JobService.name, () => {
|
||||
|
||||
await expect(sut.getAllJobsStatus()).resolves.toEqual({
|
||||
[QueueName.BACKGROUND_TASK]: expectedJobStatus,
|
||||
[QueueName.DUPLICATE_DETECTION]: expectedJobStatus,
|
||||
[QueueName.SMART_SEARCH]: expectedJobStatus,
|
||||
[QueueName.METADATA_EXTRACTION]: expectedJobStatus,
|
||||
[QueueName.SEARCH]: expectedJobStatus,
|
||||
|
||||
@@ -115,6 +115,10 @@ export class JobService {
|
||||
return this.jobRepository.queue({ name: JobName.QUEUE_SMART_SEARCH, data: { force } });
|
||||
}
|
||||
|
||||
case QueueName.DUPLICATE_DETECTION: {
|
||||
return this.jobRepository.queue({ name: JobName.QUEUE_DUPLICATE_DETECTION, data: { force } });
|
||||
}
|
||||
|
||||
case QueueName.METADATA_EXTRACTION: {
|
||||
return this.jobRepository.queue({ name: JobName.QUEUE_METADATA_EXTRACTION, data: { force } });
|
||||
}
|
||||
@@ -191,7 +195,11 @@ export class JobService {
|
||||
}
|
||||
|
||||
private isConcurrentQueue(name: QueueName): name is ConcurrentQueueName {
|
||||
return ![QueueName.FACIAL_RECOGNITION, QueueName.STORAGE_TEMPLATE_MIGRATION].includes(name);
|
||||
return ![
|
||||
QueueName.FACIAL_RECOGNITION,
|
||||
QueueName.STORAGE_TEMPLATE_MIGRATION,
|
||||
QueueName.DUPLICATE_DETECTION,
|
||||
].includes(name);
|
||||
}
|
||||
|
||||
async handleNightlyJobs() {
|
||||
@@ -294,6 +302,13 @@ export class JobService {
|
||||
break;
|
||||
}
|
||||
|
||||
case JobName.SMART_SEARCH: {
|
||||
if (item.data.source === 'upload') {
|
||||
await this.jobRepository.queue({ name: JobName.DUPLICATE_DETECTION, data: item.data });
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case JobName.USER_DELETION: {
|
||||
this.eventRepository.clientBroadcast(ClientEvent.USER_DELETE, item.data.id);
|
||||
break;
|
||||
|
||||
@@ -9,6 +9,7 @@ import { MediaService } from 'src/services/media.service';
|
||||
import { MetadataService } from 'src/services/metadata.service';
|
||||
import { NotificationService } from 'src/services/notification.service';
|
||||
import { PersonService } from 'src/services/person.service';
|
||||
import { SearchService } from 'src/services/search.service';
|
||||
import { SessionService } from 'src/services/session.service';
|
||||
import { SmartInfoService } from 'src/services/smart-info.service';
|
||||
import { StorageTemplateService } from 'src/services/storage-template.service';
|
||||
@@ -35,6 +36,7 @@ export class MicroservicesService {
|
||||
private storageTemplateService: StorageTemplateService,
|
||||
private storageService: StorageService,
|
||||
private userService: UserService,
|
||||
private searchService: SearchService,
|
||||
) {}
|
||||
|
||||
async init() {
|
||||
@@ -53,6 +55,8 @@ export class MicroservicesService {
|
||||
[JobName.USER_SYNC_USAGE]: () => this.userService.handleUserSyncUsage(),
|
||||
[JobName.QUEUE_SMART_SEARCH]: (data) => this.smartInfoService.handleQueueEncodeClip(data),
|
||||
[JobName.SMART_SEARCH]: (data) => this.smartInfoService.handleEncodeClip(data),
|
||||
[JobName.QUEUE_DUPLICATE_DETECTION]: (data) => this.searchService.handleQueueSearchDuplicates(data),
|
||||
[JobName.DUPLICATE_DETECTION]: (data) => this.searchService.handleSearchDuplicates(data),
|
||||
[JobName.STORAGE_TEMPLATE_MIGRATION]: () => this.storageTemplateService.handleMigration(),
|
||||
[JobName.STORAGE_TEMPLATE_MIGRATION_SINGLE]: (data) => this.storageTemplateService.handleMigrationSingle(data),
|
||||
[JobName.QUEUE_MIGRATION]: () => this.mediaService.handleQueueMigration(),
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
import { mapAsset } from 'src/dtos/asset-response.dto';
|
||||
import { IAssetRepository } from 'src/interfaces/asset.interface';
|
||||
import { IAssetRepository, WithoutProperty } from 'src/interfaces/asset.interface';
|
||||
import { ICryptoRepository } from 'src/interfaces/crypto.interface';
|
||||
import { IJobRepository, JobName, JobStatus } from 'src/interfaces/job.interface';
|
||||
import { ILoggerRepository } from 'src/interfaces/logger.interface';
|
||||
import { IMachineLearningRepository } from 'src/interfaces/machine-learning.interface';
|
||||
import { IMetadataRepository } from 'src/interfaces/metadata.interface';
|
||||
@@ -12,6 +14,8 @@ import { assetStub } from 'test/fixtures/asset.stub';
|
||||
import { authStub } from 'test/fixtures/auth.stub';
|
||||
import { personStub } from 'test/fixtures/person.stub';
|
||||
import { newAssetRepositoryMock } from 'test/repositories/asset.repository.mock';
|
||||
import { newCryptoRepositoryMock } from 'test/repositories/crypto.repository.mock';
|
||||
import { newJobRepositoryMock } from 'test/repositories/job.repository.mock';
|
||||
import { newLoggerRepositoryMock } from 'test/repositories/logger.repository.mock';
|
||||
import { newMachineLearningRepositoryMock } from 'test/repositories/machine-learning.repository.mock';
|
||||
import { newMetadataRepositoryMock } from 'test/repositories/metadata.repository.mock';
|
||||
@@ -19,7 +23,7 @@ import { newPartnerRepositoryMock } from 'test/repositories/partner.repository.m
|
||||
import { newPersonRepositoryMock } from 'test/repositories/person.repository.mock';
|
||||
import { newSearchRepositoryMock } from 'test/repositories/search.repository.mock';
|
||||
import { newSystemMetadataRepositoryMock } from 'test/repositories/system-metadata.repository.mock';
|
||||
import { Mocked, vitest } from 'vitest';
|
||||
import { Mocked, beforeEach, vitest } from 'vitest';
|
||||
|
||||
vitest.useFakeTimers();
|
||||
|
||||
@@ -33,6 +37,8 @@ describe(SearchService.name, () => {
|
||||
let partnerMock: Mocked<IPartnerRepository>;
|
||||
let metadataMock: Mocked<IMetadataRepository>;
|
||||
let loggerMock: Mocked<ILoggerRepository>;
|
||||
let cryptoMock: Mocked<ICryptoRepository>;
|
||||
let jobMock: Mocked<IJobRepository>;
|
||||
|
||||
beforeEach(() => {
|
||||
assetMock = newAssetRepositoryMock();
|
||||
@@ -43,6 +49,8 @@ describe(SearchService.name, () => {
|
||||
partnerMock = newPartnerRepositoryMock();
|
||||
metadataMock = newMetadataRepositoryMock();
|
||||
loggerMock = newLoggerRepositoryMock();
|
||||
cryptoMock = newCryptoRepositoryMock();
|
||||
jobMock = newJobRepositoryMock();
|
||||
|
||||
sut = new SearchService(
|
||||
systemMock,
|
||||
@@ -53,6 +61,8 @@ describe(SearchService.name, () => {
|
||||
partnerMock,
|
||||
metadataMock,
|
||||
loggerMock,
|
||||
cryptoMock,
|
||||
jobMock,
|
||||
);
|
||||
});
|
||||
|
||||
@@ -76,15 +86,15 @@ describe(SearchService.name, () => {
|
||||
|
||||
describe('getExploreData', () => {
|
||||
it('should get assets by city and tag', async () => {
|
||||
assetMock.getAssetIdByCity.mockResolvedValueOnce({
|
||||
assetMock.getAssetIdByCity.mockResolvedValue({
|
||||
fieldName: 'exifInfo.city',
|
||||
items: [{ value: 'Paris', data: assetStub.image.id }],
|
||||
});
|
||||
assetMock.getAssetIdByTag.mockResolvedValueOnce({
|
||||
assetMock.getAssetIdByTag.mockResolvedValue({
|
||||
fieldName: 'smartInfo.tags',
|
||||
items: [{ value: 'train', data: assetStub.imageFrom2015.id }],
|
||||
});
|
||||
assetMock.getByIdsWithAllRelations.mockResolvedValueOnce([assetStub.image, assetStub.imageFrom2015]);
|
||||
assetMock.getByIdsWithAllRelations.mockResolvedValue([assetStub.image, assetStub.imageFrom2015]);
|
||||
const expectedResponse = [
|
||||
{ fieldName: 'exifInfo.city', items: [{ value: 'Paris', data: mapAsset(assetStub.image) }] },
|
||||
{ fieldName: 'smartInfo.tags', items: [{ value: 'train', data: mapAsset(assetStub.imageFrom2015) }] },
|
||||
@@ -95,4 +105,234 @@ describe(SearchService.name, () => {
|
||||
expect(result).toEqual(expectedResponse);
|
||||
});
|
||||
});
|
||||
|
||||
describe('handleQueueSearchDuplicates', () => {
|
||||
beforeEach(() => {
|
||||
systemMock.get.mockResolvedValue({
|
||||
machineLearning: {
|
||||
enabled: true,
|
||||
duplicateDetection: {
|
||||
enabled: true,
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('should skip if machine learning is disabled', async () => {
|
||||
systemMock.get.mockResolvedValue({
|
||||
machineLearning: {
|
||||
enabled: false,
|
||||
duplicateDetection: {
|
||||
enabled: true,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
await expect(sut.handleQueueSearchDuplicates({})).resolves.toBe(JobStatus.SKIPPED);
|
||||
expect(jobMock.queue).not.toHaveBeenCalled();
|
||||
expect(jobMock.queueAll).not.toHaveBeenCalled();
|
||||
expect(systemMock.get).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should skip if duplicate detection is disabled', async () => {
|
||||
systemMock.get.mockResolvedValue({
|
||||
machineLearning: {
|
||||
enabled: true,
|
||||
duplicateDetection: {
|
||||
enabled: false,
|
||||
},
|
||||
},
|
||||
});
|
||||
|
||||
await expect(sut.handleQueueSearchDuplicates({})).resolves.toBe(JobStatus.SKIPPED);
|
||||
expect(jobMock.queue).not.toHaveBeenCalled();
|
||||
expect(jobMock.queueAll).not.toHaveBeenCalled();
|
||||
expect(systemMock.get).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it('should queue missing assets', async () => {
|
||||
assetMock.getWithout.mockResolvedValue({
|
||||
items: [assetStub.image],
|
||||
hasNextPage: false,
|
||||
});
|
||||
|
||||
await sut.handleQueueSearchDuplicates({});
|
||||
|
||||
expect(assetMock.getWithout).toHaveBeenCalledWith({ skip: 0, take: 1000 }, WithoutProperty.DUPLICATE);
|
||||
expect(jobMock.queueAll).toHaveBeenCalledWith([
|
||||
{
|
||||
name: JobName.DUPLICATE_DETECTION,
|
||||
data: { id: assetStub.image.id },
|
||||
},
|
||||
]);
|
||||
});
|
||||
|
||||
it('should queue all assets', async () => {
|
||||
assetMock.getAll.mockResolvedValue({
|
||||
items: [assetStub.image],
|
||||
hasNextPage: false,
|
||||
});
|
||||
personMock.getAll.mockResolvedValue({
|
||||
items: [personStub.withName],
|
||||
hasNextPage: false,
|
||||
});
|
||||
|
||||
await sut.handleQueueSearchDuplicates({ force: true });
|
||||
|
||||
expect(assetMock.getAll).toHaveBeenCalled();
|
||||
expect(jobMock.queueAll).toHaveBeenCalledWith([
|
||||
{
|
||||
name: JobName.DUPLICATE_DETECTION,
|
||||
data: { id: assetStub.image.id },
|
||||
},
|
||||
]);
|
||||
});
|
||||
});
|
||||
|
||||
describe('handleSearchDuplicates', () => {
|
||||
beforeEach(() => {
|
||||
systemMock.get.mockResolvedValue({
|
||||
machineLearning: {
|
||||
enabled: true,
|
||||
duplicateDetection: {
|
||||
enabled: true,
|
||||
},
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
it('should skip if machine learning is disabled', async () => {
|
||||
systemMock.get.mockResolvedValue({
|
||||
machineLearning: {
|
||||
enabled: false,
|
||||
duplicateDetection: {
|
||||
enabled: true,
|
||||
},
|
||||
},
|
||||
});
|
||||
const id = assetStub.livePhotoMotionAsset.id;
|
||||
assetMock.getById.mockResolvedValue(assetStub.livePhotoMotionAsset);
|
||||
|
||||
const result = await sut.handleSearchDuplicates({ id });
|
||||
|
||||
expect(result).toBe(JobStatus.SKIPPED);
|
||||
});
|
||||
|
||||
it('should skip if duplicate detection is disabled', async () => {
|
||||
systemMock.get.mockResolvedValue({
|
||||
machineLearning: {
|
||||
enabled: true,
|
||||
duplicateDetection: {
|
||||
enabled: false,
|
||||
},
|
||||
},
|
||||
});
|
||||
const id = assetStub.livePhotoMotionAsset.id;
|
||||
assetMock.getById.mockResolvedValue(assetStub.livePhotoMotionAsset);
|
||||
|
||||
const result = await sut.handleSearchDuplicates({ id });
|
||||
|
||||
expect(result).toBe(JobStatus.SKIPPED);
|
||||
});
|
||||
|
||||
it('should fail if asset is not found', async () => {
|
||||
const result = await sut.handleSearchDuplicates({ id: assetStub.image.id });
|
||||
|
||||
expect(result).toBe(JobStatus.FAILED);
|
||||
expect(loggerMock.error).toHaveBeenCalledWith(`Asset ${assetStub.image.id} not found`);
|
||||
});
|
||||
|
||||
it('should skip if asset is not visible', async () => {
|
||||
const id = assetStub.livePhotoMotionAsset.id;
|
||||
assetMock.getById.mockResolvedValue(assetStub.livePhotoMotionAsset);
|
||||
|
||||
const result = await sut.handleSearchDuplicates({ id });
|
||||
|
||||
expect(result).toBe(JobStatus.SKIPPED);
|
||||
expect(loggerMock.debug).toHaveBeenCalledWith(`Asset ${id} is not visible, skipping`);
|
||||
});
|
||||
|
||||
it('should fail if asset is missing preview image', async () => {
|
||||
assetMock.getById.mockResolvedValue(assetStub.noResizePath);
|
||||
|
||||
const result = await sut.handleSearchDuplicates({ id: assetStub.noResizePath.id });
|
||||
|
||||
expect(result).toBe(JobStatus.FAILED);
|
||||
expect(loggerMock.warn).toHaveBeenCalledWith(`Asset ${assetStub.noResizePath.id} is missing preview image`);
|
||||
});
|
||||
|
||||
it('should fail if asset is missing embedding', async () => {
|
||||
assetMock.getById.mockResolvedValue(assetStub.image);
|
||||
|
||||
const result = await sut.handleSearchDuplicates({ id: assetStub.image.id });
|
||||
|
||||
expect(result).toBe(JobStatus.FAILED);
|
||||
expect(loggerMock.debug).toHaveBeenCalledWith(`Asset ${assetStub.image.id} is missing embedding`);
|
||||
});
|
||||
|
||||
it('should search for duplicates and update asset with duplicateId', async () => {
|
||||
assetMock.getById.mockResolvedValue(assetStub.hasEmbedding);
|
||||
searchMock.searchDuplicates.mockResolvedValue([
|
||||
{ assetId: assetStub.image.id, distance: 0.01, duplicateId: null },
|
||||
]);
|
||||
const expectedAssetIds = [assetStub.image.id, assetStub.hasEmbedding.id];
|
||||
|
||||
const result = await sut.handleSearchDuplicates({ id: assetStub.hasEmbedding.id });
|
||||
|
||||
expect(result).toBe(JobStatus.SUCCESS);
|
||||
expect(searchMock.searchDuplicates).toHaveBeenCalledWith({
|
||||
assetId: assetStub.hasEmbedding.id,
|
||||
embedding: assetStub.hasEmbedding.smartSearch!.embedding,
|
||||
maxDistance: 0.03,
|
||||
userIds: [assetStub.hasEmbedding.ownerId],
|
||||
});
|
||||
expect(assetMock.updateDuplicates).toHaveBeenCalledWith({
|
||||
assetIds: expectedAssetIds,
|
||||
targetDuplicateId: expect.any(String),
|
||||
duplicateIds: [],
|
||||
});
|
||||
expect(assetMock.upsertJobStatus).toHaveBeenCalledWith(
|
||||
...expectedAssetIds.map((assetId) => ({ assetId, duplicatesDetectedAt: expect.any(Date) })),
|
||||
);
|
||||
});
|
||||
|
||||
it('should use existing duplicate ID among matched duplicates', async () => {
|
||||
const duplicateId = assetStub.hasDupe.duplicateId;
|
||||
assetMock.getById.mockResolvedValue(assetStub.hasEmbedding);
|
||||
searchMock.searchDuplicates.mockResolvedValue([{ assetId: assetStub.hasDupe.id, distance: 0.01, duplicateId }]);
|
||||
const expectedAssetIds = [assetStub.hasEmbedding.id];
|
||||
|
||||
const result = await sut.handleSearchDuplicates({ id: assetStub.hasEmbedding.id });
|
||||
|
||||
expect(result).toBe(JobStatus.SUCCESS);
|
||||
expect(searchMock.searchDuplicates).toHaveBeenCalledWith({
|
||||
assetId: assetStub.hasEmbedding.id,
|
||||
embedding: assetStub.hasEmbedding.smartSearch!.embedding,
|
||||
maxDistance: 0.03,
|
||||
userIds: [assetStub.hasEmbedding.ownerId],
|
||||
});
|
||||
expect(assetMock.updateDuplicates).toHaveBeenCalledWith({
|
||||
assetIds: expectedAssetIds,
|
||||
targetDuplicateId: assetStub.hasDupe.duplicateId,
|
||||
duplicateIds: [],
|
||||
});
|
||||
expect(assetMock.upsertJobStatus).toHaveBeenCalledWith(
|
||||
...expectedAssetIds.map((assetId) => ({ assetId, duplicatesDetectedAt: expect.any(Date) })),
|
||||
);
|
||||
});
|
||||
|
||||
it('should remove duplicateId if no duplicates found and asset has duplicateId', async () => {
|
||||
assetMock.getById.mockResolvedValue(assetStub.hasDupe);
|
||||
searchMock.searchDuplicates.mockResolvedValue([]);
|
||||
|
||||
const result = await sut.handleSearchDuplicates({ id: assetStub.hasDupe.id });
|
||||
|
||||
expect(result).toBe(JobStatus.SUCCESS);
|
||||
expect(assetMock.update).toHaveBeenCalledWith({ id: assetStub.hasDupe.id, duplicateId: null });
|
||||
expect(assetMock.upsertJobStatus).toHaveBeenCalledWith({
|
||||
assetId: assetStub.hasDupe.id,
|
||||
duplicatesDetectedAt: expect.any(Date),
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@@ -16,15 +16,25 @@ import {
|
||||
} from 'src/dtos/search.dto';
|
||||
import { AssetOrder } from 'src/entities/album.entity';
|
||||
import { AssetEntity } from 'src/entities/asset.entity';
|
||||
import { IAssetRepository } from 'src/interfaces/asset.interface';
|
||||
import { IAssetRepository, WithoutProperty } from 'src/interfaces/asset.interface';
|
||||
import { ICryptoRepository } from 'src/interfaces/crypto.interface';
|
||||
import {
|
||||
IBaseJob,
|
||||
IEntityJob,
|
||||
IJobRepository,
|
||||
JOBS_ASSET_PAGINATION_SIZE,
|
||||
JobName,
|
||||
JobStatus,
|
||||
} from 'src/interfaces/job.interface';
|
||||
import { ILoggerRepository } from 'src/interfaces/logger.interface';
|
||||
import { IMachineLearningRepository } from 'src/interfaces/machine-learning.interface';
|
||||
import { IMetadataRepository } from 'src/interfaces/metadata.interface';
|
||||
import { IPartnerRepository } from 'src/interfaces/partner.interface';
|
||||
import { IPersonRepository } from 'src/interfaces/person.interface';
|
||||
import { ISearchRepository, SearchExploreItem } from 'src/interfaces/search.interface';
|
||||
import { AssetDuplicateResult, ISearchRepository, SearchExploreItem } from 'src/interfaces/search.interface';
|
||||
import { ISystemMetadataRepository } from 'src/interfaces/system-metadata.interface';
|
||||
import { isSmartSearchEnabled } from 'src/utils/misc';
|
||||
import { isDuplicateDetectionEnabled, isSmartSearchEnabled } from 'src/utils/misc';
|
||||
import { usePagination } from 'src/utils/pagination';
|
||||
|
||||
@Injectable()
|
||||
export class SearchService {
|
||||
@@ -39,6 +49,8 @@ export class SearchService {
|
||||
@Inject(IPartnerRepository) private partnerRepository: IPartnerRepository,
|
||||
@Inject(IMetadataRepository) private metadataRepository: IMetadataRepository,
|
||||
@Inject(ILoggerRepository) private logger: ILoggerRepository,
|
||||
@Inject(ICryptoRepository) private cryptoRepository: ICryptoRepository,
|
||||
@Inject(IJobRepository) private jobRepository: IJobRepository,
|
||||
) {
|
||||
this.logger.setContext(SearchService.name);
|
||||
this.configCore = SystemConfigCore.create(systemMetadataRepository, logger);
|
||||
@@ -147,6 +159,97 @@ export class SearchService {
|
||||
}
|
||||
}
|
||||
|
||||
async handleQueueSearchDuplicates({ force }: IBaseJob): Promise<JobStatus> {
|
||||
const { machineLearning } = await this.configCore.getConfig();
|
||||
if (!isDuplicateDetectionEnabled(machineLearning)) {
|
||||
return JobStatus.SKIPPED;
|
||||
}
|
||||
|
||||
const assetPagination = usePagination(JOBS_ASSET_PAGINATION_SIZE, (pagination) => {
|
||||
return force
|
||||
? this.assetRepository.getAll(pagination, { isVisible: true })
|
||||
: this.assetRepository.getWithout(pagination, WithoutProperty.DUPLICATE);
|
||||
});
|
||||
|
||||
for await (const assets of assetPagination) {
|
||||
await this.jobRepository.queueAll(
|
||||
assets.map((asset) => ({ name: JobName.DUPLICATE_DETECTION, data: { id: asset.id } })),
|
||||
);
|
||||
}
|
||||
|
||||
return JobStatus.SUCCESS;
|
||||
}
|
||||
|
||||
async handleSearchDuplicates({ id }: IEntityJob): Promise<JobStatus> {
|
||||
const { machineLearning } = await this.configCore.getConfig();
|
||||
if (!isDuplicateDetectionEnabled(machineLearning)) {
|
||||
return JobStatus.SKIPPED;
|
||||
}
|
||||
|
||||
const asset = await this.assetRepository.getById(id, { smartSearch: true });
|
||||
if (!asset) {
|
||||
this.logger.error(`Asset ${id} not found`);
|
||||
return JobStatus.FAILED;
|
||||
}
|
||||
|
||||
if (!asset.isVisible) {
|
||||
this.logger.debug(`Asset ${id} is not visible, skipping`);
|
||||
return JobStatus.SKIPPED;
|
||||
}
|
||||
|
||||
if (!asset.previewPath) {
|
||||
this.logger.warn(`Asset ${id} is missing preview image`);
|
||||
return JobStatus.FAILED;
|
||||
}
|
||||
|
||||
if (!asset.smartSearch?.embedding) {
|
||||
this.logger.debug(`Asset ${id} is missing embedding`);
|
||||
return JobStatus.FAILED;
|
||||
}
|
||||
|
||||
const duplicateAssets = await this.searchRepository.searchDuplicates({
|
||||
assetId: asset.id,
|
||||
embedding: asset.smartSearch.embedding,
|
||||
maxDistance: machineLearning.duplicateDetection.maxDistance,
|
||||
userIds: [asset.ownerId],
|
||||
});
|
||||
|
||||
let assetIds = [asset.id];
|
||||
if (duplicateAssets.length > 0) {
|
||||
this.logger.debug(
|
||||
`Found ${duplicateAssets.length} duplicate${duplicateAssets.length === 1 ? '' : 's'} for asset ${asset.id}`,
|
||||
);
|
||||
assetIds = await this.updateDuplicates(asset, duplicateAssets);
|
||||
} else if (asset.duplicateId) {
|
||||
this.logger.debug(`No duplicates found for asset ${asset.id}, removing duplicateId`);
|
||||
await this.assetRepository.update({ id: asset.id, duplicateId: null });
|
||||
}
|
||||
|
||||
const duplicatesDetectedAt = new Date();
|
||||
await this.assetRepository.upsertJobStatus(...assetIds.map((assetId) => ({ assetId, duplicatesDetectedAt })));
|
||||
|
||||
return JobStatus.SUCCESS;
|
||||
}
|
||||
|
||||
private async updateDuplicates(asset: AssetEntity, duplicateAssets: AssetDuplicateResult[]): Promise<string[]> {
|
||||
const duplicateIds = [
|
||||
...new Set(
|
||||
duplicateAssets
|
||||
.filter((asset): asset is AssetDuplicateResult & { duplicateId: string } => !!asset.duplicateId)
|
||||
.map((duplicate) => duplicate.duplicateId),
|
||||
),
|
||||
];
|
||||
|
||||
const targetDuplicateId = asset.duplicateId ?? duplicateIds.shift() ?? this.cryptoRepository.randomUUID();
|
||||
const assetIdsToUpdate = duplicateAssets
|
||||
.filter((asset) => asset.duplicateId !== targetDuplicateId)
|
||||
.map((duplicate) => duplicate.assetId);
|
||||
assetIdsToUpdate.push(asset.id);
|
||||
|
||||
await this.assetRepository.updateDuplicates({ targetDuplicateId, assetIds: assetIdsToUpdate, duplicateIds });
|
||||
return assetIdsToUpdate;
|
||||
}
|
||||
|
||||
private async getUserIdsToSearch(auth: AuthDto): Promise<string[]> {
|
||||
const userIds: string[] = [auth.user.id];
|
||||
const partners = await this.partnerRepository.getAll(auth.user.id);
|
||||
|
||||
@@ -164,6 +164,7 @@ describe(ServerInfoService.name, () => {
|
||||
it('should respond the server features', async () => {
|
||||
await expect(sut.getFeatures()).resolves.toEqual({
|
||||
smartSearch: true,
|
||||
duplicateDetection: false,
|
||||
facialRecognition: true,
|
||||
map: true,
|
||||
reverseGeocoding: true,
|
||||
|
||||
@@ -22,7 +22,7 @@ import { ISystemMetadataRepository } from 'src/interfaces/system-metadata.interf
|
||||
import { IUserRepository, UserStatsQueryResponse } from 'src/interfaces/user.interface';
|
||||
import { asHumanReadable } from 'src/utils/bytes';
|
||||
import { mimeTypes } from 'src/utils/mime-types';
|
||||
import { isFacialRecognitionEnabled, isSmartSearchEnabled } from 'src/utils/misc';
|
||||
import { isDuplicateDetectionEnabled, isFacialRecognitionEnabled, isSmartSearchEnabled } from 'src/utils/misc';
|
||||
import { Version } from 'src/utils/version';
|
||||
|
||||
@Injectable()
|
||||
@@ -88,6 +88,7 @@ export class ServerInfoService {
|
||||
return {
|
||||
smartSearch: isSmartSearchEnabled(machineLearning),
|
||||
facialRecognition: isFacialRecognitionEnabled(machineLearning),
|
||||
duplicateDetection: isDuplicateDetectionEnabled(machineLearning),
|
||||
map: map.enabled,
|
||||
reverseGeocoding: reverseGeocoding.enabled,
|
||||
sidecar: true,
|
||||
|
||||
@@ -79,6 +79,10 @@ const updatedConfig = Object.freeze<SystemConfig>({
|
||||
enabled: true,
|
||||
modelName: 'ViT-B-32__openai',
|
||||
},
|
||||
duplicateDetection: {
|
||||
enabled: false,
|
||||
maxDistance: 0.03,
|
||||
},
|
||||
facialRecognition: {
|
||||
enabled: true,
|
||||
modelName: 'buffalo_l',
|
||||
|
||||
Reference in New Issue
Block a user