refactor: stream search duplicates (#17991)

This commit is contained in:
Jason Rasmussen
2025-04-30 10:40:32 -04:00
committed by GitHub
parent e3812a0e36
commit 8c5116bc1d
5 changed files with 60 additions and 42 deletions
+5 -12
View File
@@ -1,10 +1,9 @@
import { AssetFileType, AssetType, JobName, JobStatus } from 'src/enum';
import { WithoutProperty } from 'src/repositories/asset.repository';
import { DuplicateService } from 'src/services/duplicate.service';
import { SearchService } from 'src/services/search.service';
import { assetStub } from 'test/fixtures/asset.stub';
import { authStub } from 'test/fixtures/auth.stub';
import { newTestService, ServiceMocks } from 'test/utils';
import { makeStream, newTestService, ServiceMocks } from 'test/utils';
import { beforeEach, vitest } from 'vitest';
vitest.useFakeTimers();
@@ -113,14 +112,11 @@ describe(SearchService.name, () => {
});
it('should queue missing assets', async () => {
mocks.asset.getWithout.mockResolvedValue({
items: [assetStub.image],
hasNextPage: false,
});
mocks.assetJob.streamForSearchDuplicates.mockReturnValue(makeStream([assetStub.image]));
await sut.handleQueueSearchDuplicates({});
expect(mocks.asset.getWithout).toHaveBeenCalledWith({ skip: 0, take: 1000 }, WithoutProperty.DUPLICATE);
expect(mocks.assetJob.streamForSearchDuplicates).toHaveBeenCalledWith(undefined);
expect(mocks.job.queueAll).toHaveBeenCalledWith([
{
name: JobName.DUPLICATE_DETECTION,
@@ -130,14 +126,11 @@ describe(SearchService.name, () => {
});
it('should queue all assets', async () => {
mocks.asset.getAll.mockResolvedValue({
items: [assetStub.image],
hasNextPage: false,
});
mocks.assetJob.streamForSearchDuplicates.mockReturnValue(makeStream([assetStub.image]));
await sut.handleQueueSearchDuplicates({ force: true });
expect(mocks.asset.getAll).toHaveBeenCalled();
expect(mocks.assetJob.streamForSearchDuplicates).toHaveBeenCalledWith(true);
expect(mocks.job.queueAll).toHaveBeenCalledWith([
{
name: JobName.DUPLICATE_DETECTION,
+14 -12
View File
@@ -5,13 +5,11 @@ import { mapAsset } from 'src/dtos/asset-response.dto';
import { AuthDto } from 'src/dtos/auth.dto';
import { DuplicateResponseDto } from 'src/dtos/duplicate.dto';
import { AssetFileType, JobName, JobStatus, QueueName } from 'src/enum';
import { WithoutProperty } from 'src/repositories/asset.repository';
import { AssetDuplicateResult } from 'src/repositories/search.repository';
import { BaseService } from 'src/services/base.service';
import { JobOf } from 'src/types';
import { JobItem, JobOf } from 'src/types';
import { getAssetFile } from 'src/utils/asset.util';
import { isDuplicateDetectionEnabled } from 'src/utils/misc';
import { usePagination } from 'src/utils/pagination';
@Injectable()
export class DuplicateService extends BaseService {
@@ -30,18 +28,22 @@ export class DuplicateService extends BaseService {
return JobStatus.SKIPPED;
}
const assetPagination = usePagination(JOBS_ASSET_PAGINATION_SIZE, (pagination) => {
return force
? this.assetRepository.getAll(pagination, { isVisible: true })
: this.assetRepository.getWithout(pagination, WithoutProperty.DUPLICATE);
});
let jobs: JobItem[] = [];
const queueAll = async () => {
await this.jobRepository.queueAll(jobs);
jobs = [];
};
for await (const assets of assetPagination) {
await this.jobRepository.queueAll(
assets.map((asset) => ({ name: JobName.DUPLICATE_DETECTION, data: { id: asset.id } })),
);
const assets = this.assetJobRepository.streamForSearchDuplicates(force);
for await (const asset of assets) {
jobs.push({ name: JobName.DUPLICATE_DETECTION, data: { id: asset.id } });
if (jobs.length >= JOBS_ASSET_PAGINATION_SIZE) {
await queueAll();
}
}
await queueAll();
return JobStatus.SUCCESS;
}