feat: use pgvecto.rs (#3605)

This commit is contained in:
Jason Rasmussen
2023-12-08 11:15:46 -05:00
committed by GitHub
parent 429ad28810
commit 1e99ba8167
99 changed files with 1935 additions and 2583 deletions
@@ -0,0 +1,107 @@
/** Descriptor for a known CLIP model; currently just its embedding dimensionality. */
export type ModelInfo = {
  dimSize: number;
};

/**
 * Embedding dimension for every supported CLIP model, keyed by the cleaned
 * model name (the form produced by `cleanModelName`, with any repo prefix
 * stripped and `:` replaced by `_`). Looked up via `getCLIPModelInfo`.
 */
export const CLIP_MODEL_INFO: Record<string, ModelInfo> = {
  RN50__openai: { dimSize: 1024 },
  RN50__yfcc15m: { dimSize: 1024 },
  RN50__cc12m: { dimSize: 1024 },
  RN101__openai: { dimSize: 512 },
  RN101__yfcc15m: { dimSize: 512 },
  RN50x4__openai: { dimSize: 640 },
  RN50x16__openai: { dimSize: 768 },
  RN50x64__openai: { dimSize: 1024 },
  'ViT-B-32__openai': { dimSize: 512 },
  'ViT-B-32__laion2b_e16': { dimSize: 512 },
  'ViT-B-32__laion400m_e31': { dimSize: 512 },
  'ViT-B-32__laion400m_e32': { dimSize: 512 },
  'ViT-B-32__laion2b-s34b-b79k': { dimSize: 512 },
  'ViT-B-16__openai': { dimSize: 512 },
  'ViT-B-16__laion400m_e31': { dimSize: 512 },
  'ViT-B-16__laion400m_e32': { dimSize: 512 },
  'ViT-B-16-plus-240__laion400m_e31': { dimSize: 640 },
  'ViT-B-16-plus-240__laion400m_e32': { dimSize: 640 },
  'ViT-L-14__openai': { dimSize: 768 },
  'ViT-L-14__laion400m_e31': { dimSize: 768 },
  'ViT-L-14__laion400m_e32': { dimSize: 768 },
  'ViT-L-14__laion2b-s32b-b82k': { dimSize: 768 },
  'ViT-L-14-336__openai': { dimSize: 768 },
  'ViT-H-14__laion2b-s32b-b79k': { dimSize: 1024 },
  'ViT-g-14__laion2b-s12b-b42k': { dimSize: 1024 },
  'LABSE-Vit-L-14': { dimSize: 768 },
  'XLM-Roberta-Large-Vit-B-32': { dimSize: 512 },
  'XLM-Roberta-Large-Vit-B-16Plus': { dimSize: 640 },
  'XLM-Roberta-Large-Vit-L-14': { dimSize: 768 },
};
/**
 * Normalizes a model name to the key form used by `CLIP_MODEL_INFO`:
 * keeps only the segment after the last `/` (e.g. a repo prefix like
 * `M-CLIP/`) and replaces every `:` with `_`.
 */
export function cleanModelName(modelName: string): string {
  // split() always yields at least one element, so the fallback never fires;
  // it only satisfies the string | undefined type of pop().
  const baseName = modelName.split('/').pop() ?? modelName;
  return baseName.replace(/:/g, '_');
}
/**
 * Looks up the `ModelInfo` for a CLIP model, normalizing the given name
 * with `cleanModelName` first.
 *
 * @throws Error when the (cleaned) name is not present in `CLIP_MODEL_INFO`.
 */
export function getCLIPModelInfo(modelName: string): ModelInfo {
  const key = cleanModelName(modelName);
  const info = CLIP_MODEL_INFO[key];
  // Table values are always objects, so an undefined check is equivalent
  // to the truthiness test.
  if (info === undefined) {
    throw new Error(`Unknown CLIP model: ${modelName}`);
  }
  return info;
}
@@ -16,6 +16,7 @@ import {
ISystemConfigRepository,
WithoutProperty,
} from '../repositories';
import { cleanModelName, getCLIPModelInfo } from './smart-info.constant';
import { SmartInfoService } from './smart-info.service';
const asset = {
@@ -195,10 +196,29 @@ describe(SmartInfoService.name, () => {
{ imagePath: 'path/to/resize.ext' },
{ enabled: true, modelName: 'ViT-B-32__openai' },
);
expect(smartMock.upsert).toHaveBeenCalledWith({
assetId: 'asset-1',
clipEmbedding: [0.01, 0.02, 0.03],
});
expect(smartMock.upsert).toHaveBeenCalledWith(
{
assetId: 'asset-1',
},
[0.01, 0.02, 0.03],
);
});
});
describe('cleanModelName', () => {
it('should clean name', () => {
expect(cleanModelName('ViT-B-32::openai')).toEqual('ViT-B-32__openai');
expect(cleanModelName('M-CLIP/XLM-Roberta-Large-Vit-L-14')).toEqual('XLM-Roberta-Large-Vit-L-14');
});
});
describe('getCLIPModelInfo', () => {
it('should return the model info', () => {
expect(getCLIPModelInfo('ViT-B-32__openai')).toEqual({ dimSize: 512 });
});
it('should throw an error if the model is not present', () => {
expect(() => getCLIPModelInfo('test-model')).toThrow('Unknown CLIP model: test-model');
});
});
});
@@ -1,6 +1,7 @@
import { Inject, Injectable } from '@nestjs/common';
import { Inject, Injectable, Logger } from '@nestjs/common';
import { setTimeout } from 'timers/promises';
import { usePagination } from '../domain.util';
import { IBaseJob, IEntityJob, JOBS_ASSET_PAGINATION_SIZE, JobName } from '../job';
import { IBaseJob, IEntityJob, JOBS_ASSET_PAGINATION_SIZE, JobName, QueueName } from '../job';
import {
IAssetRepository,
IJobRepository,
@@ -14,6 +15,7 @@ import { SystemConfigCore } from '../system-config';
@Injectable()
export class SmartInfoService {
private configCore: SystemConfigCore;
private logger = new Logger(SmartInfoService.name);
constructor(
@Inject(IAssetRepository) private assetRepository: IAssetRepository,
@@ -25,6 +27,24 @@ export class SmartInfoService {
this.configCore = SystemConfigCore.create(configRepository);
}
async init() {
await this.jobRepository.pause(QueueName.CLIP_ENCODING);
let { isActive } = await this.jobRepository.getQueueStatus(QueueName.CLIP_ENCODING);
while (isActive) {
this.logger.verbose('Waiting for CLIP encoding queue to stop...');
await setTimeout(1000).then(async () => {
({ isActive } = await this.jobRepository.getQueueStatus(QueueName.CLIP_ENCODING));
});
}
const { machineLearning } = await this.configCore.getConfig();
await this.repository.init(machineLearning.clip.modelName);
await this.jobRepository.resume(QueueName.CLIP_ENCODING);
}
async handleQueueObjectTagging({ force }: IBaseJob) {
const { machineLearning } = await this.configCore.getConfig();
if (!machineLearning.enabled || !machineLearning.classification.enabled) {
@@ -105,7 +125,7 @@ export class SmartInfoService {
machineLearning.clip,
);
await this.repository.upsert({ assetId: asset.id, clipEmbedding: clipEmbedding });
await this.repository.upsert({ assetId: asset.id }, clipEmbedding);
return true;
}