In this part, we’ll build vector stores using FAISS and implement semantic search capabilities. Vector stores enable us to find relevant information based on meaning rather than exact keyword matches.
Understanding Vector Stores
Vector stores are databases optimized for storing and searching high-dimensional vectors (embeddings). Instead of matching literal keywords, they compare the geometric distance between embedding vectors, so a query can retrieve documents that are conceptually related even when they share no words with it.
Setting Up Embeddings
npm install @langchain/community
npm install faiss-node
// src/services/embeddingsService.js
import { AzureOpenAIEmbeddings } from "@langchain/openai";
/**
 * Thin wrapper around Azure OpenAI embeddings.
 * Centralizes client construction and error handling so the rest of the
 * app never talks to the SDK directly.
 */
export class EmbeddingsService {
constructor() {
// Pull deployment settings from the environment instead of hard-coding
// them; the string fallbacks keep the tutorial defaults working.
this.embeddings = new AzureOpenAIEmbeddings({
azureOpenAIApiKey: process.env.AZURE_OPENAI_API_KEY,
azureOpenAIApiInstanceName:
process.env.AZURE_OPENAI_API_INSTANCE_NAME ?? "your-instance",
azureOpenAIApiEmbeddingsDeploymentName:
process.env.AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT ?? "text-embedding-ada-002",
azureOpenAIApiVersion:
process.env.AZURE_OPENAI_API_VERSION ?? "2024-02-15-preview",
});
}
/**
 * Generate an embedding vector for a single piece of text.
 * @param {string} text - Text to embed.
 * @returns {Promise<number[]>} The embedding vector.
 * @throws {Error} Wraps the underlying SDK error; inspect `error.cause`.
 */
async generateEmbedding(text) {
try {
return await this.embeddings.embedQuery(text);
} catch (error) {
console.error('Error generating embedding:', error);
// Preserve the original failure for callers/loggers via `cause`
// instead of discarding it.
throw new Error('Failed to generate embedding', { cause: error });
}
}
}
Vector Store Implementation
// src/services/vectorStoreService.js
import { FaissStore } from "@langchain/community/vectorstores/faiss";
import { EmbeddingsService } from './embeddingsService.js';
/**
 * Manages a FAISS-backed vector store: creation from documents and
 * semantic similarity queries against it.
 */
export class VectorStoreService {
constructor() {
this.embeddingsService = new EmbeddingsService();
this.embeddings = this.embeddingsService.embeddings;
// Populated by createVectorStore(); search methods guard on it.
this.vectorStore = null;
}
/**
 * Build an in-memory FAISS store from LangChain documents.
 * @param {Array<{pageContent: string, metadata?: object}>} documents
 * @returns {Promise<FaissStore>} The initialized store.
 * @throws {Error} On empty input or FAISS/embedding failure (see `cause`).
 */
async createVectorStore(documents) {
if (!Array.isArray(documents) || documents.length === 0) {
throw new Error('Failed to create vector store', {
cause: new Error('documents must be a non-empty array'),
});
}
try {
this.vectorStore = await FaissStore.fromDocuments(
documents,
this.embeddings
);
return this.vectorStore;
} catch (error) {
// Keep the underlying error instead of swallowing it.
throw new Error('Failed to create vector store', { cause: error });
}
}
/**
 * Return the k most similar documents for a query.
 * @param {string} query - Natural-language query.
 * @param {number} [k=5] - Number of results.
 * @throws {Error} If createVectorStore() has not been called yet.
 */
async similaritySearch(query, k = 5) {
if (!this.vectorStore) {
throw new Error('Vector store not initialized');
}
return await this.vectorStore.similaritySearch(query, k);
}
/**
 * Like similaritySearch(), but annotates each hit with its raw score and
 * a coarse relevance bucket. FAISS scores here are distances, so LOWER
 * means MORE similar — hence the inverted thresholds below.
 * @param {string} query - Natural-language query.
 * @param {number} [k=5] - Number of results.
 * @returns {Promise<Array<{document: object, score: number, relevanceLevel: string}>>}
 * @throws {Error} If createVectorStore() has not been called yet.
 */
async similaritySearchWithScore(query, k = 5) {
if (!this.vectorStore) {
throw new Error('Vector store not initialized');
}
const results = await this.vectorStore.similaritySearchWithScore(query, k);
return results.map(([doc, score]) => ({
document: doc,
score: score,
// Distance thresholds — tune for your embedding model/data.
relevanceLevel: score < 0.3 ? 'high' : score < 0.6 ? 'medium' : 'low',
}));
}
}
Search API Endpoint
// src/routes/search.js
import express from 'express';
import { VectorStoreService } from '../services/vectorStoreService.js';
const router = express.Router();

// Shared service instance; the vector store must be created (e.g. at
// startup) before this endpoint can serve queries.
const vectorStoreService = new VectorStoreService();

/**
 * POST /semantic
 * Body: { query: string, options?: { k?: number, includeScores?: boolean } }
 * Responds with { query, resultCount, results }.
 */
router.post('/semantic', async (req, res) => {
try {
const { query, options = {} } = req.body;
// Require a non-empty string; `!query` alone would accept numbers,
// arrays, etc., which the vector store cannot embed.
if (typeof query !== 'string' || query.trim() === '') {
return res.status(400).json({ error: 'Query is required' });
}
const { k = 5, includeScores = true } = options;
// Clamp user-supplied k to a sane positive integer so a bad client
// cannot request zero, negative, or unbounded result counts.
const limit = Math.min(Math.max(Number.parseInt(k, 10) || 5, 1), 50);
const results = includeScores
? await vectorStoreService.similaritySearchWithScore(query, limit)
: await vectorStoreService.similaritySearch(query, limit);
res.json({
query,
resultCount: results.length,
results,
});
} catch (error) {
res.status(500).json({ error: error.message });
}
});

export default router;
Testing Vector Search
// Test data
// Sample documents for smoke-testing semantic search.
const testDocuments = [
{
pageContent: "LangChain is a framework for developing applications powered by language models.",
metadata: { source: "langchain-intro" },
},
{
pageContent: "Azure OpenAI offers enterprise-grade AI services with robust security.",
metadata: { source: "azure-overview" },
},
];

// Build a store from the samples, then run a quick two-result search.
const storeService = new VectorStoreService();
await storeService.createVectorStore(testDocuments);

const searchResults = await storeService.similaritySearch("AI frameworks", 2);
console.log(searchResults);
In Part 6, we’ll implement Retrieval-Augmented Generation (RAG) to answer questions based on your documents!