All files / src/modules reranker.ts

85.36% Statements 70/82
60.86% Branches 14/23
100% Functions 6/6
85.36% Lines 70/82

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146                                                                                    1x 1x 1x 1x 1x   1x 1x 1x 1x 1x       21x 21x 21x 21x 21x 21x 84x 84x 21x 21x       1x 3x 3x 3x 3x 3x 3x   3x 2x   2x 6x 6x 6x 2x 2x 2x 2x           2x       2x 2x 2x 2x 2x 2x 2x 2x             2x   2x       2x 2x   3x 3x 1x 1x 1x   3x 1x 1x 1x 3x 3x         1x 4x 15x 15x 15x 4x 4x 4x 4x  
/**
 * AgentKits — Reranker Module (Enhanced)
 *
 * Cross-encoder and API-based reranking for RAG quality.
 * Supports: Cohere Rerank, Jina, custom scoring.
 *
 * Usage:
 *   import { createCrossEncoder } from 'agentkits/reranker';
 *   const reranker = createCrossEncoder({ provider: 'cohere', apiKey: '...' });
 *   const results = await reranker.rerank('query', documents);
 */
 
// ── Types ──────────────────────────────────────────────────────────
 
/**
 * Identifier of the reranking backend. It keys the per-provider default
 * model/base URL table and the API-key environment-variable table.
 */
export type RerankerProvider = 'cohere' | 'jina' | 'custom';
 
/** Options accepted by `createCrossEncoder`; every field is optional. */
export interface RerankerConfig {
  /** Backend to use. Defaults to `'cohere'`. */
  provider?: RerankerProvider;
  /**
   * API key sent as a Bearer token. When omitted, the provider's environment
   * variable is consulted; with no key at all, ranking falls back to the
   * local lexical scorer and no request is made.
   */
  apiKey?: string;
  /** Model name sent in the request body. Defaults to the provider's default model. */
  model?: string;
  /** Base URL of the API; `/rerank` is appended to it. Defaults to the provider's default URL. */
  baseUrl?: string;
  /** Default maximum number of results per `rerank` call. Defaults to 10. */
  topK?: number;
}
 
/** One entry of a reranking result. */
export interface RerankedDocument {
  /** Position of the document in the `documents` array passed to the reranker. */
  index: number;
  /** The document text found at `index` in the input array. */
  text: string;
  /**
   * Relevance score. The local fallback produces a value in [0, 1] and sorts
   * descending by it; API scores are passed through as returned.
   */
  score: number;
}
 
/** Client object returned by `createCrossEncoder`. */
export interface RerankerClient {
  /**
   * Rerank documents by relevance to query.
   * `options.topK` overrides the client's default result limit for this call.
   */
  rerank(query: string, documents: string[], options?: { topK?: number }): Promise<RerankedDocument[]>;
  /** Score a single query-document pair; resolves to 0 when no result is produced. */
  score(query: string, document: string): Promise<number>;
}
 
/** Fallback connection settings for one provider, used when the config omits them. */
interface ProviderDefaults {
  /** Model name used when `RerankerConfig.model` is not set. */
  model: string;
  /** API base URL used when `RerankerConfig.baseUrl` is not set. */
  baseUrl: string;
}
 
/**
 * Default model and base URL for each provider. `custom` points at a server
 * on localhost:8080, so real deployments are expected to override `baseUrl`.
 */
const PROVIDER_DEFAULTS: Record<RerankerProvider, ProviderDefaults> = {
  cohere: { model: 'rerank-v3.5', baseUrl: 'https://api.cohere.com/v2' },
  jina:   { model: 'jina-reranker-v2-base-multilingual', baseUrl: 'https://api.jina.ai/v1' },
  custom: { model: 'rerank', baseUrl: 'http://localhost:8080' },
};
 
/**
 * Environment variables consulted, in listed order, for an API key when
 * `RerankerConfig.apiKey` is not provided; the first non-empty value wins.
 */
const ENV_MAP: Record<RerankerProvider, string[]> = {
  cohere: ['COHERE_API_KEY'],
  jina:   ['JINA_API_KEY'],
  custom: ['RERANKER_API_KEY'],
};
 
// ── Simple token-overlap fallback scorer (no TF-IDF weighting) ─────
 
/**
 * Lexical-overlap relevance score used when no reranking API is available.
 *
 * Despite the name, no term-frequency or inverse-document-frequency weighting
 * is applied: the result is the fraction of query tokens that occur at least
 * once in the document. A token repeated in the query is counted each time.
 *
 * Tokenization is case-insensitive and Unicode-aware: text is split on runs
 * of characters that are not letters, combining marks, digits or `_` in any
 * script, so accented and non-Latin words stay intact.
 *
 * @param query - Search query.
 * @param document - Candidate document text.
 * @returns A value in [0, 1]; 0 when the query contains no tokens.
 */
function tfidfScore(query: string, document: string): number {
  // `\W` only recognizes ASCII word characters, which truncates "café" to
  // "caf" and discards CJK/Cyrillic/etc. entirely. This class matches `\W`
  // exactly on ASCII input while keeping non-ASCII words whole.
  const separators = /[^\p{L}\p{M}\p{N}_]+/u;

  const queryTokens = query.toLowerCase().split(separators).filter(Boolean);
  if (queryTokens.length === 0) return 0;

  const documentTokens = new Set(document.toLowerCase().split(separators).filter(Boolean));
  let hits = 0;
  for (const token of queryTokens) {
    if (documentTokens.has(token)) hits++;
  }
  return hits / queryTokens.length;
}
 
// ── Factory ────────────────────────────────────────────────────────
 
/**
 * Creates a reranker client backed by a hosted rerank API, with a local
 * lexical fallback.
 *
 * Resolution rules:
 * - `provider` defaults to `'cohere'`; `model` and `baseUrl` default to that
 *   provider's entry in `PROVIDER_DEFAULTS`.
 * - `apiKey` falls back to the first non-empty environment variable listed
 *   for the provider in `ENV_MAP`.
 * - When no API key can be resolved, no network request is made and
 *   documents are ranked locally with `tfidfScore`.
 *
 * Requests are `POST {baseUrl}/rerank` with a JSON body of
 * `{ model, query, documents, top_n }` and a Bearer `Authorization` header.
 * Results are read from `results` (or `data`) in the response, using
 * `relevance_score` (or `score`) of each entry.
 *
 * @param config - Optional provider, credentials and defaults.
 * @returns A client exposing `rerank` and `score`.
 * @throws Error from `rerank`/`score` when the API answers with a non-2xx
 *   status or a response whose result list is not an array.
 */
export function createCrossEncoder(config: RerankerConfig = {}): RerankerClient {
  /** Shape of one result entry as far as this module reads it. */
  type RawRerankResult = { index?: unknown; relevance_score?: unknown; score?: unknown } | null;

  const provider = config.provider ?? 'cohere';
  const defaults = PROVIDER_DEFAULTS[provider];
  const model = config.model ?? defaults.model;
  // Strip trailing slashes so "https://host/v1/" does not produce "//rerank".
  const baseUrl = (config.baseUrl ?? defaults.baseUrl).replace(/\/+$/, '');
  const apiKey = config.apiKey ?? ENV_MAP[provider].map(k => process.env[k]).find(Boolean);
  const defaultTopK = config.topK ?? 10;

  /** Ranks `documents` against `query`, returning at most `topK` entries. */
  async function callAPI(query: string, documents: string[], topK: number): Promise<RerankedDocument[]> {
    // Normalize the limit once for both paths. Nothing to rank (empty input,
    // zero/negative/NaN topK) means an empty result and no API round trip;
    // a negative value must never reach slice(), where it would count from
    // the end of the array.
    const limit = Math.min(Math.trunc(topK), documents.length);
    if (!(limit > 0)) return [];

    if (!apiKey) {
      // No credentials: rank locally by lexical overlap.
      return documents
        .map((text, index) => ({ index, text, score: tfidfScore(query, text) }))
        .sort((a, b) => b.score - a.score)
        .slice(0, limit);
    }

    // Every supported provider uses the same endpoint and request body.
    const resp = await fetch(`${baseUrl}/rerank`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${apiKey}`,
      },
      body: JSON.stringify({ model, query, documents, top_n: limit }),
    });

    if (!resp.ok) {
      throw new Error(`[AgentKits] Reranker API error: ${resp.status} ${await resp.text()}`);
    }

    const payload: { results?: unknown; data?: unknown } | null = await resp.json();
    const rawResults: unknown = payload?.results ?? payload?.data ?? [];
    if (!Array.isArray(rawResults)) {
      throw new Error('[AgentKits] Reranker API error: malformed response (results is not an array)');
    }

    const ranked: RerankedDocument[] = [];
    for (const entry of rawResults as RawRerankResult[]) {
      const index = entry?.index;
      if (typeof index !== 'number') continue;
      // Skip entries pointing outside the input; emitting them would put
      // `undefined` into the `text: string` field.
      const text = documents[index];
      if (text === undefined) continue;

      const relevance = entry?.relevance_score;
      const fallbackScore = entry?.score;
      const score =
        typeof relevance === 'number' ? relevance
        : typeof fallbackScore === 'number' ? fallbackScore
        : 0;
      ranked.push({ index, text, score });
    }
    return ranked;
  }

  return {
    async rerank(query, documents, options) {
      return callAPI(query, documents, options?.topK ?? defaultTopK);
    },

    async score(query, document) {
      const [top] = await callAPI(query, [document], 1);
      return top?.score ?? 0;
    },
  };
}
 
/**
 * Quick local rerank by lexical overlap (no API needed).
 *
 * Every document is scored against the query with `tfidfScore`, the list is
 * sorted by descending score, and the first `topK` entries are returned.
 * The input array is not modified.
 *
 * @param query - Search query.
 * @param documents - Candidate documents.
 * @param topK - Maximum number of results; defaults to 10. Zero or negative
 *   values yield an empty array.
 * @returns Up to `topK` documents with their original index and score.
 */
export function localRerank(query: string, documents: string[], topK = 10): RerankedDocument[] {
  // slice() interprets a negative end as an offset from the tail, which
  // would return "all but the last k" documents; clamp so it means "none".
  const limit = Math.max(0, topK);

  return documents
    .map((text, index) => ({ index, text, score: tfidfScore(query, text) }))
    .sort((a, b) => b.score - a.score)
    .slice(0, limit);
}