/**
 * AgentKits — Token Counter Module
 *
 * Estimates token counts for text using tiktoken-style heuristics.
 * Supports approximations for several model tokenizer families.
 *
 * Usage:
 *   import { countTokens, createTokenCounter } from 'agentkits/token-counter';
 *   const count = countTokens('Hello world', 'gpt-4o');
 */
// ── Types ──────────────────────────────────────────────────────────
export type TokenizerModel = 'cl100k' | 'p50k' | 'o200k' | 'default';
export interface TokenCounterConfig {
  /** Default tokenizer model to use */
  model?: TokenizerModel;
  /** Characters-per-token ratio override (default depends on model) */
  charsPerToken?: number;
}
export interface TokenCounter {
  /** Count tokens in text */
  count(text: string): number;
  /** Count tokens for chat messages */
  countMessages(messages: Array<{ role: string; content: string }>): number;
  /** Estimate cost given price per 1M tokens */
  estimateCost(text: string, pricePerMillion: number): number;
}
// ── Model-to-tokenizer mapping ─────────────────────────────────────
const MODEL_TOKENIZER_MAP: Record<string, TokenizerModel> = {
  'gpt-4o': 'o200k',
  'gpt-4o-mini': 'o200k',
  'gpt-4-turbo': 'cl100k',
  'gpt-4': 'cl100k',
  'gpt-3.5-turbo': 'cl100k',
  'text-davinci-003': 'p50k',
};
// Average characters per token for each tokenizer family
const CHARS_PER_TOKEN: Record<TokenizerModel, number> = {
  cl100k: 4.0,
  p50k: 4.0,
  o200k: 3.8,
  default: 4.0,
};
// Overhead tokens per message for chat format
const MESSAGE_OVERHEAD = 4; // <|im_start|>role\ncontent<|im_end|>
const CHAT_OVERHEAD = 3; // every reply is primed with <|im_start|>assistant
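// Worked example with the default 4.0 chars/token ratio and one user message 'Hello world':
//   CHAT_OVERHEAD(3) + MESSAGE_OVERHEAD(4) + tokens('user')(1) + tokens('Hello world')(3) = 11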
// ── Heuristic tokenizer ────────────────────────────────────────────
function estimateTokens(text: string, charsPerToken: number): number {
  if (!text) return 0;
  // Simple heuristic: assume a fixed average character-per-token ratio for the family.
  return Math.ceil(text.length / charsPerToken);
}
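// Example: estimateTokens('Hello world', 4.0) === Math.ceil(11 / 4.0) === 3.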
// ── Factory ────────────────────────────────────────────────────────
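/**
 * Build a reusable TokenCounter bound to one tokenizer family.
 *
 * @example
 * // Illustrative sketch; the 2.50 price per 1M tokens is a placeholder, not a real rate.
 * const counter = createTokenCounter({ model: 'o200k' });
 * counter.count('Hello world');             // Math.ceil(11 / 3.8) === 3
 * counter.estimateCost('Hello world', 2.5); // (3 / 1_000_000) * 2.5 === 0.0000075
 */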
export function createTokenCounter(config: TokenCounterConfig = {}): TokenCounter {
  const model = config.model ?? 'default';
  const charsPerToken = config.charsPerToken ?? CHARS_PER_TOKEN[model] ?? CHARS_PER_TOKEN.default;
  return {
    count(text: string): number {
      return estimateTokens(text, charsPerToken);
    },
    countMessages(messages: Array<{ role: string; content: string }>): number {
      let total = CHAT_OVERHEAD;
      for (const msg of messages) {
        total += MESSAGE_OVERHEAD;
        total += estimateTokens(msg.role, charsPerToken);
        total += estimateTokens(msg.content, charsPerToken);
      }
      return total;
    },
    estimateCost(text: string, pricePerMillion: number): number {
      const tokens = estimateTokens(text, charsPerToken);
      return (tokens / 1_000_000) * pricePerMillion;
    },
  };
}
// ── Convenience ────────────────────────────────────────────────────
/**
* Quick token count for a string. Optionally specify a model name for better accuracy.
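 *
 * @example
 * // 'gpt-4o' maps to the o200k family (~3.8 chars/token); unknown models fall back to 4.0.
 * countTokens('Hello, world!', 'gpt-4o'); // Math.ceil(13 / 3.8) === 4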
*/
export function countTokens(text: string, model?: string): number {
  const tokenizer = model ? (MODEL_TOKENIZER_MAP[model] ?? 'default') : 'default';
  const charsPerToken = CHARS_PER_TOKEN[tokenizer];
  return estimateTokens(text, charsPerToken);
}
/**
* Resolve which tokenizer family a model uses.
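 *
 * @example
 * getTokenizerForModel('gpt-4o');        // 'o200k'
 * getTokenizerForModel('my-custom-llm'); // 'default' (unmapped models fall back)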
*/
export function getTokenizerForModel(model: string): TokenizerModel {
  return MODEL_TOKENIZER_MAP[model] ?? 'default';
}