/**
 * Sequence comparison utilities modelled on Python's difflib.SequenceMatcher.
 *
 * A flexible class for comparing pairs of sequences of any type.
 * Uses the Ratcliff-Obershelp style "gestalt pattern matching" approach:
 * find the longest contiguous matching subsequence, then apply the same idea
 * to the pieces left and right of it.
 *
 * Elements are compared with SameValueZero semantics (the same rule `Map` and
 * `Set` use), so they should be primitives or objects compared by identity.
 */

/** A run of equal elements: a[a .. a+size) equals b[b .. b+size). */
export interface Match {
  /** Starting position in sequence a */
  a: number;
  /** Starting position in sequence b */
  b: number;
  /** Length of the matching block */
  size: number;
}

export type OpCode = "replace" | "delete" | "insert" | "equal";

/** One edit step: a[i1 .. i2) is turned into b[j1 .. j2) by `tag`. */
export interface OpCodeTuple {
  /** Operation type */
  tag: OpCode;
  /** Start index in sequence a */
  i1: number;
  /** End index in sequence a */
  i2: number;
  /** Start index in sequence b */
  j1: number;
  /** End index in sequence b */
  j2: number;
}

/** Predicate deciding whether an element of sequence b is "junk". */
export type JunkFunction<T> = (element: T) => boolean;

export class SequenceMatcher<T> {
  private readonly isjunk: JunkFunction<T> | null;
  private a: T[];
  private b: T[];
  private readonly autojunk: boolean;

  // Lookup structures derived from sequence b (rebuilt by chainB()).
  private bjunk: Set<T>;
  private bpopular: Set<T>;
  private b2j: Map<T, number[]>;

  // Lazily computed results, reset whenever a sequence changes.
  private fullbcount: Map<T, number> | null = null;
  private matchingBlocks: Match[] | null = null;
  private opcodes: OpCodeTuple[] | null = null;

  /**
   * @param isjunk   Optional predicate marking elements of b to ignore when
   *                 seeding matches; `null` means nothing is junk.
   * @param a        First sequence.
   * @param b        Second sequence.
   * @param autojunk When true and b has at least 200 elements, elements that
   *                 occur more than `floor(n / 100) + 1` times are treated as
   *                 "popular" and are not used to seed matches.
   */
  constructor(
    isjunk: JunkFunction<T> | null = null,
    a: T[] = [],
    b: T[] = [],
    autojunk: boolean = true,
  ) {
    this.isjunk = isjunk;
    this.a = [];
    this.b = [];
    this.autojunk = autojunk;
    this.bjunk = new Set<T>();
    this.bpopular = new Set<T>();
    this.b2j = new Map<T, number[]>();
    this.setSeqs(a, b);
  }

  /**
   * Set both sequences to be compared
   */
  setSeqs(a: T[], b: T[]): void {
    this.setSeq1(a);
    this.setSeq2(b);
  }

  /**
   * Set the first sequence to be compared.
   * The input is copied, so later mutation of the caller's array has no effect.
   */
  setSeq1(a: T[]): void {
    this.a = [...a];
    this.matchingBlocks = null;
    this.opcodes = null;
  }

  /**
   * Set the second sequence to be compared and rebuild the b-side lookup
   * structures. The input is copied.
   */
  setSeq2(b: T[]): void {
    this.b = [...b];
    this.matchingBlocks = null;
    this.opcodes = null;
    this.fullbcount = null;
    this.chainB();
  }

  /**
   * Analyze sequence b and build lookup structures:
   * `bjunk` (elements flagged by `isjunk`), `bpopular` (over-frequent
   * elements when autojunk applies) and `b2j` (element -> ascending list of
   * positions in b, excluding junk and popular elements).
   */
  private chainB(): void {
    const b = this.b;
    const n = b.length;
    this.bjunk = new Set<T>();
    this.bpopular = new Set<T>();
    this.b2j = new Map<T, number[]>();

    // Count occurrences of each distinct element.
    const counts = new Map<T, number>();
    for (const element of b) {
      counts.set(element, (counts.get(element) ?? 0) + 1);
    }

    // Classify each distinct element as junk, popular, or ordinary.
    const popularThreshold = Math.floor(n / 100) + 1;
    const usePopularity = this.autojunk && n >= 200;
    for (const [element, count] of counts) {
      if (this.isjunk != null && this.isjunk(element)) {
        this.bjunk.add(element);
      } else if (usePopularity && count > popularThreshold) {
        this.bpopular.add(element);
      }
    }

    // Record positions of ordinary elements; indices are appended in
    // ascending order, which findLongestMatch relies on for its early break.
    for (let j = 0; j < n; j++) {
      const element = b[j];
      if (this.bjunk.has(element) || this.bpopular.has(element)) continue;
      const positions = this.b2j.get(element);
      if (positions === undefined) {
        this.b2j.set(element, [j]);
      } else {
        positions.push(j);
      }
    }
  }

  /**
   * Find the longest matching block in a[alo:ahi] and b[blo:bhi].
   *
   * Among all maximal blocks the one starting earliest in a is returned, and
   * among those the one starting earliest in b. If nothing matches the result
   * is `{ a: alo, b: blo, size: 0 }`.
   *
   * @param alo Inclusive lower bound in a.
   * @param ahi Exclusive upper bound in a; `null` means `a.length`.
   * @param blo Inclusive lower bound in b.
   * @param bhi Exclusive upper bound in b; `null` means `b.length`.
   */
  findLongestMatch(
    alo: number = 0,
    ahi: number | null = null,
    blo: number = 0,
    bhi: number | null = null,
  ): Match {
    const aEnd = ahi ?? this.a.length;
    const bEnd = bhi ?? this.b.length;

    let besti = alo;
    let bestj = blo;
    let bestsize = 0;

    // j2len maps j -> length of the longest match ending at a[i-1], b[j].
    // Each row is built fresh from the previous one.
    let j2len = new Map<number, number>();
    for (let i = alo; i < aEnd; i++) {
      const row = new Map<number, number>();
      const positions = this.b2j.get(this.a[i]);
      if (positions !== undefined) {
        for (const j of positions) {
          if (j < blo) continue;
          if (j >= bEnd) break; // positions are ascending
          const k = (j2len.get(j - 1) ?? 0) + 1;
          row.set(j, k);
          if (k > bestsize) {
            besti = i - k + 1;
            bestj = j - k + 1;
            bestsize = k;
          }
        }
      }
      j2len = row;
    }

    // Extend over equal NON-junk neighbours. This picks up "popular"
    // elements, which are absent from b2j and so never seed a match.
    while (
      besti > alo &&
      bestj > blo &&
      !this.isBJunk(this.b[bestj - 1]) &&
      this.elementsEqual(this.a[besti - 1], this.b[bestj - 1])
    ) {
      besti--;
      bestj--;
      bestsize++;
    }
    while (
      besti + bestsize < aEnd &&
      bestj + bestsize < bEnd &&
      !this.isBJunk(this.b[bestj + bestsize]) &&
      this.elementsEqual(this.a[besti + bestsize], this.b[bestj + bestsize])
    ) {
      bestsize++;
    }

    // Now absorb junk neighbours, but only where a and b actually agree;
    // without the equality test the block would cover unequal elements.
    while (
      besti > alo &&
      bestj > blo &&
      this.isBJunk(this.b[bestj - 1]) &&
      this.elementsEqual(this.a[besti - 1], this.b[bestj - 1])
    ) {
      besti--;
      bestj--;
      bestsize++;
    }
    while (
      besti + bestsize < aEnd &&
      bestj + bestsize < bEnd &&
      this.isBJunk(this.b[bestj + bestsize]) &&
      this.elementsEqual(this.a[besti + bestsize], this.b[bestj + bestsize])
    ) {
      bestsize++;
    }

    return { a: besti, b: bestj, size: bestsize };
  }

  /**
   * Return list of non-overlapping matching blocks, ordered by position.
   *
   * Adjacent blocks are merged, so consecutive entries never touch. The last
   * entry is always the sentinel `{ a: a.length, b: b.length, size: 0 }`.
   * The result is cached; callers should treat it as read-only.
   */
  getMatchingBlocks(): Match[] {
    if (this.matchingBlocks !== null) {
      return this.matchingBlocks;
    }

    // Explicit work stack instead of recursion so that long sequences with
    // many small matches cannot exhaust the call stack.
    const pending: Array<[number, number, number, number]> = [
      [0, this.a.length, 0, this.b.length],
    ];
    const found: Match[] = [];
    let range: [number, number, number, number] | undefined;
    while ((range = pending.pop()) !== undefined) {
      const [alo, ahi, blo, bhi] = range;
      const match = this.findLongestMatch(alo, ahi, blo, bhi);
      if (match.size === 0) continue;
      found.push(match);
      if (alo < match.a && blo < match.b) {
        pending.push([alo, match.a, blo, match.b]);
      }
      const aAfter = match.a + match.size;
      const bAfter = match.b + match.size;
      if (aAfter < ahi && bAfter < bhi) {
        pending.push([aAfter, ahi, bAfter, bhi]);
      }
    }
    found.sort((x, y) => x.a - y.a || x.b - y.b);

    // Collapse blocks that touch in both sequences (possible after junk
    // extension) into a single block.
    const blocks: Match[] = [];
    let current: Match = { a: 0, b: 0, size: 0 };
    for (const next of found) {
      if (
        current.a + current.size === next.a &&
        current.b + current.size === next.b
      ) {
        current.size += next.size;
      } else {
        if (current.size > 0) blocks.push(current);
        current = { a: next.a, b: next.b, size: next.size };
      }
    }
    if (current.size > 0) blocks.push(current);

    // Sentinel
    blocks.push({ a: this.a.length, b: this.b.length, size: 0 });

    this.matchingBlocks = blocks;
    return blocks;
  }

  /**
   * Return list of 5-tuples describing how to turn a into b.
   *
   * The first tuple starts at i1 = j1 = 0 and each following tuple starts
   * where the previous one ended. The result is cached; treat it as read-only.
   */
  getOpcodes(): OpCodeTuple[] {
    if (this.opcodes !== null) {
      return this.opcodes;
    }

    let i = 0;
    let j = 0;
    const opcodes: OpCodeTuple[] = [];

    for (const match of this.getMatchingBlocks()) {
      // Whatever lies between the previous block and this one is a gap.
      const gapInA = i < match.a;
      const gapInB = j < match.b;
      if (gapInA || gapInB) {
        const tag: OpCode =
          gapInA && gapInB ? "replace" : gapInA ? "delete" : "insert";
        opcodes.push({ tag, i1: i, i2: match.a, j1: j, j2: match.b });
      }

      i = match.a + match.size;
      j = match.b + match.size;

      // The sentinel has size 0 and contributes no "equal" step.
      if (match.size > 0) {
        opcodes.push({ tag: "equal", i1: match.a, i2: i, j1: match.b, j2: j });
      }
    }

    this.opcodes = opcodes;
    return opcodes;
  }

  /**
   * Return a measure of sequences' similarity (0.0-1.0):
   * `2 * matched / (a.length + b.length)`, or 1.0 when both are empty.
   */
  ratio(): number {
    let matched = 0;
    for (const block of this.getMatchingBlocks()) {
      matched += block.size; // the sentinel adds 0
    }
    return this.computeRatio(matched);
  }

  /**
   * Return an upper bound on ratio() relatively quickly.
   * Counts elements common to both sequences regardless of order.
   */
  quickRatio(): number {
    if (this.fullbcount === null) {
      const counts = new Map<T, number>();
      for (const element of this.b) {
        counts.set(element, (counts.get(element) ?? 0) + 1);
      }
      this.fullbcount = counts;
    }

    // Work on a copy so the cached counts stay intact.
    const available = new Map(this.fullbcount);
    let matched = 0;
    for (const element of this.a) {
      const left = available.get(element);
      if (left !== undefined && left > 0) {
        matched++;
        available.set(element, left - 1);
      }
    }
    return this.computeRatio(matched);
  }

  /**
   * Return an upper bound on ratio() very quickly (uses lengths only).
   */
  realQuickRatio(): number {
    return this.computeRatio(Math.min(this.a.length, this.b.length));
  }

  /** Shared ratio formula; two empty sequences count as identical. */
  private computeRatio(matched: number): number {
    const total = this.a.length + this.b.length;
    return total === 0 ? 1.0 : (2.0 * matched) / total;
  }

  /**
   * Check if element is junk in sequence b
   */
  private isBJunk(element: T): boolean {
    return this.bjunk.has(element);
  }

  /**
   * Check if two elements are equal using SameValueZero, the same rule the
   * Map/Set lookups above use (so NaN equals NaN, and +0 equals -0).
   */
  private elementsEqual(a: T, b: T): boolean {
    return a === b || (a !== a && b !== b);
  }
}

/**
 * Utility function to get close matches similar to Python's get_close_matches.
 *
 * @param word          Sequence to look for.
 * @param possibilities Candidate sequences.
 * @param n             Maximum number of results; must be greater than 0.
 * @param cutoff        Minimum ratio() a candidate needs to be returned.
 * @returns Up to `n` candidates, best first; ties keep their input order.
 * @throws Error if `n <= 0`.
 */
export function getCloseMatches<T>(
  word: T[],
  possibilities: T[][],
  n: number = 3,
  cutoff: number = 0.6,
): T[][] {
  if (n <= 0) {
    throw new Error("n must be greater than 0");
  }

  const scored: Array<{ sequence: T[]; ratio: number }> = [];
  const matcher = new SequenceMatcher<T>(null, word, []);
  for (const possibility of possibilities) {
    matcher.setSeq2(possibility);
    // The quick ratios are upper bounds on ratio(), so failing either one
    // rules the candidate out without running the full comparison.
    if (
      matcher.realQuickRatio() >= cutoff &&
      matcher.quickRatio() >= cutoff
    ) {
      const ratio = matcher.ratio();
      if (ratio >= cutoff) {
        scored.push({ sequence: possibility, ratio });
      }
    }
  }

  // Sort by ratio (descending) and take top n
  scored.sort((x, y) => y.ratio - x.ratio);
  return scored.slice(0, n).map((entry) => entry.sequence);
}

/**
 * String-specific version of SequenceMatcher for character-by-character comparison.
 * This class treats strings as sequences of characters while providing a string-friendly API.
 *
 * Strings are split with `Array.from`, i.e. by Unicode code point, so all
 * indices in results count code points rather than UTF-16 code units.
 */
export class StringSequenceMatcher {
  private readonly matcher: SequenceMatcher<string>;

  constructor(
    isjunk: JunkFunction<string> | null = null,
    a: string = "",
    b: string = "",
    autojunk: boolean = true,
  ) {
    this.matcher = new SequenceMatcher<string>(
      isjunk,
      Array.from(a),
      Array.from(b),
      autojunk,
    );
  }

  /**
   * Set both sequences to be compared
   */
  setSeqs(a: string, b: string): void {
    this.matcher.setSeqs(Array.from(a), Array.from(b));
  }

  /**
   * Set the first sequence to be compared
   */
  setSeq1(a: string): void {
    this.matcher.setSeq1(Array.from(a));
  }

  /**
   * Set the second sequence to be compared
   */
  setSeq2(b: string): void {
    this.matcher.setSeq2(Array.from(b));
  }

  /**
   * Find the longest matching block in a[alo:ahi] and b[blo:bhi]
   */
  findLongestMatch(
    alo: number = 0,
    ahi: number | null = null,
    blo: number = 0,
    bhi: number | null = null,
  ): Match {
    return this.matcher.findLongestMatch(alo, ahi, blo, bhi);
  }

  /**
   * Return list of non-overlapping matching blocks
   */
  getMatchingBlocks(): Match[] {
    return this.matcher.getMatchingBlocks();
  }

  /**
   * Return list of 5-tuples describing how to turn a into b
   */
  getOpcodes(): OpCodeTuple[] {
    return this.matcher.getOpcodes();
  }

  /**
   * Return a measure of sequences' similarity (0.0-1.0)
   */
  ratio(): number {
    return this.matcher.ratio();
  }

  /**
   * Return an upper bound on ratio() relatively quickly
   */
  quickRatio(): number {
    return this.matcher.quickRatio();
  }

  /**
   * Return an upper bound on ratio() very quickly
   */
  realQuickRatio(): number {
    return this.matcher.realQuickRatio();
  }
}

/**
 * Utility function for string similarity: ratio() of the two strings compared
 * character by character (1.0 when both are empty).
 */
export function getStringSimilarity(a: string, b: string): number {
  return new StringSequenceMatcher(null, a, b).ratio();
}

/**
 * Get close string matches.
 *
 * @param word          String to look for.
 * @param possibilities Candidate strings.
 * @param n             Maximum number of results; must be greater than 0.
 * @param cutoff        Minimum similarity a candidate needs to be returned.
 * @returns Up to `n` candidates, best first; ties keep their input order.
 * @throws Error if `n <= 0`.
 */
export function getCloseStringMatches(
  word: string,
  possibilities: string[],
  n: number = 3,
  cutoff: number = 0.6,
): string[] {
  if (n <= 0) {
    throw new Error("n must be greater than 0");
  }

  const scored: Array<{ string: string; ratio: number }> = [];
  const matcher = new StringSequenceMatcher(null, word, "");
  for (const possibility of possibilities) {
    matcher.setSeq2(possibility);
    // Cheap upper bounds first; only survivors pay for the full ratio().
    if (
      matcher.realQuickRatio() >= cutoff &&
      matcher.quickRatio() >= cutoff
    ) {
      const ratio = matcher.ratio();
      if (ratio >= cutoff) {
        scored.push({ string: possibility, ratio });
      }
    }
  }

  // Sort by ratio (descending) and take top n
  scored.sort((x, y) => y.ratio - x.ratio);
  return scored.slice(0, n).map((entry) => entry.string);
}