@@ -277,15 +277,25 @@ levenshtein_internal(text *s, text *t,
277277 ++ n ;
278278
279279 /*
280- * Instead of building an (m+1)x(n+1) array, we'll use two different
281- * arrays of size m+1 for storing accumulated values. At each step one
282- * represents the "previous" row and one is the "current" row of the
283- * notional large array.
280+ * One way to compute Levenshtein distance is to incrementally construct
281+ * an (m+1)x(n+1) matrix where cell (i, j) represents the minimum number
282+ * of operations required to transform the first i characters of s into
283+ * the first j characters of t. The last column of the final row is the
284+ * answer.
285+ *
286+ * We use that algorithm here with some modification. In lieu of holding
287+ * the entire array in memory at once, we'll just use two arrays of size
288+ * m+1 for storing accumulated values. At each step one array represents
289+ * the "previous" row and one is the "current" row of the notional large
290+ * array.
284291 */
285292 prev = (int * ) palloc (2 * m * sizeof (int ));
286293 curr = prev + m ;
287294
288- /* Initialize the "previous" row to 0..cols */
295+ /*
296+ * To transform the first i characters of s into the first 0 characters
297+ * of t, we must perform i deletions.
298+ */
289299 for (i = 0 ; i < m ; i ++ )
290300 prev [i ] = i * del_c ;
291301
@@ -297,8 +307,8 @@ levenshtein_internal(text *s, text *t,
297307 int y_char_len = n != t_bytes + 1 ? pg_mblen (y ) : 1 ;
298308
299309 /*
300- * First cell must increment sequentially, as we're on the j'th row of
301- * the (m+1)x(n+1) array .
310+ * To transform the first 0 characters of s into the first j
311+ * characters of t, we must perform j insertions .
302312 */
303313 curr [0 ] = j * ins_c ;
304314
0 commit comments