Apply this patch to String::Similarity 0.02 from CPAN to turn it into String::Unicode::Similarity, required by the MusicBrainz server. Only in String-Similarity-0.02: blib diff -r -u t/String-Similarity-0.02/Changes String-Similarity-0.02/Changes --- t/String-Similarity-0.02/Changes 2000-11-16 00:30:35.000000000 +0000 +++ String-Similarity-0.02/Changes 2003-01-07 19:41:04.000000000 +0000 @@ -1,4 +1,4 @@ -Revision history for Perl extension String::Similarity +Revision history for Perl extension String::Unicode::Similarity 0.02 Thu Nov 16 01:30:16 CET 2000 - added minimum_similarity limit to speed up some searches diff -r -u t/String-Similarity-0.02/fstrcmp.c String-Similarity-0.02/fstrcmp.c --- t/String-Similarity-0.02/fstrcmp.c 2000-11-16 00:35:28.000000000 +0000 +++ String-Similarity-0.02/fstrcmp.c 2003-01-07 19:33:22.000000000 +0000 @@ -50,7 +50,7 @@ struct string_data { /* The string to be compared. */ - const char *data; + const INT32 *data; /* The length of the string to be compared. */ int data_length; @@ -161,8 +161,8 @@ { int *const fd = fdiag; /* Give the compiler a chance. */ int *const bd = bdiag; /* Additional help for the compiler. */ - const char *const xv = string[0].data; /* Still more help for the compiler. */ - const char *const yv = string[1].data; /* And more and more . . . */ + const INT32 *const xv = string[0].data; /* Still more help for the compiler. */ + const INT32 *const yv = string[1].data; /* And more and more . . . */ const int dmin = xoff - ylim; /* Minimum valid diagonal. */ const int dmax = xlim - yoff; /* Maximum valid diagonal. */ const int fmid = xoff - yoff; /* Center diagonal of top-down search. */ @@ -485,8 +485,8 @@ int ylim; int minimal; { - const char *const xv = string[0].data; /* Help the compiler. */ - const char *const yv = string[1].data; + const INT32 *const xv = string[0].data; /* Help the compiler. */ + const INT32 *const yv = string[1].data; if (string[1].edit_count + string[0].edit_count > max_edits) return; @@ -560,7 +560,7 @@ fstrcmp - fuzzy string compare SYNOPSIS - double fstrcmp(const char *, const char *, double); + double fstrcmp(const INT32 *, const INT32 *, int, int, double); DESCRIPTION The fstrcmp function may be used to compare two string for @@ -574,7 +574,7 @@ similar. */ double -fstrcmp (const char *string1, const char *string2, double minimum) +fstrcmp (const INT32 *string1, const INT32 *string2, int length1, int length2, double minimum) { int i; @@ -584,9 +584,9 @@ /* set the info for each string. */ string[0].data = string1; - string[0].data_length = strlen (string1); + string[0].data_length = length1; string[1].data = string2; - string[1].data_length = strlen (string2); + string[1].data_length = length2; /* short-circuit obvious comparisons */ if (string[0].data_length == 0 && string[1].data_length == 0) diff -r -u t/String-Similarity-0.02/fstrcmp.h String-Similarity-0.02/fstrcmp.h --- t/String-Similarity-0.02/fstrcmp.h 2000-11-16 00:28:38.000000000 +0000 +++ String-Similarity-0.02/fstrcmp.h 2003-01-07 19:32:10.000000000 +0000 @@ -22,6 +22,8 @@ #define PARAMS(proto) proto -double fstrcmp (const char *__s1, const char *__s2, double __minimum); +#define INT32 long + +double fstrcmp (const INT32 *__s1, const INT32 *__s2, int __l1, int __l2, double __minimum); #endif Only in String-Similarity-0.02: Makefile diff -r -u t/String-Similarity-0.02/Makefile.PL String-Similarity-0.02/Makefile.PL --- t/String-Similarity-0.02/Makefile.PL 2000-04-07 07:09:10.000000000 +0100 +++ String-Similarity-0.02/Makefile.PL 2003-01-10 23:21:58.000000000 +0000 @@ -6,7 +6,20 @@ COMPRESS => 'gzip -9v', SUFFIX => '.gz', }, - NAME => "String::Similarity", + NAME => "String::Unicode::Similarity", VERSION_FROM => "Similarity.pm", ); +print < and Marc Lehmann ). + Unicode (UCS-4) patch by Dave Evans diff -r -u t/String-Similarity-0.02/Similarity.xs String-Similarity-0.02/Similarity.xs --- t/String-Similarity-0.02/Similarity.xs 2000-11-16 00:28:17.000000000 +0000 +++ String-Similarity-0.02/Similarity.xs 2003-01-07 19:51:13.000000000 +0000 @@ -5,12 +5,13 @@ #include "fstrcmp.h" #include "fstrcmp.c" -MODULE = String::Similarity PACKAGE = String::Similarity +MODULE = String::Unicode::Similarity PACKAGE = String::Unicode::Similarity double -fstrcmp(s1, s2, minimum_similarity = 0) +fstrcmp(s1, s2, l1, l2, minimum_similarity = 0) char * s1 char * s2 + int l1 + int l2 double minimum_similarity - PROTOTYPE: @ diff -r -u t/String-Similarity-0.02/t/00_load.t String-Similarity-0.02/t/00_load.t --- t/String-Similarity-0.02/t/00_load.t 2000-04-07 07:12:01.000000000 +0100 +++ String-Similarity-0.02/t/00_load.t 2003-01-07 19:52:08.000000000 +0000 @@ -1,6 +1,6 @@ -BEGIN { $| = 1; print "1..4\n"; } +BEGIN { $| = 1; print "1..5\n"; } END {print "not ok 1\n" unless $loaded;} -use String::Similarity; +use String::Unicode::Similarity; $loaded = 1; print "ok 1\n"; @@ -8,4 +8,5 @@ my $s = similarity("this should be same the", "this should be the same"); print $s > 0.825 && $s < 0.827 ? "" : "not ", "ok 3\n"; print similarity("A", "B") == 0 ? "" : "not ", "ok 4\n"; +print similarity(chr(0x100), chr(0x101)) == 0 ? "" : "not ", "ok 5\n";