Article 11097 of comp.lang.perl: Path: feenix.metronet.com!news.ecn.bgu.edu!usenet.ins.cwru.edu!howland.reston.ans.net!europa.eng.gtefsd.com!MathWorks.Com!noc.near.net!newshost.meiko.com!not-for-mail From: mike@meiko.com (Mike Stok) Newsgroups: comp.lang.perl Subject: soundex.pl Date: 2 Mar 1994 09:47:16 -0500 Organization: Meiko Scientific, Inc., MA Lines: 255 Message-ID: <2l28tk$6vc@hibbert.meiko.com> [ Archivists note: this item has been replaced with the latest version, at the authors request. ] #!/bin/sh # This is a shell archive (produced by shar 3.49) # To extract the files from this archive, save it to a file, remove # everything above the "!/bin/sh" line above, and type "sh file_name". # # made 03/24/1994 00:35 UTC by Mike.Stok@meiko.concord.ma.us # Source directory /tmp_mnt/develop/sw/misc/mike/soundex # # existing files will NOT be overwritten unless -c is specified # # This shar contains: # length mode name # ------ ---------- ------------------------------------------ # 1677 -r--r--r-- soundex.pl # 2408 -r-xr-xr-x soundex.t # # ============= soundex.pl ============== if test -f 'soundex.pl' -a X"$1" != X"-c"; then echo 'x - skipping soundex.pl (File already exists)' else echo 'x - extracting soundex.pl (Text)' sed 's/^X//' << 'SHAR_EOF' > 'soundex.pl' && package soundex; X ;# $Id: soundex.pl,v 1.2 1994/03/24 00:30:27 mike Exp $ ;# ;# Implementation of soundex algorithm as described by Knuth in volume ;# 3 of The Art of Computer Programming, with ideas stolen from Ian ;# Phillips . ;# ;# Mike Stok , 2 March 1994. ;# ;# Knuth's test cases are: ;# ;# Euler, Ellery -> E460 ;# Gauss, Ghosh -> G200 ;# Hilbert, Heilbronn -> H416 ;# Knuth, Kant -> K530 ;# Lloyd, Ladd -> L300 ;# Lukasiewicz, Lissajous -> L222 ;# ;# $Log: soundex.pl,v $ ;# Revision 1.2 1994/03/24 00:30:27 mike ;# Subtle bug (any excuse :-) spotted by Rich Pinder ;# in the way I handles leasing characters which were different but had ;# the same soundex code. This showed up comparing it with Oracle's ;# soundex output. ;# ;# Revision 1.1 1994/03/02 13:01:30 mike ;# Initial revision ;# ;# ;############################################################################## X ;# $soundex'noCode is used to indicate a string doesn't have a soundex ;# code, I like undef other people may want to set it to 'Z000'. X $noCode = undef; X ;# main'soundex ;# ;# usage: ;# ;# @codes = &main'soundex (@wordList); ;# $code = &main'soundex ($word); ;# ;# This strenuously avoids $[ X sub main'soundex { X local (@s, $f, $fc, $_) = @_; X X foreach (@s) X { X tr/a-z/A-Z/; X tr/A-Z//cd; X X if ($_ eq '') X { X $_ = $noCode; X } X else X { X ($f) = /^(.)/; X tr/AEHIOUWYBFPVCGJKQSXZDTLMNR/00000000111122222222334556/; X ($fc) = /^(.)/; X s/^$fc+//; X tr///cs; X tr/0//d; X $_ = $f . $_ . '000'; X s/^(.{4}).*/$1/; X } X } X X wantarray ? @s : shift @s; } X 1; SHAR_EOF chmod 0444 soundex.pl || echo 'restore of soundex.pl failed' Wc_c="`wc -c < 'soundex.pl'`" test 1677 -eq "$Wc_c" || echo 'soundex.pl: original size 1677, current size' "$Wc_c" fi # ============= soundex.t ============== if test -f 'soundex.t' -a X"$1" != X"-c"; then echo 'x - skipping soundex.t (File already exists)' else echo 'x - extracting soundex.t (Text)' sed 's/^X//' << 'SHAR_EOF' > 'soundex.t' && #!./perl ;# ;# $Id: soundex.t,v 1.2 1994/03/24 00:30:27 mike Exp $ ;# ;# test module for soundex.pl ;# ;# $Log: soundex.t,v $ ;# Revision 1.2 1994/03/24 00:30:27 mike ;# Subtle bug (any excuse :-) spotted by Rich Pinder ;# in the way I handles leasing characters which were different but had ;# the same soundex code. This showed up comparing it with Oracle's ;# soundex output. ;# ;# Revision 1.1 1994/03/02 13:03:02 mike ;# Initial revision ;# ;# X require '../lib/soundex.pl'; X $test = 0; print "1..13\n"; X while () { X chop; X next if /^\s*;?#/; X next if /^\s*$/; X X ++$test; X $bad = 0; X X if (/^eval\s+/) X { X ($try = $_) =~ s/^eval\s+//; X X eval ($try); X if ($@) X { X $bad++; X print "not ok $test\n"; X print "# eval '$try' returned $@"; X } X } X elsif (/^\(/) X { X ($in, $out) = split (':'); X X $try = "\@expect = $out; \@got = &soundex $in;"; X eval ($try); X X if (@expect != @got) X { X $bad++; X print "not ok $test\n"; X print "# expected ", scalar @expect, " results, got ", scalar @got, "\n"; X print "# expected (", join (', ', @expect), X ") got (", join (', ', @got), ")\n"; X } X else X { X while (@got) X { X $expect = shift @expect; X $got = shift @got; X X if ($expect ne $got) X { X $bad++; X print "not ok $test\n"; X print "# expected $expect, got $got\n"; X } X } X } X } X else X { X ($in, $out) = split (':'); X X $try = "\$expect = $out; \$got = &soundex ($in);"; X eval ($try); X X if ($expect ne $got) X { X $bad++; X print "not ok $test\n"; X print "# expected $expect, got $got\n"; X } X } X X print "ok $test\n" unless $bad; } X __END__ # # 1..6 # # Knuth's test cases, scalar in, scalar out # 'Euler':'E460' 'Gauss':'G200' 'Hilbert':'H416' 'Knuth':'K530' 'Lloyd':'L300' 'Lukasiewicz':'L222' # # 7..8 # # check default bad code # '2 + 2 = 4':undef undef:undef # # 9 # # check array in, array out # ('Ellery', 'Ghosh', 'Heilbronn', 'Kant', 'Ladd', 'Lissajous'):('E460', 'G200', 'H416', 'K530', 'L300', 'L222') # # 10 # # check array with explicit undef # ('Mike', undef, 'Stok'):('M200', undef, 'S320') # # 11..12 # # check setting $soundex'noCode # eval $soundex'noCode = 'Z000'; ('Mike', undef, 'Stok'):('M200', 'Z000', 'S320') # # 13 # # a subtle difference between me & oracle, spotted by Rich Pinder # # CZARKOWSKA:C622 SHAR_EOF chmod 0555 soundex.t || echo 'restore of soundex.t failed' Wc_c="`wc -c < 'soundex.t'`" test 2408 -eq "$Wc_c" || echo 'soundex.t: original size 2408, current size' "$Wc_c" fi exit 0 -- The "usual disclaimers" apply. | Meiko Mike Stok | 130C Baker Ave. Ext Mike.Stok@meiko.concord.ma.us | Concord, MA 01742 Meiko tel: (508) 371 0088 | Article 11044 of comp.lang.perl: Path: feenix.metronet.com!news.utdallas.edu!convex!cs.utexas.edu!howland.reston.ans.net!pipex!pipex!not-for-mail From: ian@pipex.net (Ian Phillipps) Newsgroups: comp.lang.perl Subject: Soundex Function for Perl ?? Supersedes: <2kjgpj$di5@tank.pipex.net> Date: 27 Feb 1994 21:24:14 -0000 Organization: PIPEX Ltd, Cambridge, UK. Lines: 38 Message-ID: <2kr31u$7e4@tank.pipex.net> References: <2kg9r1$j5d@hsc.usc.edu> NNTP-Posting-Host: tank.pipex.net In article <2kg9r1$j5d@hsc.usc.edu>, Rich Pinder wrote: >There is a 'standard' function to assist in name matching algorighims >called 'Soundex'. > >I believe its in the public domain, and I also am sure the specs of the >function are available.....but..... Since it's post-your-soundex time, here's mine. No disclaimers. If it doesn't work at your site, I'll come out free of charge and fix it. Just send me the air tickets :-) Ian ---- [This article supersedes an earlier posting of mine - which failed on 'Lloyd'. Here's a replacement. Thanks to Mike Stok...] # This implements the "Soundex" algorithm. # Each argument is treated as a single word, and is reduced to its # Soundex four character (Z999) code. The resulting code or array of # codes is passed back as a the result. # ian@unipalm.co.uk 11 Sep 92 sub soundex { local( @res ) = @_; local($i, $t, $_); for ( @res ) { tr/a-zA-Z//cd; tr/a-zA-Z/A-ZA-Z/s; ($i,$t) = /(.)(.*)/; $t =~ tr/BFPVCGJKQSXZDTLMNRAEHIOUWY/111122222222334556/sd; $_ = substr(($i||'Z').$t.'000', 0, 4 ); } wantarray ? @res : $res[0]; } 1; Article 10948 of comp.lang.perl: Path: feenix.metronet.com!news.utdallas.edu!convex!cs.utexas.edu!howland.reston.ans.net!europa.eng.gtefsd.com!news.uoregon.edu!gaia.ucs.orst.edu!ruby.oce.orst.edu!tardis.co.uk!bill From: bill@tardis.co.uk (William Hails) Newsgroups: comp.lang.perl Subject: Re: Soundex Function for Perl ?? Date: Thu, 24 Feb 94 11:24:04 GMT Organization: University Computing Services - Oregon State University Lines: 57 Message-ID: <18531.9402241124@devone.tardis.co.uk> NNTP-Posting-Host: ruby.oce.orst.edu Originator: root@ruby.oce.orst.edu on 23 Feb 1994 11:12:33 -0800 rpinder@hsc.usc.edu (Rich Pinder) wrote: rpinder> There is a 'standard' function to assist in name matching algorighims rpinder> called 'Soundex'. rpinder> I believe its in the public domain, and I also am sure the specs of the rpinder> function are available.....but..... rpinder> I'd be interested in knowing if there is any such function already \ compiled rpinder> that would be accessible from Perl. Heres one I knocked up a while back, usual disclaimers etc. 8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<--- # soundex subroutine sub soundex { package soundex; local ($name) = @_; local($buf, $c, $pc, $idx) = ("Z000"); $name =~ tr/a-z/A-Z/; for ($pc = '0'; $name && $idx < 4; substr($name, 0, 1) = '') { if (($c) = ($name =~ /^([A-Z])/)) { if ($idx == 0 || ($map{$c} ne '0' && $map{$c} ne $pc)) { substr($buf, $idx, 1) = $idx ? $map{$c} : $c; ++$idx; } $pc = $map{$c}; } } $buf; } package soundex; %map = ( A, 0, B, 1, C, 2, D, 3, E, 0, F, 1, G, 2, H, 0, I, 0, J, 2, K, 2, L, 4, M, 5, N, 5, O, 1, P, 0, Q, 2, R, 6, S, 2, T, 3, U, 0, V, 1, W, 0, X, 2, Y, 0, Z, 2 ); 1; 8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<---8<--- -- Cheers Bill Bill Hails Tel (UK) 0483 300 200