#!/usr/local/bin/perl
#
# cc2refer -- convert Compact Cambridge Life Sciences (LSC) references
#             to Refer format.
#
# usage: cc2refer foo [bar baz ...] > out
#
# Input format, as implied by the parsing below: a line starting with a
# two-letter tag and ": " opens a field; the lines that follow, up to the
# next tag line, empty line, or line starting with whitespace, are that
# field's text.  Empty/whitespace-led lines are copied through unchanged.
#
# ----------------------------------------------------------------------
# This script was originally distributed as a shell archive in the
# Usenet article reproduced below.  The shar wrapper itself (unpack with
# "sh file", "sh file -c" to overwrite, a check that 'cc2refer' unpacked
# to 3990 characters, "chmod +x 'cc2refer'") is not reproduced: the
# script is stored here unpacked and has been edited since.
#
#   Xref: feenix.metronet.com bit.listserv.endnote:238
#   Newsgroups: bit.listserv.endnote
#   Path: feenix.metronet.com!news.utdallas.edu!corpgate!bnrgate!nott!
#         torn!howland.reston.ans.net!newsserver.jvnc.net!yale.edu!
#         news.yale.edu!proton!dcs
#   From: dcs@proton.chem.yale.edu (Dave Schweisguth)
#   Subject: Compact Cambridge LSC -> Refer converter
#   Message-ID: <1993Jun7.204030.20393@news.yale.edu>
#   Sender: news@news.yale.edu (USENET News System)
#   Nntp-Posting-Host: proton.chem.yale.edu
#   Organization: Yale University
#   X-Newsreader: TIN [version 1.2 PL0]
#   Date: Mon, 7 Jun 1993 20:40:30 GMT
#   Lines: 175
#
#   Hi all,
#
#   I'm enclosing a Perl script which converts Compact Cambridge Life
#   Sciences references to Refer.  It does a slightly better job than
#   the appropriate option of PreferRefer (as of 1.3.beta4), mainly wrt
#   capitalization.  The Perl is pretty gross (anyone know how to avoid
#   the redo?) but it works.
#
#   Avi has told me that the Compact Cambridge format will change RSN;
#   if they replace the disks up to '92 this will become instantly
#   useless.  That would be fine with me.
#
#   Anyone is welcome to distribute or archive this as they please.
#   I'd be interested in comments, but I'll only definitely see them in
#   personal mail.
#
#   Cheers,
#   --
#   | Dave Schweisguth  Yale MB&B & Chemistry  Net: dcs@neutron.chem.yale.edu |
#   | Lab phone: 203-432-5208  Fax: 203-432-6144  Home phone: 203-624-3866   |
#   | For complying with the NJ Right To Know Act: Contents partially unknown. |
# ----------------------------------------------------------------------
#
# Changes relative to the posted version:
#   * The title clean-up only removes a trailing period; it used to
#     remove whatever the last character was.
#   * A "strange" author name is emitted as "%A name", not "%A: name".
#   * Input is never read again after end-of-file (a second read of <>
#     after EOF falls through to STDIN and made the script wait there
#     when the last record was not followed by a blank line).
#   * The redo is replaced by a one-line pushback buffer.

use strict;
use warnings;

(my $whatami = $0) =~ s|.*/||;    # `basename $0`

my %tags = (

#   CC tag  Refer tag     CC meaning              Refer field

    'AB',   'X',        # abstract
    'ED',   'E',        # editor
    'IB',   '@',        # ISBN
    'PB',   'I',        # publisher
    'TI',   'T',        # title
    'CL',   'K',        # classification          keywords
    'DE',   'K',        # description             keywords
    'AF',   'O',        # author affiliation      notes
    'CE',   'O',        # corporate entry         notes
    'CF',   'O',        # conference              notes
    'NT',   'O',        # notes                   notes
    'NU',   'O',        # number                  notes
    'LA',   'O',        # language                notes
    'SL',   'O',        # summary language        notes
    'OT',   'O',        # original title          notes
    'PT',   'O',        # publication type        notes
    'UI',   'O',        # unique identifier       notes
);

# Labels prefixed to the text of fields that end up in the notes (%O)
# and would otherwise be unidentifiable there.  Tags mapped to 'O' above
# but absent here (CF, NT, NU) are written without a label.
my %notes = (
    'AF', 'Author affiliation',
    'CE', 'Corporate entry',
    'LA', 'Language',
    'SL', 'Summary language',
    'OT', 'Original title',
    'PT', 'Compact Cambridge LSC publication type',
    'UI', 'Compact Cambridge LSC unique identifier',
);

# Lines read ahead while collecting a field and handed back to the reader.
my @pending;

# Set once <> has reported end of input, so that it is never called again.
my $input_done = 0;

# Return the next input line (pushed-back lines first), or undef once the
# input is exhausted.
sub next_line {
    return shift @pending if @pending;
    return undef if $input_done;

    my $line = <>;
    $input_done = 1 unless defined $line;
    return $line;
}

# Collect the text of the field whose tag line has just been read: every
# following line up to (not including) the next tag line, empty line, or
# line starting with whitespace.  The terminating line is pushed back so
# the main loop sees it.  Returns the collected text, newlines included.
sub read_field {
    my $field = '';

    while (defined(my $line = next_line())) {
        if ($line =~ /^[A-Z]{2}: |^$|^\s+/) {
            unshift @pending, $line;
            last;
        }
        $field .= $line;
    }

    return $field;
}

# Turn an AU field (names separated by ';', possibly spread over several
# lines) into a list of "%A ...\n" lines, one per author.  A ", " is put
# in front of the last whitespace-separated word of each name; a name
# consisting of a single word is passed through with a warning.
sub refer_authors {
    my ($field) = @_;

    $field =~ s/\n/ /g;
    $field =~ s/\s*;\s*/;/g;
    $field =~ s/\s*$//;

    my @out;
    foreach my $author (split /;/, $field) {
        unless ($author =~ s/\s+(\S+)$/, $1/) {
            warn "$whatami: Strange author name at input line $.:\n$author\n";
        }
        push @out, "%A $author\n";
    }

    return @out;
}

# Turn an SO (source) field into a list of Refer lines.  The field is
# matched against the layouts listed below; anything that fits none of
# them is put into a %O note, with a warning.
sub refer_source {
    my ($field) = @_;

    $field =~ s/\n/ /g;
    $field =~ s/\s*$//;

    # journal: name, vol, pp, yr

    if ($field =~ /^([^;]+); vol\. (\w+), pp?\. (\w+[-&]?\w*); (\d+)$/) {
        return ("%J $1\n", "%V $2\n", "%P $3\n", "%D $4\n",
                "%0 Journal Article\n");
    }

    # journal: name, vol, no, pp, yr

    if ($field =~ /^([^;]+); vol\. (\w+), no\. (\w+[-&]?\w*), pp?\. (\w+-?\w*); (\d+)$/) {
        return ("%J $1\n", "%V $2\n", "%N $3\n", "%P $4\n", "%D $5\n",
                "%0 Journal Article\n");
    }

    # journal: name, vol, no, pt, pp, yr
    #
    # NOTE(review): issue number and part are joined with no separator
    # (no. 3, pt. 2 gives "%N 32"); kept as posted -- confirm intent.

    if ($field =~ /^([^;]+); vol\. (\w+), no\. (\w+[-&]?\w*), pt\. (\w+), pp?\. (\w+-?\w*); (\d+)$/) {
        return ("%J $1\n", "%V $2\n", "%N $3$4\n", "%P $5\n", "%D $6\n",
                "%0 Journal Article\n");
    }

    # book section: name, yr, pp

    if ($field =~ /^([^;]+); (\d+); pp?\. (\w+-?\w*)$/) {
        return ("%B $1\n", "%D $2\n", "%P $3\n", "%0 Book Section\n");
    }

    # book section: name, yr, pp, series, vol

    if ($field =~ /^([^;]+); (\d+); pp?\. (\w+-?\w*); ([^;]+); vol\. (\w+)$/) {
        return ("%B $1\n", "%D $2\n", "%P $3\n", "%S $4\n", "%V $5\n",
                "%0 Book Section\n");
    }

    # book section: name, yr, pp, series, vol, no

    if ($field =~ /^([^;]+); (\d+); pp?\. (\w+-?\w*); ([^;]+); vol\. (\w+), no\. (\w+[-&]?\w*)$/) {
        return ("%B $1\n", "%D $2\n", "%P $3\n", "%S $4\n", "%V $5\n",
                "%N $6\n", "%0 Book Section\n");
    }

    # mystery reference: yr

    if ($field =~ /^(\d+)$/) {
        return ("%D $1\n");
    }

    warn "$whatami: Stuffing unparseable source at input line $. into notes:\n$field\n";
    return ("%O Unparseable source: $field\n");
}

# Default handling for a tag listed in %tags: returns the Refer text for
# the field, or '' when the field is deliberately dropped.
sub refer_field {
    my ($tag, $field) = @_;

    # Language "English" and publication type "Journal Article" are not
    # written out at all.
    return '' if ($tag eq 'LA' || $tag eq 'SL') && $field eq "English\n";
    return '' if $tag eq 'PT' && $field eq "Journal Article\n";

    # Strip a trailing period (and surrounding whitespace) from the title,
    # leaving exactly one newline.  The period is optional, so titles that
    # end in something else keep their last character.
    $field =~ s/\s*\.?\s*\z/\n/ if $tag eq 'TI';

    my $out = "%$tags{$tag} ";
    $out .= "$notes{$tag}: " if exists $notes{$tag};    # Some fields need annotation
    return $out . $field;
}

# Read all input (files named on the command line, else STDIN) and write
# the Refer version to STDOUT.  Warnings about odd input go to STDERR.
sub main {
    while (defined(my $line = next_line())) {

        if ($line =~ /^([A-Z]{2}): /) {

            my $tag     = $1;
            my $tagline = $line;
            my $field   = read_field();

            if ($tag eq 'AU') {                 # Special author handling
                print refer_authors($field);
            }
            elsif ($tag eq 'SO') {              # Special source handling
                print refer_source($field);
            }
            elsif (exists $tags{$tag}) {        # Default field handling
                print refer_field($tag, $field);
            }
            elsif ($tag ne 'SF' && $tag ne 'NM' && $tag ne 'PY') {    # Skip or punt
                warn "$whatami: Stuffing unknown field at input line $. into notes:\n$tagline";
                print "%O Unknown field: $tagline";
                print $field;
            }
        }
        elsif ($line =~ /^$|^\s+/) {
            print $line;
        }
        else {
            warn "$whatami: Stray data at input line $.:\n$line\n";
        }
    }

    return;
}

# Run only when executed as a script, so the helpers above can be loaded
# from another file without consuming input.
main() unless caller;