Xref: feenix.metronet.com bit.listserv.endnote:238
Newsgroups: bit.listserv.endnote
Path: feenix.metronet.com!news.utdallas.edu!corpgate!bnrgate!nott!torn!howland.reston.ans.net!newsserver.jvnc.net!yale.edu!news.yale.edu!proton!dcs
From: dcs@proton.chem.yale.edu (Dave Schweisguth)
Subject: Compact Cambridge LSC -> Refer converter
Message-ID: <1993Jun7.204030.20393@news.yale.edu>
Sender: news@news.yale.edu (USENET News System)
Nntp-Posting-Host: proton.chem.yale.edu
Organization: Yale University
X-Newsreader: TIN [version 1.2 PL0]
Date: Mon, 7 Jun 1993 20:40:30 GMT
Lines: 175

Hi all,

I'm enclosing a Perl script which converts Compact Cambridge Life Sciences
references to Refer. It does a slightly better job than the appropriate
option of PreferRefer (as of 1.3.beta4), mainly wrt capitalization. The
Perl is pretty gross (anyone know how to avoid the redo?) but it works.

Avi has told me that the Compact Cambridge format will change RSN; if they
replace the disks up to '92 this will become instantly useless. That would
be fine with me.

Anyone is welcome to distribute or archive this as they please. I'd be
interested in comments, but I'll only definitely see them in personal mail.

#! /bin/sh
# This is a shell archive.  Remove anything before this line, then unpack
# it by saving it into a file and typing "sh file".  To overwrite existing
# files, type "sh file -c".  You can also feed this as standard input via
# unshar, or by typing "sh <file".  If this archive is complete, you
# will see the following message at the end:
#		"End of shell archive."
# Contents:  cc2refer
# Wrapped by dcs@proton on Mon Jun  7 16:34:26 1993
#
# What unpacking does:
#   1. Restricts PATH to /bin:/usr/bin:/usr/ucb.
#   2. Refuses to overwrite an existing ./cc2refer unless the first
#      argument is "-c".
#   3. Writes the here-document below to ./cc2refer, stripping the leading
#      "X" that protects every payload line.
#   4. Warns if the extracted file is not exactly the expected number of
#      bytes, then marks it executable.
#
# cc2refer itself is a Perl filter (usage: cc2refer foo [bar baz ...] > out)
# that rewrites Compact Cambridge tagged fields as Refer "%X" lines.
#
# NOTE: the byte count appears twice below (the "Extracting" message and
# the wc check).  Both must equal the size of the payload *after* the X
# prefixes are removed; update them together whenever the payload changes.
PATH=/bin:/usr/bin:/usr/ucb ; export PATH
# Two separate tests joined by && instead of the ambiguous "test ... -a ...".
if test -f 'cc2refer' && test "${1}" != "-c" ; then 
  echo shar: Will not clobber existing file \"'cc2refer'\"
else
echo shar: Extracting \"'cc2refer'\" \(4064 characters\)
sed "s/^X//" >'cc2refer' <<'END_OF_FILE'
X#!/usr/local/bin/perl
X# usage: cc2refer foo [bar baz ...] > out
X
X($whatami = $0)	=~ s|.*/||;		# `basename $0`
X
X%tags = (
X
X# CC tag    	Refer tag     	CC meaning		Refer field
X
X  'AB',		'X',		# abstract
X  'ED',		'E',		# editor
X  'IB',		'@',		# ISBN
X  'PB',		'I',		# publisher
X  'TI',		'T',		# title
X  'CL',		'K',		# classification 	keywords
X  'DE',		'K',		# description		keywords
X  'AF',		'O',		# author affiliation	notes
X  'CE',		'O',		# corporate entry	notes
X  'CF',		'O',		# conference		notes
X  'NT',		'O',		# notes			notes
X  'NU',		'O',		# number		notes
X  'LA',		'O',		# language		notes
X  'SL',		'O',		# summary language	notes
X  'OT',		'O',		# original title	notes
X  'PT',		'O',		# publication type	notes
X  'UI',		'O',		# unique identifier	notes
X);
X
X%notes = (
X  'AF', 'Author affiliation',
X  'CE', 'Corporate entry',
X  'LA', 'Language',
X  'SL', 'Summary language',
X  'OT', 'Original title',
X  'PT', 'Compact Cambridge LSC publication type',
X  'UI', 'Compact Cambridge LSC unique identifier',
X);
X
XLINE: while ($line = <>) {
X
X  if ($line =~ /^([A-Z]{2}): /) {
X
X    $tag = $1;
X    $tagline = $line;
X    $field = '';
X    $field .= $line until ($line = <>) =~ /^[A-Z]{2}: |^$|^\s+/;
X
X    if ($tag eq 'AU') {			# Special author handling
X
X      $field =~ s/\n/ /g;
X      $field =~ s/\s*;\s*/;/g;
X      $field =~ s/\s*$//;
X      foreach (split(';', $field)) {
X        if (s/(\s+)([^\s]+)$/, $2/) {
X          print "%A $_\n";
X        } else {
X	  warn "$whatami: Strange author name at input line $.:\n$_\n";
X	  print "%A $_\n";
X        }
X      }
X
X    } elsif ($tag eq 'SO') {			# Special source handling
X
X      $field =~ s/\n/ /g;
X      $field =~ s/\s*$//;
X
X      # journal: name, vol, pp, yr
X
X      if ($field =~ /^([^;]+); vol\. (\w+), pp?\. (\w+[-&]?\w*); (\d+)$/) {
X        print "%J $1\n", "%V $2\n", "%P $3\n", "%D $4\n", "%0 Journal Article\n";
X
X      # journal: name, vol, no, pp, yr
X
X      } elsif ($field =~ /^([^;]+); vol\. (\w+), no\. (\w+[-&]?\w*), pp?\. (\w+-?\w*); (\d+)$/) {
X        print "%J $1\n", "%V $2\n", "%N $3\n", "%P $4\n", "%D $5\n", "%0 Journal Article\n";
X
X      # journal: name, vol, no, pt, pp, yr
X
X      } elsif ($field =~ /^([^;]+); vol\. (\w+), no\. (\w+[-&]?\w*), pt\. (\w+), pp?\. (\w+-?\w*); (\d+)$/) {
X        print "%J $1\n", "%V $2\n", "%N $3$4\n", "%P $5\n", "%D $6\n", "%0 Journal Article\n";
X
X      # book section: name, yr, pp
X
X      } elsif ($field =~ /^([^;]+); (\d+); pp?\. (\w+-?\w*)$/) {
X        print "%B $1\n", "%D $2\n", "%P $3\n", "%0 Book Section\n";
X
X      # book section: name, yr, pp, series, vol
X
X      } elsif ($field =~ /^([^;]+); (\d+); pp?\. (\w+-?\w*); ([^;]+); vol\. (\w+)$/) {
X        print "%B $1\n", "%D $2\n", "%P $3\n", "%S $4\n", "%V $5\n", "%0 Book Section\n";
X
X      # book section: name, yr, pp, series, vol, no
X
X      } elsif ($field =~ /^([^;]+); (\d+); pp?\. (\w+-?\w*); ([^;]+); vol\. (\w+), no\. (\w+[-&]?\w*)$/) {
X        print "%B $1\n", "%D $2\n", "%P $3\n", "%S $4\n", "%V $5\n", "%N $6\n", "%0 Book Section\n";
X
X      # mystery reference: yr
X
X      } elsif ($field =~ /^(\d+)$/) {
X        print "%D $1\n";
X      } else {
X        warn "$whatami: Stuffing unparseable source at input line $. into notes:\n$field\n";
X        print "%O Unparseable source: $field\n";
X      }
X
X    } elsif ($tags{$tag} ne '') {		# Default field handling
X
X      unless ($tag =~ /LA|SL/	&& $field eq "English\n" ||
X	      $tag eq 'PT'	&& $field eq "Journal Article\n") {
X        $field =~ s/\s*\.\s*$/\n/ if $tag eq 'TI';	# Strip period from title
X        print "%$tags{$tag} ";
X        print "$notes{$tag}: " if $notes{$tag};	# Some fields need annotation
X        print $field;
X      }
X
X    } elsif ($tag !~ /SF|NM|PY/) {		# Skip or punt
X
X      warn "$whatami: Stuffing unknown field at input line $. into notes:\n$tagline";
X      print "%O Unknown field: $tagline";
X      print $field;
X
X    }
X
X    last LINE unless defined $line;  # EOF: a further <> would read STDIN
X    redo LINE;
X  } elsif ($line =~ /^$|^\s+/) {
X    print $line;
X  } else {
X    warn "$whatami: Stray data at input line $.:\n$line\n";
X  }
X}
END_OF_FILE
# The substitution is deliberately unquoted: some wc implementations pad
# the count with leading blanks, and word splitting discards them.
if test 4064 -ne $(wc -c <'cc2refer'); then
    echo shar: \"'cc2refer'\" unpacked with wrong size!
fi
chmod +x 'cc2refer'
# end of 'cc2refer'
fi
echo shar: End of shell archive.
exit 0

Cheers,

--
| Dave Schweisguth   Yale MB&B & Chemistry   Net: dcs@neutron.chem.yale.edu |
| Lab phone: 203-432-5208     Fax: 203-432-6144    Home phone: 203-624-3866 |
| For complying with the NJ Right To Know Act:  Contents partially unknown. |