Article 11100 of comp.lang.perl: Path: feenix.metronet.com!news.utdallas.edu!wupost!howland.reston.ans.net!gatech!usenet.ufl.edu!travis.csd.harris.com!amber!brad From: brad@amber.ssd.csd.harris.com (Brad Appleton) Newsgroups: comp.lang.perl Subject: texinfo preprocessor - need help optimizing Date: 2 Mar 1994 15:21:24 GMT Organization: Harris CSD, Ft. Lauderdale, FL Lines: 430 Message-ID: <2l2atk$np1@travis.csd.harris.com> NNTP-Posting-Host: amber.ssd.csd.harris.com Summary: I need your help I have a perl script/library function to preprocess a texinfo file. What it does is read the texinfo file, expanding all the @include's and reading all the @iftex/@ifinfo/@ignore commands and spit out the actual commands that would NOT be ignored by makeinfo (or by TeX). The problem is, it seems to frequently dump core when I get to somewhere around 25,000 lines of input and I really dont know why. I have used "perl -d" but it really hasnt given me any reason to suspect that I am doing something wrong (as opposed to "correct but inefficient") in my script. I have also debugged the perl executable (a stripped version of it anyway) and have not found any evidence of infinite recursion. I am using perl4 patchlevel 36 on a Harris NightHawk running CX/UX 7.X (a "real-time" variant of Unix). I be much obliged if some of you would be willing to read the script and point out some inefficiencies (speed-wise or memory-wise), or things that you think could be done "better" (in your possibly subjective opinion). Here is the script - your help is appreciated! ------------------------------------------------------------------------------- # texinfo.pl -- # --------------------------------------------------------------------------- # Preprocess a texinfo file and let the user see only those lines that will # actually appear in in the info file (@include'd files are expanded). # # Usage: # require 'texinfo.pl'; # &texinfo(name); # # where is the name of the users function (which takes an input # line as its parameter) that will process the input lines, one by one. # If no "name" parameter is given than the default action (which is to # print the line) is used. The "&texinfo" function will parse @ARGV # looking for 'makeinfo' options and then will preprocess the input files # specified by the rest of @ARGV. # # Other useful items: # &texinfo'where -- Subroutine (takes no parameters) to return the current # input file name and line number (in a single string). # # $texinfo'DEBUG -- Set this variable to non-zero if you want some simple # debugging output. # --------------------------------------------------------------------------- # I have to do my own option processing cuz getopt.pl, getopts.pl, and # newgetopt.pl wont do what I want. # # Need to accept the following makeinfo options: # -I DIR to add DIR to the directory search list for including # files with the `@include' command. # -D VAR to define a variable, as with `@set'. # -U VAR to undefine a variable, as with `@clear'. # --no-validate to suppress node cross reference validation. # --no-warn to suppress warning messages (errors are still output). # --no-split to suppress the splitting of large files. # --no-headers to suppress the output of Node: Foo headers. # --verbose to print information about what is being done. # --version to print the version number of Makeinfo. # --output FILE or -o FILE # to specify the output file. When you specify the # output file in this way, any `@setfilename' in the # input file is ignored. # --paragraph-indent NUM # to set the paragraph indent to NUM (default 3). # --fill-column NUM to set the filling column to NUM (default 72). # --error-limit NUM to set the error limit to NUM (default 100). # --reference-limit NUM # to set the reference warning limit to NUM (default 1000). # --footnote-style STYLE # to set the footnote style to STYLE. STYLE should # either be `separate' to place footnotes in their own # node, or `end', to place the footnotes at the end of # the node in which they are defined (the default). # # In addition, I will allow the following: # -X FILE dont expand @includes of the given FILE # --debug generate output for debugging # --no-include dont expand '@include'ed files # --tex generate output that TeX would see (instead of makeinfo). # reverses the interpretation of @ifinfo, @iftex, and @tex # directives. # package texinfo; $DEBUG = 0; ## Debugging? $IS_INFO = 1; ## Should @ifinfo be TRUE? $IGNORE_INCLUDES = 0; ## Ignore @include directives? %IGNORE_INCS = (); ## List of files to NEVER @include $CMD='@'; ## Texinfo command prefix. $USERSUB = "texinfo'default_usersub" ; ## default line-processing action @CONDS = (); ## Stack of active texinfo conditions @INCS = (); ## Directory search path for @included files @HANDLES = (); ## Stack of input-file handles %LINES = (); ## Keep track of current line in each active handle $LINES = 0; ## Total # of input lines seen so far # Default line-processing action sub texinfo'default_usersub { # just print to standard output print STDOUT @_ ; } # Get makeinfo-options from main'@ARGV sub texinfo'getopts { local($opt, $_); ## get options from @main'ARGV while ($main'ARGV[0] =~ /^-/) { $_ = shift(@main'ARGV); last if ($_ eq "--"); if (/^-([UDIXo])(.*)$/o) { $opt = $1; $_ = $2; $_ = shift(@main'ARGV) if (! $_); if ($opt eq "D") { if (/^([^=]+)=(.*)$/) { $DEFS{$1} = $2; } else { $DEFS{$_} = 1; } } elsif ($opt eq "U") { undef $DEFS{$_}; } elsif ($opt eq "I") { unshift(@INCS, "$_"); } elsif ($opt eq "X") { ++$IGNORE_INCS{$_}; } elsif ($opt eq "o") { ; } } elsif (/^--o.*$/o) { $_ = shift(@main'ARGV); } elsif (/^--no-inc.*$/o) { $IGNORE_INCLUDES = 1; } elsif (/^--[Dd][Ee][Bb][Uu][Gg].*$/o) { $DEBUG =1; } elsif (/^--[Tt][Ee][Xx].*$/o) { $IS_INFO = 0; } elsif (/^--([^o].*)$/o) { $_ = $1; if (/^[^nv]/) { $_ = shift(@main'ARGV); } } else { print STDERR "$0: unrecognized 'makeinfo' option \"$_\" -- ignored.\n"; } } } # Return the current filename and line-number in the format "file:line" # (good for printing error-messages) # sub texinfo'where { local($fhandle) = $HANDLES[$#HANDLES]; return "$LINES (${fhandle}:$LINES{$fhandle})" if ($DEBUG); return "${fhandle}:$LINES{$fhandle}"; } # ------------------------------------------------------------------ # The array CONDS is a "stack" of conditions!!! Each time we see # a statement that implies conditional inclusion/exclusion of text, # we push the statement type on the stack (along with an indication # of whether the condition was true or false). # ------------------------------------------------------------------ # Push the active condition on the stack, remembering its type, result # and where in the input it occurred. # sub texinfo'push_cond { local($type, $result) = @_; local($where) = &texinfo'where; local($_) = "${type}\t${result}\t${where}"; push(@CONDS, $_); } # Get the current active condition. returns the list (type, result, where). # If there is no active condition, returns the list ('', 1, 0). # sub texinfo'top_cond { return ('', 1, 0) if (0 == @CONDS); split(" ", $CONDS[$#CONDS]); } # Same as top_cond, but also removes the topmost condition from the stack sub texinfo'pop_cond { return ('', 1, 0) if (0 == @CONDS); split(" ", pop(@CONDS)); } # Complain about any conditions that are still active in the given input file sub texinfo'extra_cond { local($fhandle) = @_; local($cond_type, $cond_result, $cond_where); local($fname, $line); ## Check for any un-terminated conditions in this file ## (they are not allowed to span across files) while (1) { ($cond_type, $cond_result, $cond_where) = &texinfo'top_cond; last if (! $cond_type); ($fname, $line) = split(':', $cond_where); last if ($fname ne $fhandle); print STDERR "$0: ($where) expecting \"${CMD}end $cond_type\" for ${CMD}$cond_type at $cond_where.\n"; &texinfo'pop_cond; } } # Evaluate an inclusion/exclusion condition. The type should be one # of 'ifset', 'ifclear', 'iftex', 'ifinfo', 'ignore', or 'tex'. The # expression should only be present if the type is 'ifset' or 'ifclear', # and should be the name of the corresponding value to test. # sub texinfo'eval_cond { local($type, $expr) = @_; local($result) = 0; ## trim whitespace $type =~ s/\s*(\S*)\s*/\1/; $expr =~ s/\s*(\S*)\s*/\1/; ## evaluate the expression if ($type eq "ifset") { $result = 1 if (defined($DEFS{$expr})); } elsif ($type eq "ifclear") { $result = 1 if (! defined($DEFS{$expr})); } elsif ($type eq "iftex") { $result = 1 - $IS_INFO; } elsif ($type eq "ifinfo") { $result = $IS_INFO; } elsif ($type eq "tex") { $result = 1 - $IS_INFO; } elsif ($type eq "ignore") { $result = 0; } else { local($where) = &texinfo'where; print STDERR "$0: ($where) unknown command \"${CMD}$type\"\n."; } print STDERR "> $type $expr = $result\n" if ($DEBUG); $result; } # Start conditional inclusion/exclusion command. # # We are given the type of the current condition, and its result. # The result we "push" onto the condition stack will be TRUE *only* if the # previous condition result and the given condition result are both TRUE. # sub texinfo'start_cond { local($type, $expr) = @_; local($result, $_); local($prev_type, $prev_result, $prev_where) = &texinfo'top_cond; $result = $prev_result && &eval_cond($type, $expr); &texinfo'push_cond($type, $result); } # End conditional inclusion/exclusion command # # We are given the type of condition we expect to be in the middle of. # If we are in the middle of that condition-type, then pop it from the # stack, otherwise complain about an @end for the wrong type. # sub texinfo'end_cond { local($end_type) = @_; local($where) = &texinfo'where; local($cond_type, $cond_result, $cond_where) = &texinfo'top_cond; if ($cond_type eq $end_type) { &texinfo'pop_cond; } elsif ($cond_type) { print STDERR "$0: ($where) expecting \"${CMD}end $cond_type\" for ${CMD}$cond_type at $cond_where but saw \"${CMD}end $end_type\" instead.\n" ; } else { print STDERR "$0: ($where) \"${CMD}end $end_type\" has no corresponding ${CMD}$cond_type.\n"; } } # Handle @set VALUE sub texinfo'def_cmd { local($name, $value) = @_; $value = 1 if (! $value); $DEFS{$name} = $value ; } # Handle @clear VALUE sub texinfo'undef_cmd { local($name) = @_; undef $DEFS{$name} ; } # Find an @included file sub texinfo'find_file { local($fname) = @_; local($found) = 0; local($dir); local($result); ## look for file in current directory if (-f $fname) { ++$found; $result = $fname; } else { ## look for file in directory list foreach $dir (@INCS) { if (-f "$dir/$fname") { ++$found; $result = "$dir/$fname"; last; } } } $result; } # Handle @include's sub texinfo'include_cmd { local($included) = @_; local($result) = 0; local($where) = &texinfo'where; local($fname) = &texinfo'find_file($included); if ($IGNORE_INCLUDES || defined($IGNORE_INCS{$fname}) || defined($IGNORE_INCS{$included})) { print STDERR "> Ignoring @include $included\n" if ($DEBUG); } elsif ($fname) { &texinfo'preprocess_files($fname); $result = 1; } else { print STDERR "$0: ($where) file $included not found.\n"; } $result; } # Process a texinfo input line sub texinfo'preprocess_line { local($_) = @_; local($where) = &texinfo'where; print STDERR "$where\n" if ($DEBUG); if (/^\s*${CMD}(if\w*|tex|ignore)\s*(.*)\s*$/) { print STDERR "> $_\n" if ($DEBUG); &texinfo'start_cond($1, $2); } elsif (/^\s*${CMD}end\s+(if\w*|tex|ignore)\s*/) { print STDERR "> $_\n" if ($DEBUG); &texinfo'end_cond($1); } else { local($type, $result, $where) = &texinfo'top_cond; if ($result) { if (/^\s*${CMD}set\s+(\S*)\s*(\S*)/) { &texinfo'def_cmd($1, $2); } elsif (/^\s*${CMD}clear\s+(\S*)/) { &texinfo'undef_cmd($1); } elsif (/^\s*${CMD}include\s+(\S*)/) { s/^/@c / if (&texinfo'include_cmd($1)); } ## Regular line - pass it to the user $_ .= "\n"; eval "&${USERSUB}(\$_);" ; } } } # Handle a file of texinfo commands sub texinfo'preprocess_files { local(@argv) = @_; local($fname, $fhandle); foreach $fname (@argv) { $fhandle = $fname; if (open($fhandle, $fname)) { &texinfo'preprocess_handle($fhandle); close($fhandle); } else { print STDERR "$0: unable to open $fname for reading: $!\n" ; } } } # Handle an open filehandle of texinfo commands sub texinfo'preprocess_handle { local($_) = @_; local($fhandle) = $_; push(@HANDLES, $fhandle); while (<$fhandle>) { ++$LINES{$fhandle}; ## Update the line-count for this file ++$LINES; chop; &texinfo'preprocess_line("$_"); } &texinfo'extra_cond($fhandle); pop(@HANDLES); } # If a list is given, preprocess the given files, otherwise preprocess STDIN sub texinfo'preprocess { local(@argv) = @_; if (0 < @argv) { &texinfo'preprocess_files(@argv); } elsif (0 == @HANDLES) { &texinfo'preprocess_handle(STDIN); } } # Here is the main routine that parses @ARGV for options and then # processes the remaining filenames in @ARGV ... package main; sub main'texinfo { local($_) = @_; local($old_usersub) = $texinfo'USERSUB; if ($_) { s/^/main\'/ if (! /^\w+\'\w+$/); $texinfo'USERSUB = $_; } &texinfo'getopts(); &texinfo'preprocess(@main'ARGV); $texinfo'USERSUB = $old_usersub; } 1; -- _______________________"And miles to go before I sleep."______________________ Brad Appleton Harris Computer Systems, Fort Lauderdale, FL USA brad@ssd.csd.harris.com Disclaimer: I said it, not my employer!