#!/usr/bin/perl -w
# Using XML::Stream::Parser
# author: Ryan Eatmon
# modified by mirod
#
# Parses 'REC-xml-19980210.xml' with XML::Stream::Parser and prints one line
# per <prod> element, formatted as "[N] lhs ::= rhs", where N is a running
# count of <prod> elements and lhs/rhs are the character data collected from
# the <lhs> and <rhs> elements seen since the <prod> start tag. Each line is
# passed through clean() before being printed.

use strict;
use XML::Stream::Parser;

# State shared between the handlers below.
my $in_lhs  = 0;     # true while inside an <lhs> element
my $in_rhs  = 0;     # true while inside an <rhs> element
my $lhs     = '';    # text accumulated from <lhs> for the current <prod>
my $rhs     = '';    # text accumulated from <rhs> for the current <prod>
my $counter = 0;     # number of <prod> elements closed so far

my $parser = XML::Stream::Parser->new(
    handlers => {
        startElement => \&start_element,
        characters   => \&characters,
        endElement   => \&end_element,
    }
);

# a kludge to filter out UTF-8 related warnings
$SIG{__WARN__} = sub {
    print STDERR @_
        unless ( $_[0] =~ /^(Malformed UTF-8 character|utf8)/ );
};

$parser->parsefile('REC-xml-19980210.xml');

exit;

# startElement handler.
# Arguments: a leading value supplied by the parser (unused here), the tag
# name, then the attributes (also unused here).
# Entering <rhs>/<lhs> raises the matching flag; entering <prod> resets the
# text collected for both sides.
sub start_element {
    my ( undef, $tag ) = @_;

    if ( $tag eq 'rhs' ) {
        $in_rhs = 1;
    }
    elsif ( $tag eq 'lhs' ) {
        $in_lhs = 1;
    }
    elsif ( $tag eq 'prod' ) {
        $rhs = '';
        $lhs = '';
    }
    return;
}

# endElement handler.
# Arguments: a leading value supplied by the parser (unused here) and the
# tag name.
# Leaving <rhs>/<lhs> lowers the matching flag; leaving <prod> bumps the
# counter, formats the production, cleans it and prints it followed by a
# newline.
sub end_element {
    my ( undef, $tag ) = @_;

    if ( $tag eq 'rhs' ) {
        $in_rhs = 0;
    }
    elsif ( $tag eq 'lhs' ) {
        $in_lhs = 0;
    }
    elsif ( $tag eq 'prod' ) {
        $counter++;
        my $prod = "[" . $counter . "] " . $lhs . " ::= " . $rhs;
        $prod = clean($prod);
        print $prod, "\n";
    }
    return;
}

# characters handler.
# Arguments: a leading value supplied by the parser (unused here) and a chunk
# of character data. The chunk is appended to the lhs text when inside <lhs>,
# otherwise to the rhs text when inside <rhs>; any other text is ignored.
sub characters {
    my ( undef, $cdata ) = @_;

    if ($in_lhs) {
        $lhs .= $cdata;
    }
    elsif ($in_rhs) {
        $rhs .= $cdata;
    }
    return;
}

# NOTE(review): the definition of clean() is cut off in the visible source;
# the fragment below is kept exactly as found and its remainder must be
# restored from the original file before this script can compile.
sub clean { my($s) = @_; $s=~ s/\xc2\xa0/ /sg; $s=~ s{\ }{ }g; $s=~ s{&pic;}{'?>'}g; $s=~ s{&xmlpio;}{'