#!/usr/bin/perl
# Using XML::Twig
# Author: Michel Rodriguez
#
# Prints every grammar production (<prod> element) of an XML document as
# one numbered line of the form "[n] lhs ::= rhs".
#
# Usage: perl this_script.pl [file.xml]
# When no file is given, REC-xml-19980210.xml is read.

use strict;
use warnings;

use XML::Twig;

# Input document: first command-line argument, or the historical default.
my $file = @ARGV ? $ARGV[0] : 'REC-xml-19980210.xml';

# Running production number. It is incremented by prod(), which is invoked
# from inside parsefile(), so it has to be initialised before parsing starts.
my $i = 0;

my $twig = XML::Twig->new(
    twig_roots  =>                  # will build the tree only for prod
        { prod => \&prod },         # elements, and call the prod sub
    keep_spaces => 1,               # spaces will be dealt with by clean()
);

$twig->parsefile($file);

# Twig handler for one <prod> element.
#
# Parameters:
#   $twig - the XML::Twig object driving the parse
#   $prod - the <prod> element that has just been completely parsed
#
# Prints "[n] lhs ::= rhs" followed by a newline, where lhs is the text of
# the 'lhs' field and rhs is the concatenated text of all 'rhs' children.
# Returns a true value, as the original implicit return of print did.
sub prod {
    my ($twig, $prod) = @_;

    my $lhs = $prod->field('lhs');
    my $rhs = join '', map { $_->text } $prod->children('rhs');

    $i++;
    print clean("[$i] $lhs ::= $rhs"), "\n";

    # The element has been printed and is not needed again: release what
    # has been parsed so far so the tree does not grow with the input.
    $twig->purge;

    return 1;
}

# Normalises whitespace in a string.
#
# Parameters:
#   $string - the text to clean
#
# Returns a copy in which no-break spaces are turned into ordinary spaces,
# every run of whitespace is collapsed to a single space and trailing
# whitespace is removed.
sub clean {
    my ($string) = @_;

    # A no-break space is the single character U+00A0 in a decoded
    # (character) string, but the byte pair C2 A0 in raw UTF-8 bytes.
    # Match only the form that applies, so that an A0 continuation byte of
    # some other UTF-8 sequence is never touched in a byte string.
    if (utf8::is_utf8($string)) {
        $string =~ s/\x{a0}/ /g;
    }
    else {
        $string =~ s/\xc2\xa0/ /g;
    }

    $string =~ s/\s+/ /g;     # collapse runs of whitespace, newlines included
    $string =~ s/\s+\z//;     # drop trailing whitespace

    return $string;
}