#!/usr/bin/perl -w use strict; use XML::DOM; use FindBin qw($Bin); use lib $Bin; use wtr2_base; init_db(); my $DEBUG=0; my $CAN_OUTPUT= 1; my @files= @ARGV || (<$dir{invoices}/*.xml>); foreach my $file (@files) { my $parser = new XML::DOM::Parser; my $doc = $parser->parsefile ( $file); my $errors= check_invoice( $doc); if( !@$errors) { store_invoice( $doc); } else { print "ERROR in $file\n ", join( "\n ", @$errors), "\n"; if( $CAN_OUTPUT) { my $rejected_file= rejected( $file); print "adding errors in $rejected_file\n" if( $DEBUG); add_errors( $doc, $errors); output_doc_to_check( $rejected_file, $doc); } }; } exit; sub check_invoice { my( $doc)= @_; my $root= $doc->getDocumentElement; # so we really work with the root element my $errors=[]; # array ref, holds the error messages my $BuyerPartyDetails = first_child( $root, 'BuyerPartyDetails'); my $BuyerPartyIdentifier = first_child( $BuyerPartyDetails, 'BuyerPartyIdentifier'); my $BuyerOrganisationName = first_child( $BuyerPartyDetails, 'BuyerOrganisationName'); check_buyer( text( $BuyerPartyIdentifier), text( $BuyerOrganisationName), $errors ); my $InvoiceDetails = first_child( $root, 'InvoiceDetails'); my $OrderIdentifier = first_child( $InvoiceDetails, 'OrderIdentifier'); check_po( text( $OrderIdentifier), $errors); my @rows= children( $root, 'InvoiceRow'); reset_default_row_id(); foreach my $row ( @rows) { # this does not cope well with broken row numbers my $row_id= text( first_child( $row, 'RowIdentifier')) || default_row_id(); print "checking row $row_id\n" if $DEBUG; my $DeliveredQuantity= first_child( $row, 'DeliveredQuantity'); my $OrderedQuantity = first_child( $row, 'OrderedQuantity'); my $delivered_qty = text( $DeliveredQuantity); my $delivered_unit = $DeliveredQuantity ? $DeliveredQuantity->getAttribute( 'QuantityUnitCode') : ''; my $ordered_qty = text( $OrderedQuantity); my $ordered_unit = $OrderedQuantity ? $OrderedQuantity->getAttribute( 'QuantityUnitCode') : ''; check_qtty( $row_id, $delivered_qty, $delivered_unit, $ordered_qty, $ordered_unit, $errors); } return $errors; } sub store_invoice { my( $doc)= @_; my $root= $doc->getDocumentElement; # so we really work with the root element print "storing invoice ", text( first_child( first_child( $root, 'InvoiceDetails'), 'InvoiceNumber')), "\n"; # build the various data structures my $data; my $invoice = first_child( $root, 'InvoiceDetails'); $data->{invoice} = { number => text( first_child( $invoice, 'InvoiceNumber')), date => text( first_child( $invoice, 'InvoiceDate')), po => text( first_child( $invoice, 'OrderIdentifier')), amount_no_tax => text( first_child( $invoice, 'InvoiceTotalVatExcludedAmount')), tax => text( first_child( $invoice, 'InvoiceTotalVatAmount')), amount => text( first_child( $invoice, 'InvoiceTotalVatIncludedAmount')), payment_status => text( first_child( first_child( $root, 'PaymentStatusDetails'), 'PaymentStatusCode')), }; my $seller = first_child( $root, 'SellerPartyDetails'); $data->{seller} = { identifier => text( first_child( $seller, 'SellerPartyIdentifier')), name => text( first_child( $seller, 'SellerOrganisationName')), tax_code => text( first_child( $seller, 'SellerOrganisationTaxCode')), }; my $SellerPartyDetails = first_child( $root, 'SellerPartyDetails'); my $address = first_child( $SellerPartyDetails, 'SellerPostalAddressDetails'); $data->{address} = { street => text( first_child( $address, 'SellerStreetName')), town => text( first_child( $address, 'SellerTownName')), zip => text( first_child( $address, 'SellerPostCodeIdentifier')), country_code => text( first_child( $address, 'CountryCode')), po_box => text( first_child( $address, 'SellerPostOfficeBoxIdentifier')), }; my $contact = first_child( $root, 'SellerCommunicationDetails'); $data->{contact} = { name => text( first_child( $root, 'SellerContactPersonName')), phone => text( first_child( $contact, 'SellerPhoneNumberIdentifier')), email => text( first_child( $contact, 'SellerEmailaddressIdentifier')), }; $data->{invoicerow} ||= []; reset_default_row_id(); foreach my $invoicerow ( children($root, 'InvoiceRow')) { # need to check that the DeliveredQuantity element is present before getting its attribute my $DeliveredQuantity= first_child( $invoicerow, 'DeliveredQuantity'); my $qty_unit= $DeliveredQuantity ? $DeliveredQuantity->getAttribute( 'QuantityUnitCode') : ''; push @{$data->{invoicerow}}, { row_id => text( first_child( $invoicerow, 'RowIdentifier')) || default_row_id(), sku => text( first_child( $invoicerow, 'ArticleIdentifier')), name => text( first_child( $invoicerow, 'ArticleName')), qty => text( $DeliveredQuantity), qty_unit => $qty_unit, unit_price => text( first_child( $invoicerow, 'UnitPriceAmount')), amount_no_tax => text( first_child( $invoicerow, 'RowVatExcludedAmount')), tax => text( first_child( $invoicerow, 'RowVatAmount')), amount => text( first_child( $invoicerow, 'RowAmount')), } } store_all( $data); } sub add_errors { my( $doc, $error_messages)= @_; my $root= $doc->getDocumentElement; my $errors= $doc->createElement( 'errors'); $root->insertBefore ( $errors, $root->getFirstChild); foreach my $message (@$error_messages) { my $error= $doc->createElement( 'error'); $errors->appendChild( $error); # those 2 lines could be replaced by $error->addText( $message) # which is not in the DOM spec my $text= $doc->createTextNode( $message); $error->appendChild( $text); } return $doc; } sub output_doc_to_check { my( $file, $doc)= @_; open( FILE, ">$file") or die "cannot create file to check $file: $!"; print FILE $doc->toString; close FILE; } # this is very important,this ensures that we get the proper child # never use XML::DOM's getFirstChild method directly! sub first_child { my( $node, $tag)= @_; my $child= $node->getFirstChild or return undef; while( $child && ($child->getNodeName ne $tag) ) { $child= $child->getNextSibling; } return $child; } sub children { my( $node, $tag)= @_; my @children; my $child= first_child( $node, $tag) or return undef; push @children, $child; while( $child= $child->getNextSibling) { push @children, $child if( $child->getNodeName eq $tag); } return @children; } # node must include only text (and comments) sub text { my( $node)= @_; unless( $node) { return undef; } my $text=''; foreach my $child ($node->getChildNodes) { if( $child->getNodeName eq '#text') { $text.= $child->getData; } } return $text; } __END__ =head1 NAME wtr2_dom =head1 SYNOPSYS perl wtr2_dom =head1 DESCRIPTION This code uses L to process the invoices I have never liked the DOM. The Object Model is good, very complete and solid, but the API is very Java oriented (camels are good as Perl mascottes, not as veryLongMethodNames), and at least at level 1 (XML::DOM is a level 1 DOM implementation) quite weak and indeed dangerous. Having gotten this out of my system... Writing the DOM example wasn't particulary hard. It was long and quite painful, but more boring than difficult. A minor annoyance, that could actually be a blessing in a different context: every time I needed to access an attribute for an optional element (DeliveredQuantity for example) I had to check the existence of the element, or calling C on C (the non existent element) would cause the script to C. L and the likes, by contrast, would let me access non existent hash values in the Perl data structure without complaining. Having to check is a pain in a short script like this one, but could be very useful in a bigger project, as it is always better to have the code die with a bang than fail silently (and cause errors later). I wrote a little layer on top of it, with the L, L and L functions, that basically ensure that when I get a child I get the proper one, and not an extra whitespace, comment or other, that would happen to be in the XML. =head1 AUTHOR Michel Rodriguez =head1 LICENSE This code is Copyright (c) 2003 Michel Rodriguez. All rights reserved. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Comments can be sent to mirod@xmltwig.com =head1 SEE ALSO XML::DOM Ways to Rome 2 - Kourallinen Dollareita : http://www.xmltwig.com/article/ways_to_rome_2/