#!/usr/bin/perl -w use strict; use XML::LibXML; use FindBin qw($Bin); use lib $Bin; use wtr2_base; init_db(); my $DEBUG=0; my $CAN_OUTPUT= 1; my @files= @ARGV || (<$dir{invoices}/*.xml>); foreach my $file (@files) { my $parser = new XML::LibXML; my $doc = $parser->parse_file ( $file); my $errors= check_invoice( $doc); if( !@$errors) { store_invoice( $doc); } else { print "ERROR in $file\n ", join( "\n ", @$errors), "\n"; if( $CAN_OUTPUT) { my $rejected_file= rejected( $file); print "adding errors in $rejected_file\n" if( $DEBUG); add_errors( $parser, $doc, $errors); # the parser is passed so it can be re-used output_doc_to_check( $rejected_file, $doc); } }; } exit; sub check_invoice { my( $doc)= @_; my $errors=[]; # array ref, holds the error messages check_buyer( $doc->findvalue( '/Finvoice/BuyerPartyDetails/BuyerPartyIdentifier'), $doc->findvalue( '/Finvoice/BuyerPartyDetails/BuyerOrganisationName'), $errors ); check_po( $doc->findvalue( '/Finvoice/InvoiceDetails/OrderIdentifier'), $errors); my @rows= $doc->findnodes( '/Finvoice/InvoiceRow'); reset_default_row_id(); foreach my $row ( @rows) { # this does not cope well with broken row numbers my $row_id= $row->findvalue( 'RowIdentifier') || default_row_id(); print "checking row $row_id\n" if $DEBUG; my( $delivered_qty, $delivered_unit, $ordered_qty, $ordered_unit)= ( '','','',''); if( my $DeliveredQuantity= $row->findnodes( 'DeliveredQuantity')->[0]) { $delivered_qty = $DeliveredQuantity->textContent; $delivered_unit = $DeliveredQuantity->getAttribute( 'QuantityUnitCode'); } if( my $OrderedQuantity= $row->findnodes( 'OrderedQuantity')->[0]) { $ordered_qty = $OrderedQuantity->textContent; $ordered_unit = $OrderedQuantity->getAttribute( 'QuantityUnitCode'); } check_qtty( $row_id, $delivered_qty, $delivered_unit, $ordered_qty, $ordered_unit, $errors); } return $errors; } sub store_invoice { my( $doc)= @_; my $invoice_number= $doc->findvalue( '/Finvoice/InvoiceDetails/InvoiceNumber'); print "storing invoice $invoice_number\n"; # build the various data structures my $data; my $invoice = $doc->findnodes( '/Finvoice/InvoiceDetails')->[0]; $data->{invoice} = { number => $invoice->findvalue( 'InvoiceNumber'), date => $invoice->findvalue( 'InvoiceDate'), po => $invoice->findvalue( 'OrderIdentifier'), amount_no_tax => $invoice->findvalue( 'InvoiceTotalVatExcludedAmount'), tax => $invoice->findvalue( 'InvoiceTotalVatAmount'), amount => $invoice->findvalue( 'InvoiceTotalVatIncludedAmount'), payment_status => $doc->findvalue( '/Finvoice/PaymentStatusDetails/PaymentStatusCode'), }; my $seller = $doc->findnodes( '/Finvoice/SellerPartyDetails')->[0]; $data->{seller} = { identifier => $seller->findvalue( 'SellerPartyIdentifier'), name => $seller->findvalue( 'SellerOrganisationName'), tax_code => $seller->findvalue( 'SellerOrganisationTaxCode'), }; my $address = $doc->findnodes( '/Finvoice/SellerPartyDetails/SellerPostalAddressDetails')->[0]; $data->{address} = { street => $address->findvalue( 'SellerStreetName'), town => $address->findvalue( 'SellerTownName'), zip => $address->findvalue( 'SellerPostCodeIdentifier'), country_code => $address->findvalue( 'CountryCode'), po_box => $address->findvalue( 'SellerPostOfficeBoxIdentifier'), }; my $contact = $doc->findnodes( '/Finvoice/SellerCommunicationDetails')->[0]; $data->{contact} = { name => $doc->findvalue( '/Finvoice/SellerContactPersonName'), phone => $contact->findvalue( 'SellerPhoneNumberIdentifier'), email => $contact->findvalue( 'SellerEmailaddressIdentifier'), }; $data->{invoicerow} ||= []; reset_default_row_id(); foreach my $invoicerow ( $doc->findnodes( '/Finvoice/InvoiceRow')) { # need to check that the DeliveredQuantity element is present before getting its attribute my $DeliveredQuantity= $invoicerow->getChildrenByTagName( 'DeliveredQuantity')->[0]; my $qty = $DeliveredQuantity ? $DeliveredQuantity->textContent : ''; my $qty_unit= $DeliveredQuantity ? $DeliveredQuantity->getAttribute( 'QuantityUnitCode') : ''; push @{$data->{invoicerow}}, { row_id => $invoicerow->findvalue( 'RowIdentifier') || default_row_id(), sku => $invoicerow->findvalue( 'ArticleIdentifier'), name => $invoicerow->findvalue( 'ArticleName'), qty => $qty, qty_unit => $qty_unit, unit_price => $invoicerow->findvalue( 'UnitPriceAmount'), amount_no_tax => $invoicerow->findvalue( 'RowVatExcludedAmount'), tax => $invoicerow->findvalue( 'RowVatAmount'), amount => $invoicerow->findvalue( 'RowAmount'), } } store_all( $data); } sub add_errors { my( $parser, $doc, $error_messages)= @_; my $root= $doc->documentElement(); # here I chose to build the error messages as text and then parse them my $chunk= "\n \n " . join( "\n ", map { "$_" } @$error_messages) . "\n "; my $errors= $parser->parse_xml_chunk( $chunk ); $root->insertBefore ( $errors, $root->getFirstChild); return $doc; } sub output_doc_to_check { my( $file, $doc)= @_; open( FILE, ">$file") or die "cannot create file to check $file: $!"; print FILE $doc->toString; close FILE; } __END__ =head1 NAME wtr2_libxml =head1 SYNOPSYS perl wtr2_libxml =head1 DESCRIPTION This code uses L to process the invoices. It uses a lot of XML::LibXML specific methods instead of using the DOM. This makes for nicer and safer code. XML::LibXML is a Perl wrapper on top of the libxml2 library, an XML/XPath/DOM/RelaxNG/... library written by Daniel Veillard for the Gnome project. Instead of using navigation methods (C) it relies mostly on C and C, which use XPath to select nodes to access. The error message is built as text and then parsed using C. While this might not be the best method I found it really nice to use. Overall the code was quite easy to write, all the extra goodies provided by XML::LibXML compared to the DOM are really useful and make it much easier to write compact and safe code. My main gripe with XML::LibXML is that it is often unstable, as the Perl module tries to keep up with the development of the librarie. The problem is that libxml2 is so widely used that I find that it is often upgraded by unrelated software, which can then cause trouble to code that uses XML::LibXML. =head1 AUTHOR Michel Rodriguez =head1 LICENSE This code is Copyright (c) 2003 Michel Rodriguez. All rights reserved. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Comments can be sent to mirod@xmltwig.com =head1 SEE ALSO L Ways to Rome 2 - Kourallinen Dollareita: http://www.xmltwig.com/article/ways_to_rome_2/