#!/usr/bin/perl -w use strict; use FindBin qw($Bin); use lib $Bin; use wtr2_base; my $DEBUG=0; init_db(); my $CAN_OUTPUT= 0; my @files= @ARGV || (<$dir{invoices}/*.xml>); foreach my $file (@files) { # XMLin just re-implements a simplified version of XML::Simple's XMLin # that will work for this specific class of document my $xml= XMLin( $file, forcearray => [ qw(InvoiceRow)]); my $errors= check_invoice( $xml); if( !@$errors) { store_invoice( $xml); } else { print "ERROR in $file\n ", join( "\n ", @$errors), "\n"; if( $CAN_OUTPUT) { my $rejected_file= rejected( $file); print "adding errors in $rejected_file\n" if( $DEBUG); add_errors( $xml, $errors); output_doc_to_check( $rejected_file, $xml); } } } exit; sub check_invoice { my( $xml)= @_; my $errors=[]; # array ref, holds the error messages check_buyer( $xml->{BuyerPartyDetails}->{BuyerPartyIdentifier}->{content}, $xml->{BuyerPartyDetails}->{BuyerOrganisationName}->{content}, $errors ); check_po( $xml->{InvoiceDetails}->{OrderIdentifier}->{content}, $errors); my @rows= @{$xml->{InvoiceRow}}; reset_default_row_id(); foreach my $row( @rows) { # this does not cope well with broken row numbers my $row_id= $row->{RowIdentifier}->{content} || default_row_id(); print "checking row $row_id\n" if $DEBUG; check_qtty( $row_id, $row->{DeliveredQuantity}->{content}, $row->{DeliveredQuantity}->{QuantityUnitCode}, $row->{OrderedQuantity}->{content}, $row->{OrderedQuantity}->{QuantityUnitCode}, $errors ); } return $errors; } sub store_invoice { my( $xml)= @_; print "storing invoice $xml->{InvoiceDetails}->{InvoiceNumber}->{content}\n"; # build the various data structures my $data; my $invoice = $xml->{InvoiceDetails}; $data->{invoice} = { number => $invoice->{InvoiceNumber}->{content}, date => $invoice->{InvoiceDate}->{content}, po => $invoice->{OrderIdentifier}->{content}, amount_no_tax => $invoice->{InvoiceTotalVatExcludedAmount}->{content}, tax => $invoice->{InvoiceTotalVatAmount}->{content}, amount => $invoice->{InvoiceTotalVatIncludedAmount}->{content}, payment_status => $xml->{PaymentStatusDetails}->{PaymentStatusCode}->{content}, }; my $seller = $xml->{SellerPartyDetails}; $data->{seller} = { identifier => $seller->{SellerPartyIdentifier}->{content}, name => $seller->{SellerOrganisationName}->{content}, tax_code => $seller->{SellerOrganisationTaxCode}->{content}, }; my $address = $xml->{SellerPartyDetails}->{SellerPostalAddressDetails}; $data->{address} = { street => $address->{SellerStreetName}->{content}, town => $address->{SellerTownName}->{content}, zip => $address->{SellerPostCodeIdentifier}->{content}, country_code => $address->{CountryCode}->{content}, po_box => $address->{SellerPostOfficeBoxIdentifier}->{content}, }; $data->{contact} = { name => $xml->{SellerContactPersonName}->{content}, phone => $xml->{SellerCommunicationDetails}->{SellerPhoneNumberIdentifier}->{content}, email => $xml->{SellerCommunicationDetails}->{SellerEmailaddressIdentifier}->{content}, }; $data->{invoicerow} ||= []; reset_default_row_id(); foreach my $invoicerow (@{$xml->{InvoiceRow}}) { push @{$data->{invoicerow}}, { row_id => $invoicerow->{RowIdentifier}->{content} || default_row_id(), sku => $invoicerow->{ArticleIdentifier}->{content}, name => $invoicerow->{ArticleName}->{content}, qty => $invoicerow->{DeliveredQuantity}->{content}, qty_unit => $invoicerow->{DeliveredQuantity}->{QuantityUnitCode}, unit_price => $invoicerow->{UnitPriceAmount}->{content}, amount_no_tax => $invoicerow->{RowVatExcludedAmount}->{content}, tax => $invoicerow->{RowVatAmount}->{content}, amount => $invoicerow->{RowAmount}->{content}, } } store_all( $data); } sub XMLin { my( $file, %options)=@_; # get the 'forcearray' elements in a convenient hash my %forcearray= map { $_ => 1} @{$options{forcearray}}; my @current; # the stack of hashes or array in the data structure # $current[0] is the overall structure (the root of the document), # $current[1] is the first_level child currently open # ... # $current[-1] is the current element open( IN, "pyx $file | ") or die "cannot open pyx $file: $!"; while( ) { if( m{^\((.*)$}) # open element { my $tag= $1; my $current={}; # will be filled later; if( @current) { # normal (non-root) element if( $forcearray{$1}) { $current[-1]->{$1} ||= []; push @{$current[-1]->{$1}}, $current; } else { $current[-1]->{$1} = $current; } } push @current, $current; } elsif( m{^\)(.*)$}) # close element (except when only the root is left) { pop @current unless( @current == 1); } elsif( m{^-\\n$}) # empty line, skip { } elsif( m{^-(.*)$}) # content, assign to the current element content { $current[-1]->{content}= $1; } elsif( m{^A(\w*) (.*)$}) # attribute, assign to a field in the current element { $current[-1]->{$1}= $2; } } # note that in case there is an error during the parsing it will show when # closing the file, so you need to check the result of close close IN or die "error processing pyx $file: $!"; return $current[0]; } __END__ =head1 NAME wtr2_pyx_simple =head1 SYNOPSYS perl wtr2_pyx_simple =head1 DESCRIPTION This example uses L in a kinda devious way: it reimplements a simplified version of L's C that will work for the class of documents we are processing. The code is then exactly the same as the one for C. Note that this version of L does not offer any option besides a limited version of C. It will not for exemple use C. I guess this was pretty easy to write as it worked the first time I run it, much to my surprise I should say ;--) Generally It illustrates the fact that it often pays to write a layer on top of generic modules to adapt them to your specific needs (please, just don't release it on CPAN! Chances are that it is really specific to your problem or to the way you like to code, and it will only add to the already considerable confusion in the XML namespace). =head1 AUTHOR Michel Rodriguez =head1 LICENSE This code is Copyright (c) 2003 Michel Rodriguez. All rights reserved. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Comments can be sent to mirod@xmltwig.com =head1 SEE ALSO L, L, wtr2_simple Ways to Rome 2 - Kourallinen Dollareita: http://www.xmltwig.com/article/ways_to_rome_2/