#!/usr/bin/perl
#############################################################
#
# Generic CSV to XML Processing Utility
#
# 24 Feb 2005  Jim Fullton, WIPO
#
# Takes a simple CSV (Comma Separated Value) file of
# priority document numbers and creates a correct EDI
# XML request.
#
# The CSV file is normally generated as the output from Excel
# and the comma separated part isn't really needed - we
# are just using the ubiquitous Excel as a data entry tool.
# This tool can be easily modified by offices to allow
# office-specific reference numbers in the spreadsheet.
# A commented-out line in the read loop shows how to treat the
# first column as a reference number and the 2nd as the pdoc number.
#
# The output XML is written to stdout, with commentary
# to stderr.
#
# NOTE: THIS IS AN EXAMPLE!!  It will work "as-is"
# but it is not up to standards in terms of error processing,
# completeness, etc.  It is a template for offices to use
# in the development of their own applications.
#
##############################################################

use strict;
use warnings;
use Getopt::Long;

# NOTE(review): in this copy of the file nearly every print statement emits
# only whitespace and a newline; the sole markup that survives is the
# <$media .../> element.  The XML element text (and, presumably, the
# interpolation of $ref, $ncty, $ndate and $nnum into the p-doc line) appears
# to have been lost.  Those strings are kept as found rather than guessed at;
# restore them from a pristine copy before relying on the output.

my $CSVF;        # path of the input CSV file (-file)
my @doctypes;    # requested document types (-doctype, may be repeated)

GetOptions( 'file=s' => \$CSVF, 'doctype=s' => \@doctypes ) or usage();
usage() unless defined $CSVF && length $CSVF;
usage() unless @doctypes;

# Each rule pairs a test on the -doctype value with the output lines that
# value contributes to every document entry.  The tests are the same
# substring matches the script has always used, so a value may satisfy more
# than one rule.  "iper" must be exactly four characters long so that it does
# not also fire for "et-iper".
my @doctype_rules = (
    [ sub { $_[0] =~ /all/ },
      [ " \n", " \n", " \n", " \n", " \n", " \n" ] ],
    [ sub { $_[0] =~ /p-doc/ },       [" \n"] ],
    [ sub { $_[0] =~ /pamphlet/ },    [" \n"] ],
    [ sub { $_[0] =~ /declaration/ }, [" \n"] ],
    [ sub { $_[0] =~ /iper/ && length( $_[0] ) == 4 }, [" \n"] ],
    [ sub { $_[0] =~ /et-iper/ },     [" \n"] ],
    [ sub { $_[0] =~ /iasf/ },        [" \n"] ],
);

# Resolve the requested doctypes once, before any XML is written, so that a
# bad -doctype value cannot leave a half-written document on stdout.
my @doctype_output;
for my $doctype (@doctypes) {
    my @lines =
      map { @{ $_->[1] } } grep { $_->[0]->($doctype) } @doctype_rules;
    if ( !@lines ) {
        print STDERR
"Incorrect document type $doctype entered on command line, exiting\n";
        exit 1;
    }
    push @doctype_output, @lines;
}

# file name will be identifier-ctry-media.csv
# the identifier can be anything
# ctry is the 2 character WIPO ST.3 country code
# media is ftp for secure ftp download, cddvd for CDs
# e.g. 12345-us-cddvd.csv or 12345-us-ftp.csv
my ( $id, $office, $media ) = split /[-.]/, $CSVF;    # $id is not used below
usage() unless defined $office && defined $media;

# String comparison: the numeric == used previously treated both operands as
# 0 and therefore rewrote every media type, including "ftp", to "cd-dvd".
$media = 'cd-dvd' if $media eq 'cddvd';

open( my $in, '<', $CSVF ) or die "Can't open file $CSVF: $!\n";

print "\n";
print "\n";
print "\n";
print "\n";
print "\n";
print "\n";
print "\n";
print "\n";
print "\n";
print "\n";
print " \n";
print " <$media content-type=\"tiff\"/>\n";
print " \n";
print " \n";
print " $office\n";
print " \n";

# p-doc numbers should be in this form: AT03/00100
# i.e. ST.3 country code, last 2 digits of the year, "/", number.
# The number is padded with leading 0's if it has fewer than 6 digits.
my $ref = 1;    # running reference number; one more than the entries written

LINE:
while ( my $line = <$in> ) {
    chomp $line;
    $line =~ s/\r\z//;                # tolerate CRLF files saved on Windows
    next LINE if $line !~ /\S/;       # ignore blank lines

    # if you want the first column to be the reference, replace the
    # assignment below with:
    #   ( $ref, my $val ) = split /,/, $line;
    my $val = $line;

    # Country code (2 characters), 2-digit year, then the number after the
    # slash.  For a 4-digit year change \d{2} to \d{4} and use the captured
    # value directly instead of calling expand_two_digit_year().
    my ( $ncty, $yy, $snum ) =
      $val =~ m{\A ([^/]{2}) (\d{2}) [^/]* / (\d+)}x;
    if ( !defined $snum ) {
        print STDERR "Skipping malformed line $.: $line\n";
        next LINE;
    }

    my $nnum  = sprintf '%06d', $snum;          # pad the number to 6 digits
    my $ndate = expand_two_digit_year($yy);     # make into a real year

    # write out the real, correct p-doc number
    print " \n";

    # it's ok to leave this even if you are inputting a reference number
    # in the csv file
    $ref = $ref + 1;

    # one line for each type requested, or the full set for "all"
    print @doctype_output;

    print " \n";
}

print "\n";

print STDERR "\n";
print STDERR "############### SUMMARY ###############\n";
print STDERR "# ", $ref - 1, " document entries processed from file $CSVF.\n";
print STDERR "# Call: GenXML-generic.pl -file=$CSVF";
print STDERR join( ',', map { " -doctype=$_" } @doctypes );
print STDERR "\n";
print STDERR "#######################################\n";

close $in;

# Expand a two-digit year to four digits using a window that ends at the
# current year: the result is the most recent year, not later than now, whose
# last two digits are $yy.  This replaces the fixed "below 10 means 20xx,
# otherwise 19xx" rule, which sent years from 2010 onward into the 1900s.
sub expand_two_digit_year {
    my ($yy) = @_;
    my $this_year = (localtime)[5] + 1900;
    my $year      = $this_year - ( $this_year % 100 ) + $yy;
    $year -= 100 if $year > $this_year;
    return $year;
}

# Print the command-line help to stderr and terminate with a failure status.
# Declared without a prototype because it is called before its definition.
sub usage {
    print STDERR "\n";
    print STDERR
"Usage: GenXML-generic.pl -file=filename -doctype=[all,iper,iasf,declaration,p-doc,pamphlet,et-iper]......\n";
    print STDERR
"(filename) must be of the format OfficeID-ST3CountryCode-mediaType.csv\n";
    print STDERR
"(OfficeID) can be any string but must not contain the - or . characters\n";
    print STDERR
"(ST3CountryCode) is the WIPO ST.3 Country Code for the requesting Office\n";
    print STDERR "(mediaType) may be cddvd or ftp\n";
    print STDERR
"\nOutput XML is written to standard output and should be redirected to a file\n";
    print STDERR "\n\n";
    exit 1;
}