jbsrc/tools/intltool-merge.in (39398B) - raw
1 #!@INTLTOOL_PERL@ -w 2 # -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 -*- 3 4 # 5 # The Intltool Message Merger 6 # 7 # Copyright (C) 2000, 2003 Free Software Foundation. 8 # Copyright (C) 2000, 2001 Eazel, Inc 9 # 10 # Intltool is free software; you can redistribute it and/or 11 # modify it under the terms of the GNU General Public License 12 # version 2 published by the Free Software Foundation. 13 # 14 # Intltool is distributed in the hope that it will be useful, 15 # but WITHOUT ANY WARRANTY; without even the implied warranty of 16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 # General Public License for more details. 18 # 19 # You should have received a copy of the GNU General Public License 20 # along with this program; if not, write to the Free Software 21 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 22 # 23 # As a special exception to the GNU General Public License, if you 24 # distribute this file as part of a program that contains a 25 # configuration script generated by Autoconf, you may include it under 26 # the same distribution terms that you use for the rest of that program. 27 # 28 # Authors: Maciej Stachowiak <mjs@noisehavoc.org> 29 # Kenneth Christiansen <kenneth@gnu.org> 30 # Darin Adler <darin@bentspoon.com> 31 # 32 # Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net> 33 # 34 35 ## Release information 36 my $PROGRAM = "intltool-merge"; 37 my $PACKAGE = "intltool"; 38 my $VERSION = "0.37.1"; 39 40 ## Loaded modules 41 use strict; 42 use Getopt::Long; 43 use Text::Wrap; 44 use File::Basename; 45 46 my $must_end_tag = -1; 47 my $last_depth = -1; 48 my $translation_depth = -1; 49 my @tag_stack = (); 50 my @entered_tag = (); 51 my @translation_strings = (); 52 my $leading_space = ""; 53 54 ## Scalars used by the option stuff 55 my $HELP_ARG = 0; 56 my $VERSION_ARG = 0; 57 my $BA_STYLE_ARG = 0; 58 my $XML_STYLE_ARG = 0; 59 my $KEYS_STYLE_ARG = 0; 60 my $DESKTOP_STYLE_ARG = 0; 61 my $SCHEMAS_STYLE_ARG = 0; 62 my $RFC822DEB_STYLE_ARG = 0; 63 my $QUOTED_STYLE_ARG = 0; 64 my $QUOTEDXML_STYLE_ARG = 0; 65 my $QUIET_ARG = 0; 66 my $PASS_THROUGH_ARG = 0; 67 my $UTF8_ARG = 0; 68 my $MULTIPLE_OUTPUT = 0; 69 my $cache_file; 70 71 ## Handle options 72 GetOptions 73 ( 74 "help" => \$HELP_ARG, 75 "version" => \$VERSION_ARG, 76 "quiet|q" => \$QUIET_ARG, 77 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility 78 "ba-style|b" => \$BA_STYLE_ARG, 79 "xml-style|x" => \$XML_STYLE_ARG, 80 "keys-style|k" => \$KEYS_STYLE_ARG, 81 "desktop-style|d" => \$DESKTOP_STYLE_ARG, 82 "schemas-style|s" => \$SCHEMAS_STYLE_ARG, 83 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG, 84 "quoted-style" => \$QUOTED_STYLE_ARG, 85 "quotedxml-style" => \$QUOTEDXML_STYLE_ARG, 86 "pass-through|p" => \$PASS_THROUGH_ARG, 87 "utf8|u" => \$UTF8_ARG, 88 "multiple-output|m" => \$MULTIPLE_OUTPUT, 89 "cache|c=s" => \$cache_file 90 ) or &error; 91 92 my $PO_DIR; 93 my $FILE; 94 my $OUTFILE; 95 96 my %po_files_by_lang = (); 97 my %translations = (); 98 my $iconv = $ENV{"ICONV"} || "iconv"; 99 my $devnull = ($^O eq 'MSWin32' ? 'NUL:' : '/dev/null'); 100 101 sub isProgramInPath 102 { 103 my ($file) = @_; 104 # If either a file exists, or when run it returns 0 exit status 105 return 1 if ((-x $file) or (system("$file -l >$devnull") == 0)); 106 return 0; 107 } 108 109 if (! isProgramInPath ("$iconv")) 110 { 111 print STDERR " *** iconv is not found on this system!\n". 112 " *** Without it, intltool-merge can not convert encodings.\n"; 113 exit; 114 } 115 116 # Use this instead of \w for XML files to handle more possible characters. 117 my $w = "[-A-Za-z0-9._:]"; 118 119 # XML quoted string contents 120 my $q = "[^\\\"]*"; 121 122 ## Check for options. 123 124 if ($VERSION_ARG) 125 { 126 &print_version; 127 } 128 elsif ($HELP_ARG) 129 { 130 &print_help; 131 } 132 elsif ($BA_STYLE_ARG && @ARGV > 2) 133 { 134 &utf8_sanity_check; 135 &preparation; 136 &print_message; 137 &ba_merge_translations; 138 &finalize; 139 } 140 elsif ($XML_STYLE_ARG && @ARGV > 2) 141 { 142 &utf8_sanity_check; 143 &preparation; 144 &print_message; 145 &xml_merge_output; 146 &finalize; 147 } 148 elsif ($KEYS_STYLE_ARG && @ARGV > 2) 149 { 150 &utf8_sanity_check; 151 &preparation; 152 &print_message; 153 &keys_merge_translations; 154 &finalize; 155 } 156 elsif ($DESKTOP_STYLE_ARG && @ARGV > 2) 157 { 158 &utf8_sanity_check; 159 &preparation; 160 &print_message; 161 &desktop_merge_translations; 162 &finalize; 163 } 164 elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2) 165 { 166 &utf8_sanity_check; 167 &preparation; 168 &print_message; 169 &schemas_merge_translations; 170 &finalize; 171 } 172 elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2) 173 { 174 &preparation; 175 &print_message; 176 &rfc822deb_merge_translations; 177 &finalize; 178 } 179 elsif (($QUOTED_STYLE_ARG || $QUOTEDXML_STYLE_ARG) && @ARGV > 2) 180 { 181 &utf8_sanity_check; 182 &preparation; 183 &print_message; 184 "ed_merge_translations($QUOTEDXML_STYLE_ARG); 185 &finalize; 186 } 187 else 188 { 189 &print_help; 190 } 191 192 exit; 193 194 ## Sub for printing release information 195 sub print_version 196 { 197 print <<_EOF_; 198 ${PROGRAM} (${PACKAGE}) ${VERSION} 199 Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen. 200 201 Copyright (C) 2000-2003 Free Software Foundation, Inc. 202 Copyright (C) 2000-2001 Eazel, Inc. 203 This is free software; see the source for copying conditions. There is NO 204 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 205 _EOF_ 206 exit; 207 } 208 209 ## Sub for printing usage information 210 sub print_help 211 { 212 print <<_EOF_; 213 Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE 214 Generates an output file that includes some localized attributes from an 215 untranslated source file. 216 217 Mandatory options: (exactly one must be specified) 218 -b, --ba-style includes translations in the bonobo-activation style 219 -d, --desktop-style includes translations in the desktop style 220 -k, --keys-style includes translations in the keys style 221 -s, --schemas-style includes translations in the schemas style 222 -r, --rfc822deb-style includes translations in the RFC822 style 223 --quoted-style includes translations in the quoted string style 224 --quotedxml-style includes translations in the quoted xml string style 225 -x, --xml-style includes translations in the standard xml style 226 227 Other options: 228 -u, --utf8 convert all strings to UTF-8 before merging 229 (default for everything except RFC822 style) 230 -p, --pass-through deprecated, does nothing and issues a warning 231 -m, --multiple-output output one localized file per locale, instead of 232 a single file containing all localized elements 233 -c, --cache=FILE specify cache file name 234 (usually \$top_builddir/po/.intltool-merge-cache) 235 -q, --quiet suppress most messages 236 --help display this help and exit 237 --version output version information and exit 238 239 Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE") 240 or send email to <xml-i18n-tools\@gnome.org>. 241 _EOF_ 242 exit; 243 } 244 245 246 ## Sub for printing error messages 247 sub print_error 248 { 249 print STDERR "Try `${PROGRAM} --help' for more information.\n"; 250 exit; 251 } 252 253 254 sub print_message 255 { 256 print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG; 257 } 258 259 260 sub preparation 261 { 262 $PO_DIR = $ARGV[0]; 263 $FILE = $ARGV[1]; 264 $OUTFILE = $ARGV[2]; 265 266 &gather_po_files; 267 &get_translation_database; 268 } 269 270 # General-purpose code for looking up translations in .po files 271 272 sub po_file2lang 273 { 274 my ($tmp) = @_; 275 $tmp =~ s/^.*\/(.*)\.po$/$1/; 276 return $tmp; 277 } 278 279 sub gather_po_files 280 { 281 if (my $linguas = $ENV{"LINGUAS"}) 282 { 283 for my $lang (split / /, $linguas) { 284 my $po_file = $PO_DIR . "/" . $lang . ".po"; 285 if (-e $po_file) { 286 $po_files_by_lang{$lang} = $po_file; 287 } 288 } 289 } 290 else 291 { 292 if (open LINGUAS_FILE, "$PO_DIR/LINGUAS") 293 { 294 while (<LINGUAS_FILE>) 295 { 296 next if /^#/; 297 298 for my $lang (split) 299 { 300 chomp ($lang); 301 my $po_file = $PO_DIR . "/" . $lang . ".po"; 302 if (-e $po_file) { 303 $po_files_by_lang{$lang} = $po_file; 304 } 305 } 306 } 307 308 close LINGUAS_FILE; 309 } 310 else 311 { 312 for my $po_file (glob "$PO_DIR/*.po") { 313 $po_files_by_lang{po_file2lang($po_file)} = $po_file; 314 } 315 } 316 } 317 } 318 319 sub get_local_charset 320 { 321 my ($encoding) = @_; 322 my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "@INTLTOOL_LIBDIR@/charset.alias"; 323 324 # seek character encoding aliases in charset.alias (glib) 325 326 if (open CHARSET_ALIAS, $alias_file) 327 { 328 while (<CHARSET_ALIAS>) 329 { 330 next if /^\#/; 331 return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i) 332 } 333 334 close CHARSET_ALIAS; 335 } 336 337 # if not found, return input string 338 339 return $encoding; 340 } 341 342 sub get_po_encoding 343 { 344 my ($in_po_file) = @_; 345 my $encoding = ""; 346 347 open IN_PO_FILE, $in_po_file or die; 348 while (<IN_PO_FILE>) 349 { 350 ## example: "Content-Type: text/plain; charset=ISO-8859-1\n" 351 if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/) 352 { 353 $encoding = $1; 354 last; 355 } 356 } 357 close IN_PO_FILE; 358 359 if (!$encoding) 360 { 361 print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG; 362 $encoding = "ISO-8859-1"; 363 } 364 365 system ("$iconv -f $encoding -t UTF-8 <$devnull 2>$devnull"); 366 if ($?) { 367 $encoding = get_local_charset($encoding); 368 } 369 370 return $encoding 371 } 372 373 sub utf8_sanity_check 374 { 375 print STDERR "Warning: option --pass-through has been removed.\n" if $PASS_THROUGH_ARG; 376 $UTF8_ARG = 1; 377 } 378 379 sub get_translation_database 380 { 381 if ($cache_file) { 382 &get_cached_translation_database; 383 } else { 384 &create_translation_database; 385 } 386 } 387 388 sub get_newest_po_age 389 { 390 my $newest_age; 391 392 foreach my $file (values %po_files_by_lang) 393 { 394 my $file_age = -M $file; 395 $newest_age = $file_age if !$newest_age || $file_age < $newest_age; 396 } 397 398 $newest_age = 0 if !$newest_age; 399 400 return $newest_age; 401 } 402 403 sub create_cache 404 { 405 print "Generating and caching the translation database\n" unless $QUIET_ARG; 406 407 &create_translation_database; 408 409 open CACHE, ">$cache_file" || die; 410 print CACHE join "\x01", %translations; 411 close CACHE; 412 } 413 414 sub load_cache 415 { 416 print "Found cached translation database\n" unless $QUIET_ARG; 417 418 my $contents; 419 open CACHE, "<$cache_file" || die; 420 { 421 local $/; 422 $contents = <CACHE>; 423 } 424 close CACHE; 425 %translations = split "\x01", $contents; 426 } 427 428 sub get_cached_translation_database 429 { 430 my $cache_file_age = -M $cache_file; 431 if (defined $cache_file_age) 432 { 433 if ($cache_file_age <= &get_newest_po_age) 434 { 435 &load_cache; 436 return; 437 } 438 print "Found too-old cached translation database\n" unless $QUIET_ARG; 439 } 440 441 &create_cache; 442 } 443 444 sub create_translation_database 445 { 446 for my $lang (keys %po_files_by_lang) 447 { 448 my $po_file = $po_files_by_lang{$lang}; 449 450 if ($UTF8_ARG) 451 { 452 my $encoding = get_po_encoding ($po_file); 453 454 if (lc $encoding eq "utf-8") 455 { 456 open PO_FILE, "<$po_file"; 457 } 458 else 459 { 460 print "NOTICE: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;; 461 462 open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|"; 463 } 464 } 465 else 466 { 467 open PO_FILE, "<$po_file"; 468 } 469 470 my $nextfuzzy = 0; 471 my $inmsgid = 0; 472 my $inmsgstr = 0; 473 my $msgid = ""; 474 my $msgstr = ""; 475 476 while (<PO_FILE>) 477 { 478 $nextfuzzy = 1 if /^#, fuzzy/; 479 480 if (/^msgid "((\\.|[^\\]+)*)"/ ) 481 { 482 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr; 483 $msgid = ""; 484 $msgstr = ""; 485 486 if ($nextfuzzy) { 487 $inmsgid = 0; 488 } else { 489 $msgid = unescape_po_string($1); 490 $inmsgid = 1; 491 } 492 $inmsgstr = 0; 493 $nextfuzzy = 0; 494 } 495 496 if (/^msgstr "((\\.|[^\\]+)*)"/) 497 { 498 $msgstr = unescape_po_string($1); 499 $inmsgstr = 1; 500 $inmsgid = 0; 501 } 502 503 if (/^"((\\.|[^\\]+)*)"/) 504 { 505 $msgid .= unescape_po_string($1) if $inmsgid; 506 $msgstr .= unescape_po_string($1) if $inmsgstr; 507 } 508 } 509 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr; 510 } 511 } 512 513 sub finalize 514 { 515 } 516 517 sub unescape_one_sequence 518 { 519 my ($sequence) = @_; 520 521 return "\\" if $sequence eq "\\\\"; 522 return "\"" if $sequence eq "\\\""; 523 return "\n" if $sequence eq "\\n"; 524 return "\r" if $sequence eq "\\r"; 525 return "\t" if $sequence eq "\\t"; 526 return "\b" if $sequence eq "\\b"; 527 return "\f" if $sequence eq "\\f"; 528 return "\a" if $sequence eq "\\a"; 529 return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7) 530 531 return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/); 532 return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/); 533 534 # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489 535 536 return $sequence; 537 } 538 539 sub unescape_po_string 540 { 541 my ($string) = @_; 542 543 $string =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg; 544 545 return $string; 546 } 547 548 sub entity_decode 549 { 550 local ($_) = @_; 551 552 s/'/'/g; # ' 553 s/"/"/g; # " 554 s/</</g; 555 s/>/>/g; 556 s/&/&/g; 557 558 return $_; 559 } 560 561 # entity_encode: (string) 562 # 563 # Encode the given string to XML format (encode '<' etc). 564 565 sub entity_encode 566 { 567 my ($pre_encoded) = @_; 568 569 my @list_of_chars = unpack ('C*', $pre_encoded); 570 571 # with UTF-8 we only encode minimalistic 572 return join ('', map (&entity_encode_int_minimalist, @list_of_chars)); 573 } 574 575 sub entity_encode_int_minimalist 576 { 577 return """ if $_ == 34; 578 return "&" if $_ == 38; 579 return "'" if $_ == 39; 580 return "<" if $_ == 60; 581 return ">" if $_ == 62; 582 return chr $_; 583 } 584 585 sub entity_encoded_translation 586 { 587 my ($lang, $string) = @_; 588 589 my $translation = $translations{$lang, $string}; 590 return $string if !$translation; 591 return entity_encode ($translation); 592 } 593 594 ## XML (bonobo-activation specific) merge code 595 596 sub ba_merge_translations 597 { 598 my $source; 599 600 { 601 local $/; # slurp mode 602 open INPUT, "<$FILE" or die "can't open $FILE: $!"; 603 $source = <INPUT>; 604 close INPUT; 605 } 606 607 open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!"; 608 # Binmode so that selftest works ok if using a native Win32 Perl... 609 binmode (OUTPUT) if $^O eq 'MSWin32'; 610 611 while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) 612 { 613 print OUTPUT $1; 614 615 my $node = $2 . "\n"; 616 617 my @strings = (); 618 $_ = $node; 619 while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) { 620 push @strings, entity_decode($3); 621 } 622 print OUTPUT; 623 624 my %langs; 625 for my $string (@strings) 626 { 627 for my $lang (keys %po_files_by_lang) 628 { 629 $langs{$lang} = 1 if $translations{$lang, $string}; 630 } 631 } 632 633 for my $lang (sort keys %langs) 634 { 635 $_ = $node; 636 s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s; 637 s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg; 638 print OUTPUT; 639 } 640 } 641 642 print OUTPUT $source; 643 644 close OUTPUT; 645 } 646 647 648 ## XML (non-bonobo-activation) merge code 649 650 651 # Process tag attributes 652 # Only parameter is a HASH containing attributes -> values mapping 653 sub getAttributeString 654 { 655 my $sub = shift; 656 my $do_translate = shift || 0; 657 my $language = shift || ""; 658 my $result = ""; 659 my $translate = shift; 660 foreach my $e (reverse(sort(keys %{ $sub }))) { 661 my $key = $e; 662 my $string = $sub->{$e}; 663 my $quote = '"'; 664 665 $string =~ s/^[\s]+//; 666 $string =~ s/[\s]+$//; 667 668 if ($string =~ /^'.*'$/) 669 { 670 $quote = "'"; 671 } 672 $string =~ s/^['"]//g; 673 $string =~ s/['"]$//g; 674 675 if ($do_translate && $key =~ /^_/) { 676 $key =~ s|^_||g; 677 if ($language) { 678 # Handle translation 679 my $decode_string = entity_decode($string); 680 my $translation = $translations{$language, $decode_string}; 681 if ($translation) { 682 $translation = entity_encode($translation); 683 $string = $translation; 684 } 685 $$translate = 2; 686 } else { 687 $$translate = 2 if ($translate && (!$$translate)); # watch not to "overwrite" $translate 688 } 689 } 690 691 $result .= " $key=$quote$string$quote"; 692 } 693 return $result; 694 } 695 696 # Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree 697 sub getXMLstring 698 { 699 my $ref = shift; 700 my $spacepreserve = shift || 0; 701 my @list = @{ $ref }; 702 my $result = ""; 703 704 my $count = scalar(@list); 705 my $attrs = $list[0]; 706 my $index = 1; 707 708 $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)); 709 $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/)); 710 711 while ($index < $count) { 712 my $type = $list[$index]; 713 my $content = $list[$index+1]; 714 if (! $type ) { 715 # We've got CDATA 716 if ($content) { 717 # lets strip the whitespace here, and *ONLY* here 718 $content =~ s/\s+/ /gs if (!$spacepreserve); 719 $result .= $content; 720 } 721 } elsif ( "$type" ne "1" ) { 722 # We've got another element 723 $result .= "<$type"; 724 $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements 725 if ($content) { 726 my $subresult = getXMLstring($content, $spacepreserve); 727 if ($subresult) { 728 $result .= ">".$subresult . "</$type>"; 729 } else { 730 $result .= "/>"; 731 } 732 } else { 733 $result .= "/>"; 734 } 735 } 736 $index += 2; 737 } 738 return $result; 739 } 740 741 # Translate list of nodes if necessary 742 sub translate_subnodes 743 { 744 my $fh = shift; 745 my $content = shift; 746 my $language = shift || ""; 747 my $singlelang = shift || 0; 748 my $spacepreserve = shift || 0; 749 750 my @nodes = @{ $content }; 751 752 my $count = scalar(@nodes); 753 my $index = 0; 754 while ($index < $count) { 755 my $type = $nodes[$index]; 756 my $rest = $nodes[$index+1]; 757 if ($singlelang) { 758 my $oldMO = $MULTIPLE_OUTPUT; 759 $MULTIPLE_OUTPUT = 1; 760 traverse($fh, $type, $rest, $language, $spacepreserve); 761 $MULTIPLE_OUTPUT = $oldMO; 762 } else { 763 traverse($fh, $type, $rest, $language, $spacepreserve); 764 } 765 $index += 2; 766 } 767 } 768 769 sub isWellFormedXmlFragment 770 { 771 my $ret = eval 'require XML::Parser'; 772 if(!$ret) { 773 die "You must have XML::Parser installed to run $0\n\n"; 774 } 775 776 my $fragment = shift; 777 return 0 if (!$fragment); 778 779 $fragment = "<root>$fragment</root>"; 780 my $xp = new XML::Parser(Style => 'Tree'); 781 my $tree = 0; 782 eval { $tree = $xp->parse($fragment); }; 783 return $tree; 784 } 785 786 sub traverse 787 { 788 my $fh = shift; 789 my $nodename = shift; 790 my $content = shift; 791 my $language = shift || ""; 792 my $spacepreserve = shift || 0; 793 794 if (!$nodename) { 795 if ($content =~ /^[\s]*$/) { 796 $leading_space .= $content; 797 } 798 print $fh $content; 799 } else { 800 # element 801 my @all = @{ $content }; 802 my $attrs = shift @all; 803 my $translate = 0; 804 my $outattr = getAttributeString($attrs, 1, $language, \$translate); 805 806 if ($nodename =~ /^_/) { 807 $translate = 1; 808 $nodename =~ s/^_//; 809 } 810 my $lookup = ''; 811 812 $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/)); 813 $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)); 814 815 print $fh "<$nodename", $outattr; 816 if ($translate) { 817 $lookup = getXMLstring($content, $spacepreserve); 818 if (!$spacepreserve) { 819 $lookup =~ s/^\s+//s; 820 $lookup =~ s/\s+$//s; 821 } 822 823 if ($lookup || $translate == 2) { 824 my $translation = $translations{$language, $lookup} if isWellFormedXmlFragment($translations{$language, $lookup}); 825 if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) { 826 $translation = $lookup if (!$translation); 827 print $fh " xml:lang=\"", $language, "\"" if $language; 828 print $fh ">"; 829 if ($translate == 2) { 830 translate_subnodes($fh, \@all, $language, 1, $spacepreserve); 831 } else { 832 print $fh $translation; 833 } 834 print $fh "</$nodename>"; 835 836 return; # this means there will be no same translation with xml:lang="$language"... 837 # if we want them both, just remove this "return" 838 } else { 839 print $fh ">"; 840 if ($translate == 2) { 841 translate_subnodes($fh, \@all, $language, 1, $spacepreserve); 842 } else { 843 print $fh $lookup; 844 } 845 print $fh "</$nodename>"; 846 } 847 } else { 848 print $fh "/>"; 849 } 850 851 for my $lang (sort keys %po_files_by_lang) { 852 if ($MULTIPLE_OUTPUT && $lang ne "$language") { 853 next; 854 } 855 if ($lang) { 856 # Handle translation 857 # 858 my $translate = 0; 859 my $localattrs = getAttributeString($attrs, 1, $lang, \$translate); 860 my $translation = $translations{$lang, $lookup} if isWellFormedXmlFragment($translations{$lang, $lookup}); 861 if ($translate && !$translation) { 862 $translation = $lookup; 863 } 864 865 if ($translation || $translate) { 866 print $fh "\n"; 867 $leading_space =~ s/.*\n//g; 868 print $fh $leading_space; 869 print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">"; 870 if ($translate == 2) { 871 translate_subnodes($fh, \@all, $lang, 1, $spacepreserve); 872 } else { 873 print $fh $translation; 874 } 875 print $fh "</$nodename>"; 876 } 877 } 878 } 879 880 } else { 881 my $count = scalar(@all); 882 if ($count > 0) { 883 print $fh ">"; 884 my $index = 0; 885 while ($index < $count) { 886 my $type = $all[$index]; 887 my $rest = $all[$index+1]; 888 traverse($fh, $type, $rest, $language, $spacepreserve); 889 $index += 2; 890 } 891 print $fh "</$nodename>"; 892 } else { 893 print $fh "/>"; 894 } 895 } 896 } 897 } 898 899 sub intltool_tree_comment 900 { 901 my $expat = shift; 902 my $data = shift; 903 my $clist = $expat->{Curlist}; 904 my $pos = $#$clist; 905 906 push @$clist, 1 => $data; 907 } 908 909 sub intltool_tree_cdatastart 910 { 911 my $expat = shift; 912 my $clist = $expat->{Curlist}; 913 my $pos = $#$clist; 914 915 push @$clist, 0 => $expat->original_string(); 916 } 917 918 sub intltool_tree_cdataend 919 { 920 my $expat = shift; 921 my $clist = $expat->{Curlist}; 922 my $pos = $#$clist; 923 924 $clist->[$pos] .= $expat->original_string(); 925 } 926 927 sub intltool_tree_char 928 { 929 my $expat = shift; 930 my $text = shift; 931 my $clist = $expat->{Curlist}; 932 my $pos = $#$clist; 933 934 # Use original_string so that we retain escaped entities 935 # in CDATA sections. 936 # 937 if ($pos > 0 and $clist->[$pos - 1] eq '0') { 938 $clist->[$pos] .= $expat->original_string(); 939 } else { 940 push @$clist, 0 => $expat->original_string(); 941 } 942 } 943 944 sub intltool_tree_start 945 { 946 my $expat = shift; 947 my $tag = shift; 948 my @origlist = (); 949 950 # Use original_string so that we retain escaped entities 951 # in attribute values. We must convert the string to an 952 # @origlist array to conform to the structure of the Tree 953 # Style. 954 # 955 my @original_array = split /\x/, $expat->original_string(); 956 my $source = $expat->original_string(); 957 958 # Remove leading tag. 959 # 960 $source =~ s|^\s*<\s*(\S+)||s; 961 962 # Grab attribute key/value pairs and push onto @origlist array. 963 # 964 while ($source) 965 { 966 if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/) 967 { 968 $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s; 969 push @origlist, $1; 970 push @origlist, '"' . $2 . '"'; 971 } 972 elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/) 973 { 974 $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s; 975 push @origlist, $1; 976 push @origlist, "'" . $2 . "'"; 977 } 978 else 979 { 980 last; 981 } 982 } 983 984 my $ol = [ { @origlist } ]; 985 986 push @{ $expat->{Lists} }, $expat->{Curlist}; 987 push @{ $expat->{Curlist} }, $tag => $ol; 988 $expat->{Curlist} = $ol; 989 } 990 991 sub readXml 992 { 993 my $filename = shift || return; 994 if(!-f $filename) { 995 die "ERROR Cannot find filename: $filename\n"; 996 } 997 998 my $ret = eval 'require XML::Parser'; 999 if(!$ret) { 1000 die "You must have XML::Parser installed to run $0\n\n"; 1001 } 1002 my $xp = new XML::Parser(Style => 'Tree'); 1003 $xp->setHandlers(Char => \&intltool_tree_char); 1004 $xp->setHandlers(Start => \&intltool_tree_start); 1005 $xp->setHandlers(CdataStart => \&intltool_tree_cdatastart); 1006 $xp->setHandlers(CdataEnd => \&intltool_tree_cdataend); 1007 my $tree = $xp->parsefile($filename); 1008 1009 # <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo> 1010 # would be: 1011 # [foo, [{}, head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], bar, [{}, 1012 # 0, "Howdy", ref, [{}]], 0, "do" ] ] 1013 1014 return $tree; 1015 } 1016 1017 sub print_header 1018 { 1019 my $infile = shift; 1020 my $fh = shift; 1021 my $source; 1022 1023 if(!-f $infile) { 1024 die "ERROR Cannot find filename: $infile\n"; 1025 } 1026 1027 print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n}; 1028 { 1029 local $/; 1030 open DOCINPUT, "<${FILE}" or die; 1031 $source = <DOCINPUT>; 1032 close DOCINPUT; 1033 } 1034 if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s) 1035 { 1036 print $fh "$1\n"; 1037 } 1038 elsif ($source =~ /(<!DOCTYPE[^>]*>)/s) 1039 { 1040 print $fh "$1\n"; 1041 } 1042 } 1043 1044 sub parseTree 1045 { 1046 my $fh = shift; 1047 my $ref = shift; 1048 my $language = shift || ""; 1049 1050 my $name = shift @{ $ref }; 1051 my $cont = shift @{ $ref }; 1052 1053 while (!$name || "$name" eq "1") { 1054 $name = shift @{ $ref }; 1055 $cont = shift @{ $ref }; 1056 } 1057 1058 my $spacepreserve = 0; 1059 my $attrs = @{$cont}[0]; 1060 $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)); 1061 1062 traverse($fh, $name, $cont, $language, $spacepreserve); 1063 } 1064 1065 sub xml_merge_output 1066 { 1067 my $source; 1068 1069 if ($MULTIPLE_OUTPUT) { 1070 for my $lang (sort keys %po_files_by_lang) { 1071 if ( ! -d $lang ) { 1072 mkdir $lang or -d $lang or die "Cannot create subdirectory $lang: $!\n"; 1073 } 1074 open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n"; 1075 binmode (OUTPUT) if $^O eq 'MSWin32'; 1076 my $tree = readXml($FILE); 1077 print_header($FILE, \*OUTPUT); 1078 parseTree(\*OUTPUT, $tree, $lang); 1079 close OUTPUT; 1080 print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG; 1081 } 1082 if ( ! -d "C" ) { 1083 mkdir "C" or -d "C" or die "Cannot create subdirectory C: $!\n"; 1084 } 1085 open OUTPUT, ">C/$OUTFILE" or die "Cannot open C/$OUTFILE: $!\n"; 1086 binmode (OUTPUT) if $^O eq 'MSWin32'; 1087 my $tree = readXml($FILE); 1088 print_header($FILE, \*OUTPUT); 1089 parseTree(\*OUTPUT, $tree); 1090 close OUTPUT; 1091 print "CREATED C/$OUTFILE\n" unless $QUIET_ARG; 1092 } else { 1093 open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n"; 1094 binmode (OUTPUT) if $^O eq 'MSWin32'; 1095 my $tree = readXml($FILE); 1096 print_header($FILE, \*OUTPUT); 1097 parseTree(\*OUTPUT, $tree); 1098 close OUTPUT; 1099 print "CREATED $OUTFILE\n" unless $QUIET_ARG; 1100 } 1101 } 1102 1103 sub keys_merge_translation 1104 { 1105 my ($lang) = @_; 1106 1107 if ( ! -d $lang && $MULTIPLE_OUTPUT) 1108 { 1109 mkdir $lang or -d $lang or die "Cannot create subdirectory $lang: $!\n"; 1110 } 1111 1112 open INPUT, "<${FILE}" or die "Cannot open ${FILE}: $!\n"; 1113 open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n"; 1114 binmode (OUTPUT) if $^O eq 'MSWin32'; 1115 1116 while (<INPUT>) 1117 { 1118 if (s/^(\s*)_(\w+=(.*))/$1$2/) 1119 { 1120 my $string = $3; 1121 1122 if (!$MULTIPLE_OUTPUT) 1123 { 1124 print OUTPUT; 1125 1126 my $non_translated_line = $_; 1127 1128 for my $lang (sort keys %po_files_by_lang) 1129 { 1130 my $translation = $translations{$lang, $string}; 1131 next if !$translation; 1132 1133 $_ = $non_translated_line; 1134 s/(\w+)=.*/[$lang]$1=$translation/; 1135 print OUTPUT; 1136 } 1137 } 1138 else 1139 { 1140 my $non_translated_line = $_; 1141 my $translation = $translations{$lang, $string}; 1142 $translation = $string if !$translation; 1143 1144 $_ = $non_translated_line; 1145 s/(\w+)=.*/$1=$translation/; 1146 print OUTPUT; 1147 } 1148 } 1149 else 1150 { 1151 print OUTPUT; 1152 } 1153 } 1154 1155 close OUTPUT; 1156 close INPUT; 1157 1158 print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG; 1159 } 1160 1161 sub keys_merge_translations 1162 { 1163 if ($MULTIPLE_OUTPUT) 1164 { 1165 for my $lang (sort keys %po_files_by_lang) 1166 { 1167 keys_merge_translation ($lang); 1168 } 1169 keys_merge_translation ("C"); 1170 } 1171 else 1172 { 1173 keys_merge_translation ("."); 1174 } 1175 } 1176 1177 sub desktop_merge_translations 1178 { 1179 open INPUT, "<${FILE}" or die; 1180 open OUTPUT, ">${OUTFILE}" or die; 1181 binmode (OUTPUT) if $^O eq 'MSWin32'; 1182 1183 while (<INPUT>) 1184 { 1185 if (s/^(\s*)_(\w+=(.*))/$1$2/) 1186 { 1187 my $string = $3; 1188 1189 print OUTPUT; 1190 1191 my $non_translated_line = $_; 1192 1193 for my $lang (sort keys %po_files_by_lang) 1194 { 1195 my $translation = $translations{$lang, $string}; 1196 next if !$translation; 1197 1198 $_ = $non_translated_line; 1199 s/(\w+)=.*/${1}[$lang]=$translation/; 1200 print OUTPUT; 1201 } 1202 } 1203 else 1204 { 1205 print OUTPUT; 1206 } 1207 } 1208 1209 close OUTPUT; 1210 close INPUT; 1211 } 1212 1213 sub schemas_merge_translations 1214 { 1215 my $source; 1216 1217 { 1218 local $/; # slurp mode 1219 open INPUT, "<$FILE" or die "can't open $FILE: $!"; 1220 $source = <INPUT>; 1221 close INPUT; 1222 } 1223 1224 open OUTPUT, ">$OUTFILE" or die; 1225 binmode (OUTPUT) if $^O eq 'MSWin32'; 1226 1227 # FIXME: support attribute translations 1228 1229 # Empty nodes never need translation, so unmark all of them. 1230 # For example, <_foo/> is just replaced by <foo/>. 1231 $source =~ s|<\s*_($w+)\s*/>|<$1/>|g; 1232 1233 while ($source =~ s/ 1234 (.*?) 1235 (\s+)(<locale\ name="C">(\s*) 1236 (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*) 1237 (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*) 1238 (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*) 1239 <\/locale>) 1240 //sx) 1241 { 1242 print OUTPUT $1; 1243 1244 my $locale_start_spaces = $2 ? $2 : ''; 1245 my $default_spaces = $4 ? $4 : ''; 1246 my $short_spaces = $7 ? $7 : ''; 1247 my $long_spaces = $10 ? $10 : ''; 1248 my $locale_end_spaces = $13 ? $13 : ''; 1249 my $c_default_block = $3 ? $3 : ''; 1250 my $default_string = $6 ? $6 : ''; 1251 my $short_string = $9 ? $9 : ''; 1252 my $long_string = $12 ? $12 : ''; 1253 1254 print OUTPUT "$locale_start_spaces$c_default_block"; 1255 1256 $default_string =~ s/\s+/ /g; 1257 $default_string = entity_decode($default_string); 1258 $short_string =~ s/\s+/ /g; 1259 $short_string = entity_decode($short_string); 1260 $long_string =~ s/\s+/ /g; 1261 $long_string = entity_decode($long_string); 1262 1263 for my $lang (sort keys %po_files_by_lang) 1264 { 1265 my $default_translation = $translations{$lang, $default_string}; 1266 my $short_translation = $translations{$lang, $short_string}; 1267 my $long_translation = $translations{$lang, $long_string}; 1268 1269 next if (!$default_translation && !$short_translation && 1270 !$long_translation); 1271 1272 print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">"; 1273 1274 print OUTPUT "$default_spaces"; 1275 1276 if ($default_translation) 1277 { 1278 $default_translation = entity_encode($default_translation); 1279 print OUTPUT "<default>$default_translation</default>"; 1280 } 1281 1282 print OUTPUT "$short_spaces"; 1283 1284 if ($short_translation) 1285 { 1286 $short_translation = entity_encode($short_translation); 1287 print OUTPUT "<short>$short_translation</short>"; 1288 } 1289 1290 print OUTPUT "$long_spaces"; 1291 1292 if ($long_translation) 1293 { 1294 $long_translation = entity_encode($long_translation); 1295 print OUTPUT "<long>$long_translation</long>"; 1296 } 1297 1298 print OUTPUT "$locale_end_spaces</locale>"; 1299 } 1300 } 1301 1302 print OUTPUT $source; 1303 1304 close OUTPUT; 1305 } 1306 1307 sub rfc822deb_merge_translations 1308 { 1309 my %encodings = (); 1310 for my $lang (keys %po_files_by_lang) { 1311 $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang})); 1312 } 1313 1314 my $source; 1315 1316 $Text::Wrap::huge = 'overflow'; 1317 $Text::Wrap::break = qr/\n|\s(?=\S)/; 1318 1319 { 1320 local $/; # slurp mode 1321 open INPUT, "<$FILE" or die "can't open $FILE: $!"; 1322 $source = <INPUT>; 1323 close INPUT; 1324 } 1325 1326 open OUTPUT, ">${OUTFILE}" or die; 1327 binmode (OUTPUT) if $^O eq 'MSWin32'; 1328 1329 while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg) 1330 { 1331 my $sep = $1; 1332 my $non_translated_line = $3.$4; 1333 my $string = $5; 1334 my $underscore = length($2); 1335 next if $underscore eq 0 && $non_translated_line =~ /^#/; 1336 # Remove [] dummy strings 1337 my $stripped = $string; 1338 $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2; 1339 $stripped =~ s/\[\s[^\[\]]*\]$//; 1340 $non_translated_line .= $stripped; 1341 1342 print OUTPUT $sep.$non_translated_line; 1343 1344 if ($underscore) 1345 { 1346 my @str_list = rfc822deb_split($underscore, $string); 1347 1348 for my $lang (sort keys %po_files_by_lang) 1349 { 1350 my $is_translated = 1; 1351 my $str_translated = ''; 1352 my $first = 1; 1353 1354 for my $str (@str_list) 1355 { 1356 my $translation = $translations{$lang, $str}; 1357 1358 if (!$translation) 1359 { 1360 $is_translated = 0; 1361 last; 1362 } 1363 1364 # $translation may also contain [] dummy 1365 # strings, mostly to indicate an empty string 1366 $translation =~ s/\[\s[^\[\]]*\]$//; 1367 1368 if ($first) 1369 { 1370 if ($underscore eq 2) 1371 { 1372 $str_translated .= $translation; 1373 } 1374 else 1375 { 1376 $str_translated .= 1377 Text::Tabs::expand($translation) . 1378 "\n"; 1379 } 1380 } 1381 else 1382 { 1383 if ($underscore eq 2) 1384 { 1385 $str_translated .= ', ' . $translation; 1386 } 1387 else 1388 { 1389 $str_translated .= Text::Tabs::expand( 1390 Text::Wrap::wrap(' ', ' ', $translation)) . 1391 "\n .\n"; 1392 } 1393 } 1394 $first = 0; 1395 1396 # To fix some problems with Text::Wrap::wrap 1397 $str_translated =~ s/(\n )+\n/\n .\n/g; 1398 } 1399 next unless $is_translated; 1400 1401 $str_translated =~ s/\n \.\n$//; 1402 $str_translated =~ s/\s+$//; 1403 1404 $_ = $non_translated_line; 1405 s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s; 1406 print OUTPUT; 1407 } 1408 } 1409 } 1410 print OUTPUT "\n"; 1411 1412 close OUTPUT; 1413 close INPUT; 1414 } 1415 1416 sub rfc822deb_split 1417 { 1418 # Debian defines a special way to deal with rfc822-style files: 1419 # when a value contain newlines, it consists of 1420 # 1. a short form (first line) 1421 # 2. a long description, all lines begin with a space, 1422 # and paragraphs are separated by a single dot on a line 1423 # This routine returns an array of all paragraphs, and reformat 1424 # them. 1425 # When first argument is 2, the string is a comma separated list of 1426 # values. 1427 my $type = shift; 1428 my $text = shift; 1429 $text =~ s/^[ \t]//mg; 1430 return (split(/, */, $text, 0)) if $type ne 1; 1431 return ($text) if $text !~ /\n/; 1432 1433 $text =~ s/([^\n]*)\n//; 1434 my @list = ($1); 1435 my $str = ''; 1436 1437 for my $line (split (/\n/, $text)) 1438 { 1439 chomp $line; 1440 if ($line =~ /^\.\s*$/) 1441 { 1442 # New paragraph 1443 $str =~ s/\s*$//; 1444 push(@list, $str); 1445 $str = ''; 1446 } 1447 elsif ($line =~ /^\s/) 1448 { 1449 # Line which must not be reformatted 1450 $str .= "\n" if length ($str) && $str !~ /\n$/; 1451 $line =~ s/\s+$//; 1452 $str .= $line."\n"; 1453 } 1454 else 1455 { 1456 # Continuation line, remove newline 1457 $str .= " " if length ($str) && $str !~ /\n$/; 1458 $str .= $line; 1459 } 1460 } 1461 1462 $str =~ s/\s*$//; 1463 push(@list, $str) if length ($str); 1464 1465 return @list; 1466 } 1467 1468 sub quoted_translation 1469 { 1470 my ($xml_mode, $lang, $string) = @_; 1471 1472 $string = entity_decode($string) if $xml_mode; 1473 $string =~ s/\\\"/\"/g; 1474 1475 my $translation = $translations{$lang, $string}; 1476 $translation = $string if !$translation; 1477 $translation = entity_encode($translation) if $xml_mode; 1478 $translation =~ s/\"/\\\"/g; 1479 return $translation 1480 } 1481 1482 sub quoted_merge_translations 1483 { 1484 my ($xml_mode) = @_; 1485 1486 if (!$MULTIPLE_OUTPUT) { 1487 print "Quoted only supports Multiple Output.\n"; 1488 exit(1); 1489 } 1490 1491 for my $lang (sort keys %po_files_by_lang) { 1492 if ( ! -d $lang ) { 1493 mkdir $lang or -d $lang or die "Cannot create subdirectory $lang: $!\n"; 1494 } 1495 open INPUT, "<${FILE}" or die; 1496 open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n"; 1497 binmode (OUTPUT) if $^O eq 'MSWin32'; 1498 while (<INPUT>) 1499 { 1500 s/\"(([^\"]|\\\")*[^\\\"])\"/"\"" . "ed_translation($xml_mode, $lang, $1) . "\""/ge; 1501 print OUTPUT; 1502 } 1503 close OUTPUT; 1504 close INPUT; 1505 } 1506 }