Server IP : 195.201.23.43 / Your IP : 13.58.187.29 Web Server : Apache System : Linux webserver2.vercom.be 5.4.0-192-generic #212-Ubuntu SMP Fri Jul 5 09:47:39 UTC 2024 x86_64 User : kdecoratie ( 1041) PHP Version : 7.1.33-63+ubuntu20.04.1+deb.sury.org+1 Disable Function : pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wifcontinued,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_get_handler,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,pcntl_async_signals, MySQL : OFF | cURL : ON | WGET : ON | Perl : ON | Python : OFF | Sudo : ON | Pkexec : ON Directory : /usr/share/webmin/bin/ |
Upload File : |
#!/usr/bin/env perl
# language-manager - Automatic translation and/or transcoding language files for all or specific modules
use strict;
use warnings;
use 5.014;

use File::Spec;
use File::Basename;
use File::Find;
use JSON::PP;
use HTTP::Tiny;
eval "use HTML::Entities";
use List::MoreUtils qw(any uniq);
use Cwd qw(cwd);
use Encode qw/encode decode/;
use Encode::Detect::Detector;
use Term::ANSIColor qw(:constants);
use Getopt::Long qw(:config permute pass_through);
use Pod::Usage;

# Entry point: parses command line options, locates the Webmin library,
# builds the list of modules and languages to work on and then hands over
# to go(), after asking the user for confirmation through prompt().
#
# Takes no parameters; options are read from @ARGV by GetOptions().
# Returns 0. The script exits early when the source language or one of the
# requested target languages is not listed in `lang_list.txt`.
sub main
{
    my %data;

    # Register start time
    $data{'start_time'} = time();

    # Prevent accidental script termination: Ctrl-C is ignored until it has
    # been pressed more than 5 times
    my $sigkill = sub {
        if ($data{'sigkill'}++ > 5) {
            say RED, "\nTerminating: Ctrl-C has been pressed more than 5 times ..", RESET;
            exit;
        }
    };
    $SIG{INT} = $sigkill;

    # Get user options from the command line
    my %opt;
    GetOptions('help|h'                          => \$opt{'help'},
               'mode|x:s'                        => \$opt{'mode'},
               'type|w:s'                        => \$opt{'type'},
               'modules|m:s'                     => \$opt{'modules'},
               'modules-exclude|me:s'            => \$opt{'modules-exclude'},
               'language-target|t:s'             => \$opt{'language-target'},
               'language-target-exclude|te:s'    => \$opt{'language-target-exclude'},
               'language-source|s:s'             => \$opt{'language-source'},
               'language-source-exclude|se:s'    => \$opt{'language-source-exclude'},
               'language-source-ignore-auto|sia!' => \$opt{'language-source-ignore-auto'},
               'language-source-encoding|e:s'    => \$opt{'language-source-encoding'},
               'only-diff|od!'                   => \$opt{'only-diff'},
               'only-transcode|ot!'              => \$opt{'only-transcode'},
               'keys-exclude|ke:s'               => \$opt{'keys-exclude'},
               'keys-force-translate|kft:s'      => \$opt{'keys-force-translate'},
               'keys-force-html|kfh:s'           => \$opt{'keys-force-html'},
               'keys-test|kt:s'                  => \$opt{'keys-test'},
               'values-fix|vf:s'                 => \$opt{'values-fix'},
               'values-fix-delimiter|vfd:s'      => \$opt{'values-fix-delimiter'},
               'translate-format|tf:s'           => \$opt{'translate-format'},
               'allow-symlinks|as!'              => \$opt{'allow-symlinks'},
               'git-commit|gc!'                  => \$opt{'git-commit'},
               'log|l:s'                         => \$opt{'log'},
               'verbose|v:i'                     => \$opt{'verbose'},
               'yes|y:i'                         => \$opt{'assumeyes'});

    # Print help and exit
    pod2usage(0) if ($opt{'help'});

    # Enforce verbose output by default
    $opt{'verbose'} = 1 if (!defined($opt{'verbose'}));

    # Enforce yes for all questions (any value given to --yes counts as yes)
    $opt{'assumeyes'} = 1 if (defined($opt{'assumeyes'}));

    # Get current path
    my $path = cwd;

    # Load Webmin lib: look in the current directory first, then two levels
    # above the script, and finally in the parent of the current directory
    my $lib = "web-lib-funcs.pl";
    if (!-r "$path/$lib") {
        $path = dirname(dirname($0));
        if (!-r "$path/$lib") {
            $path = Cwd::realpath('..');
        }
    }
    require("$path/$lib");

    # Enforce user specific target: language directory, config.info or module.info file {lang|ulang|config|uconfig|module}
    my $type = -d "$path/ulang" ? 'ulang' : 'lang';
    $type = $opt{'type'} if ($opt{'type'});

    # Store path and type
    $data{'path'} = $path;
    $data{'type'} = $type;

    # Set default format to text
    $opt{'translate-format'} ||= 'text';

    # Force HTML format for "module" type translates
    if (defined($opt{'type'}) && $opt{'type'} =~ /module|help/) {
        $opt{'translate-format'} = 'html';
    }

    # Check if we can get Google Translate API token
    $data{'token'} = get_google_translate_token();
    if (!$data{'token'}) {
        $opt{'only-diff'}      = 1;
        $opt{'only-diff-auto'} = 1;
        say YELLOW . "Translation will not be done as Google Translate API key is missing. Falling back to `only-diff` mode.." . RESET;
    }

    # Get list of languages from `lang_list.txt` file
    $data{'languages_source_list'}       = list_languages_local(\%data);
    $data{'languages_source_list_codes'} = [map {$_->{'lang'}} @{ $data{'languages_source_list'} }];

    # Define base language. Default is set English
    $opt{'language-source'} ||= 'en';
    my $language_source = $opt{'language-source'};

    # Check, if source language exists (prefix match against known codes;
    # the user supplied value is quoted so it cannot act as a pattern)
    if (!any {/^\Q$language_source\E/} @{ $data{'languages_source_list_codes'} }) {
        errors('language-source', $language_source);
        exit;
    }

    # What language encoding do we expect on human translated files
    $opt{'language-source-encoding'} ||= 'utf-8';

    # Always exclude source language
    my $language_target_exclude =
      $opt{'language-target-exclude'}
      ? "$opt{'language-target-exclude'},$language_source"
      : $language_source;

    # Don't put on the list user excluded languages
    if ($language_target_exclude) {
        my @languages_excluded = split(',', $language_target_exclude);
        my @languages_allowed  = grep {
            my $language = $_;
            !any {$_ eq $language} @languages_excluded
        } @{ $data{'languages_source_list_codes'} };

        # An exclusion list that would leave nothing is ignored
        if (@languages_allowed) {
            $data{'languages_source_list_codes'} = [@languages_allowed];
        }
    }

    # Define the mode
    $opt{'mode'} ||= 'sync';

    # Allow symlinks
    my $allow_symlinks = $opt{'allow-symlinks'};

    # Find out which modules to update, if exist
    my $modules = $opt{'modules'};
    my @modules;
    my @modules_exclude = $opt{'modules-exclude'} ? split(',', $opt{'modules-exclude'}) : ();
    if ($modules) {

        # Explicit list: keep modules for which source_data() reports a
        # source file and which have not been excluded
        foreach my $module (split(',', $modules)) {
            my ($exists) = source_data($module, \%data, \%opt);
            next if (!$exists);
            next if (any {$_ eq $module} @modules_exclude);
            push(@modules, $module);
        }
        @modules = sort @modules;
    } else {

        # No list given: walk the tree and derive module names from the
        # location of source language files, *.info files and type
        # directories. Path, type and language are quoted, as they are
        # literal strings and not patterns (a path of `.` used to match
        # any character).
        my $path_re   = quotemeta($path);
        my $type_re   = quotemeta($type);
        my $source_re = quotemeta($language_source);
        find(
            {  wanted => sub {
                   my $found     = $File::Find::name;
                   my $is_dir    = -d $found;
                   my $is_source = $found =~ /$type_re\/$source_re$/;
                   my $is_info   = $found =~ /$type_re\.info$/;

                   # A type directory exactly two levels below the root
                   my $relative    = $found =~ s/$path_re//r;
                   my $depth       = ($relative =~ tr/\///);
                   my $is_type_dir = $is_dir && $depth == 2 && $found =~ /$path_re\/.*\/$type_re$/;

                   if ((!-l $found || $allow_symlinks) && ($is_source || $is_info || $is_type_dir)) {

                       # Reduce the path to the bare module name
                       $found =~ s/^$path_re\///g;
                       $found =~ s/\/$type_re\/$source_re//g;
                       $found =~ s/\/$type_re\.info//g;
                       $found =~ s/\/$source_re//g;
                       $found =~ s/\/$source_re//g;
                       $found =~ s/\/$type_re$//g if ($is_dir);
                       if (!any {$_ eq $found} @modules_exclude) {
                           push(@modules, $found);
                       }
                   }
               },
               follow => $allow_symlinks,
            },
            $path);
        @modules = sort(uniq(@modules));
    }
    $data{'modules'} = \@modules;

    # Define target language(s) or translate all
    $opt{'language-target'} = [$opt{'language-target'} ? split(',', $opt{'language-target'}) : ()];
    if (@{ $opt{'language-target'} }) {
        my @bad_languages = grep {
            my $language = $_;
            !any {$_ eq $language} @{ $data{'languages_source_list_codes'} }
        } @{ $opt{'language-target'} };
        if (@bad_languages) {
            errors('language-target', join(',', @bad_languages));
            exit;
        }
    }

    # Turn comma separated option values into array references:
    #  keys-test            - test only given keys in bulk translation; translated strings are not saved
    #  keys-exclude         - exclude listed keys from bulk translation
    #  keys-force-translate - forced list of keys that will be re-translated
    #  keys-force-html      - forced list of keys to run translations in HTML format
    for my $list_option (qw(keys-test keys-exclude keys-force-translate keys-force-html)) {
        $opt{$list_option} = [$opt{$list_option} ? split(',', $opt{$list_option}) : ()];
    }

    # Value fix delimiter. Default is `:`
    $opt{'values-fix-delimiter'} ||= ':';

    # Build a list of value to check based on source file, and replace with, in all other languages
    $opt{'values-fix'} = [$opt{'values-fix'} ? split(',', $opt{'values-fix'}) : ()];

    # Ask for confirmation and, when given, start processing
    my $confirm_and_go = sub {
        go(\%opt, \%data) if (prompt('next', \%opt));
    };

    # Just run tests, and exit, without writing anything
    if (@{ $opt{'keys-test'} }) {
        say CYAN, "Translation testing for selected keys is about to start ..", RESET;
        $confirm_and_go->();
    } else {

        # Log the output. Start
        $opt{'log'} ||= "/tmp/language-manager-@{[time()]}.log";
        open $data{'out'}, ">", "$opt{'log'}" or die RED, "Error creating log: $!\n", RESET, "\n";

        # User interactions
        talk('affected', \%opt, \%data);

        # Pick the announcement matching what is about to be done:
        #  clean     - run in clean mode
        #  full      - run in overwrite mode (force transcode/translate)
        #  otherwise - translate newly added strings of the source language
        #              (def. en) to all targets ".auto"; keys deleted from
        #              the source language are also removed from targets,
        #              in both human translated and ".auto" files. Within
        #              this group a values fix takes precedence over
        #              transcoding, which takes precedence over plain sync.
        my $announce =
            $opt{'mode'} eq 'clean'     ? 'clean-pre'
          : $opt{'mode'} eq 'full'      ? 'overwrite-pre'
          : @{ $opt{'values-fix'} }     ? 'fix-pre'
          : $opt{'mode'} eq 'transcode' ? 'transcode-pre'
          :                               'sync-pre';
        talk($announce, \%opt, \%data);
        $confirm_and_go->();

        # Log the output. End
        close($data{'out'}) or warn "Error closing log $opt{'log'}: $!\n";
    }
    return 0;
}

main();

# The following is used to get correct language files based on old language table.
# This function, should not be really needed, unless compiling languages with
# Webmin version prior to 1.950 (first time), as Webmin 1.950+ is going to
# use new language map and have all related language files in UTF-8 already
#
# Takes a language code and returns the old style code it maps to, or the
# code itself when the table has no entry for it.
sub language_map
{
    my ($code) = @_;
    my %language_map = (

        # Use plain language codes
        'ja'    => 'ja_JP.euc',
        'ko'    => 'ko_KR.euc',
        'ms'    => 'ms_MY',
        'ru'    => 'ru_RU',
        'uk'    => 'uk_UA',
        'zh'    => 'zh_CN',
        'zh_TW' => 'zh_TW.Big5',

        # Czech is `cs` not `cz`, as there is no `cz` at all
        'cs' => 'cz',

        # Slovenian is `sl` not `si`, as `si` is Sinhala
        'sl' => 'si',

        # Greek is `el` not `gr`
        'el' => 'gr',);

    return $language_map{$code} ? $language_map{$code} : $code;
}

# Always check, if strings that are about to be translated haven't been
# translated by human already, and if so, extract them with correct
# encoding, to be further converted to UTF-8 and stored on destination file
#
# Parameters:
#  $opt  - options hash reference ('language-source-exclude',
#          'language-source-encoding', 'type' and 'mode' are read)
#  $data - shared data hash reference, only passed on to talk_log()
#  %data - per language values: 'language-code', 'language-name' and
#          'module-path'
# Returns (\%human, \%auto, $encoding, $file), or (undef, undef) when the
# language is on the user's source exclusion list.
sub language_source_file
{
    my ($opt, $data, %data) = @_;
    my $language_code = $data{'language-code'};
    my %language_source_file;
    my %language_source_file_auto;
    my $language_source_file;
    my $language_source_file_encoding = 'utf-8';

    # Ignore user defined source languages
    my $language_source_exclude = $opt->{'language-source-exclude'};
    if ($language_source_exclude) {
        my @languages_excluded = split(',', $language_source_exclude);
        if (any {$_ eq $language_code} @languages_excluded) {
            return (undef, undef);
        }
    }

    # Correct language code according to the old language map
    if ($opt->{'language-source-encoding'} eq 'map') {
        $language_code = language_map($language_code);
    }

    # Set prefix in case of processing `config.info` or `module.info`
    my $language_source_file_target_ = "/";
    if (defined($opt->{'type'}) && $opt->{'type'} =~ /config|uconfig|module/) {
        $language_source_file_target_ = "/$opt->{'type'}.info.";
    }

    # Get already translated language strings from current directory
    my $language_source_file_target = "$data{'module-path'}${language_source_file_target_}${language_code}";
    if (-r $language_source_file_target) {
        $language_source_file = $language_source_file_target;
    }

    # Load machine translated file
    if ($opt->{'mode'} ne 'full') {
        my $language_source_file_target_auto = "$language_source_file_target.auto";
        if (-r $language_source_file_target_auto) {
            read_file($language_source_file_target_auto, \%language_source_file_auto);
            talk_log(
                     ("" . GREEN .
                        " .. Found machine translated file for $data{'language-name'} ($language_code.auto)" . RESET . ""
                     ),
                     $data, 1);
        } else {
            say YELLOW, " .. No machine translated file has been found for $data{'language-name'} ($language_code.auto)", RESET;
        }
    }

    # Without a human translated file there is nothing else to load
    if (!$language_source_file) {
        say YELLOW, " .. No human translated file has been found for $data{'language-name'} ($language_code)", RESET;
        return (\%language_source_file, \%language_source_file_auto,
                $language_source_file_encoding, $language_source_file_target);
    }

    # Supply proper encoding for existing language files
    talk_log(("" . GREEN . " .. Found human translated file for $data{'language-name'} ($language_code)" . RESET . ""), $data, 1);
    read_file("$language_source_file", \%language_source_file);

    # Force encoding based on map
    if ($opt->{'mode'} eq 'full' && $opt->{'language-source-encoding'} eq 'map') {
        my $code = $data{'language-code'};
        my $map_auto;
        ($language_source_file_encoding, $map_auto) = language_file_encoding($code, $language_source_file_target);
        talk_log(
                 ("" . CYAN . " .. Force file encoding to \`$language_source_file_encoding\`" .
                    ($map_auto // '') . " as derived from language map" . RESET . ""
                 ),
                 $data, 1);
    }

    # Figure out encoding automatically
    elsif ($opt->{'mode'} eq 'full' && $opt->{'language-source-encoding'} eq 'auto') {
        my $language_source_file_data = read_file_contents($language_source_file_target);
        $language_source_file_encoding = Encode::Detect::Detector::detect($language_source_file_data);
        if (!$language_source_file_encoding) {
            $language_source_file_encoding =
              ask(' ' . BRIGHT_RED . '.. Cannot detect encoding, enter it manually' . RESET . '');
            talk_log(
                     ("" . BRIGHT_RED . " .. Manually set file encoding to \`$language_source_file_encoding\`" . RESET . ""),
                     $data, 1);
        } else {
            talk_log(
                     ("" . MAGENTA . " .. Automatically detected file encoding is \`$language_source_file_encoding\`" .
                        RESET . ""
                     ),
                     $data, 1);
        }
    }

    # Use what the user has asked for
    else {
        $language_source_file_encoding = $opt->{'language-source-encoding'};
        talk_log(("" . MAGENTA . " .. Set file encoding to default \`$language_source_file_encoding\`" . RESET . ""), $data, 1);
    }

    return (\%language_source_file, \%language_source_file_auto,
            $language_source_file_encoding, $language_source_file_target);
}

# Returns the encoding a language file is expected to be in, as a list of
# ($encoding, $note).
#
# For language codes listed in the table below the encoding is fixed and
# $note is undef. For any other code the contents of $file are run through
# the encoding detector: on success the detected name is returned with the
# note " (auto)", otherwise 'utf-8' with the note " (auto enforced)".
sub language_file_encoding
{
    my ($code, $file) = @_;
    my %encoding_for = (
        'ja' => 'euc-jp',
        'ko' => 'euc-kr',
        (map {$_ => 'cp1251'} qw(ru bg uk)),
        (map {$_ => 'cp1252'} qw(ca fr hr lt no)),
        (map {$_ => 'iso-8859-2'} qw(cs sk pl sl hu)),
        'tr'    => 'iso-8859-9',
        'he'    => 'cp1255',
        'th'    => 'tis-620',
        'zh'    => 'gb2312',
        'zh_TW' => 'big5',);

    if (exists($encoding_for{$code})) {
        return ($encoding_for{$code}, undef);
    }

    # Detect on the file contents. The file name used to be passed to the
    # detector instead, so detection never looked at the actual data.
    my $file_data = read_file_contents($file);
    my $detected;
    if (defined($file_data) && length($file_data)) {
        $detected = Encode::Detect::Detector::detect($file_data);
    }
    return $detected ? ($detected, " (auto)") : ('utf-8', " (auto enforced)");
}

# Tells if a language must be skipped. Returns 1 when the user has given a
# list of target languages and $code is not on it, or when $code is on the
# user's target exclusion list; returns 0 otherwise.
#
# Expects $opt->{'language-target'} to be an array reference and
# $opt->{'language-target-exclude'} a comma separated string (or unset).
sub language_disallowed
{
    my ($code, $opt) = @_;
    my $language_target         = $opt->{'language-target'};
    my $language_target_exclude = $opt->{'language-target-exclude'};

    # Process only user defined languages or do all
    if (@{$language_target} && !any {$_ eq $code} @{$language_target}) {
        return 1;
    }

    # Do not process excluded languages
    if ($language_target_exclude) {
        my @languages_excluded = split(',', $language_target_exclude);
        return 1 if (any {$_ eq $code} @languages_excluded);
    }
    return 0;
}

# Decodes HTML entities in a string and returns it as UTF-8 encoded bytes.
#
# Parameters:
#  $string   - text to process
#  $encoding - encoding of $string; anything but 'utf-8' is decoded first
#  $opt      - options hash reference ('type' is read here, the whole
#              hash is passed to prompt() when decoding fails)
# The entities `&nbsp;`, `&lt;` and `&gt;` are kept as entities.
sub language_transcode
{
    my ($string, $encoding, $opt) = @_;

    # In case it's a `config` or `uconfig` file, preserve
    # , and -, as a literal characters
    # Warning - it will be removed in the near future, after config parser for translator is created
    my $type        = $opt->{'type'};
    my $type_config = $type && $type =~ /config|uconfig/;
    my $utf8        = 'utf-8';
    if ($type_config) {
        $string =~ s/,/~,~,~/g;
        $string =~ s/-/~-~-~/g;
    }

    # Preserve actual tags: hide the entities behind placeholders, so the
    # decoding below leaves them alone
    $string =~ s/&nbsp;/~!!SS!!~/g;
    $string =~ s/&lt;/~!~!!~/g;
    $string =~ s/&gt;/~!!~!~/g;

    if ($encoding eq $utf8) {

        # Handle consecutive entities first
        my @entities = ($string =~ /&#\d+;[&#\d+;]{2,}/g);
        foreach my $entity (@entities) {
            my $decoded_entity = decode_entities($entity);
            $string =~ s/\Q$entity\E/$decoded_entity/g;
        }

        # Fix the string finally
        utf8::decode($string);
        $string = decode_entities($string);
        utf8::encode($string);
    } else {
        eval {$string = decode($encoding, $string)};
        if ($@) {
            say "Error found: $@";
            if (!prompt('next', $opt)) {
                exit;
            }
        }
        $string = decode_entities($string);
        $string = encode($utf8, $string);
    }

    # Restore special commas and dashes
    if ($type_config) {
        $string =~ s/~-~-~/-/g;
        $string =~ s/~,~,~/,/g;
    }

    # Restore escaped tags
    $string =~ s/~!!~!~/&gt;/g;
    $string =~ s/~!~!!~/&lt;/g;
    $string =~ s/~!!SS!!~/&nbsp;/g;
    return $string;
}

# Locates the source language file of a module.
#
# Parameters:
#  $module - module directory name, relative to $data->{'path'}
#  $data   - shared data hash reference ('path' and 'type' are read)
#  $opt    - options hash reference ('allow-symlinks' and
#            'language-source' are read)
# Returns ($exists, $target, $source_file): a true/0 flag, the directory
# holding the language files and the path of the source file.
sub source_data
{
    my ($module, $data, $opt) = @_;
    my $allow_symlinks  = $opt->{'allow-symlinks'};
    my $language_source = $opt->{'language-source'};
    my $type            = $data->{'type'};
    my $target_help     = '';

    # For info files the source is `<type>.info`, or `<type>.info.<lang>`
    # when the source language is not English
    if ($type eq 'config' || $type eq 'uconfig' || $type eq 'module') {
        $language_source = $language_source ne 'en' ? "$type.info.$language_source" : "$type.info";
    }
    if ($type eq 'help') {
        $target_help = '/help';
    }

    my $target       = "$data->{'path'}/$module$target_help";
    my $source_file  = "$target/$type/$language_source";
    my $source_file_ = "$target/$language_source";

    # A symlink only counts when symlinks are allowed
    my $link_ok = sub {
        my ($entry) = @_;
        return !-l $entry || $allow_symlinks;
    };
    my $usable = sub {
        my ($file) = @_;
        return -r $file && $link_ok->($file);
    };

    my $exists = (-d $target &&
                  $link_ok->($target) &&
                  ($usable->($source_file) || $usable->($source_file_) || $target_help)
    ) || 0;

    # Prefer the file inside the type directory, and fall back to the one
    # placed directly in the target directory
    if ($usable->($source_file)) {
        $target .= "/$type";
    } elsif ($usable->($source_file_)) {
        $source_file = $source_file_;
    }
    return ($exists, $target, $source_file);
}

# Returns an array of supported languages,
# taken from Webmin's lang_list.txt file.
#
# Each element is a hash reference holding the `key=value` attributes of a
# line plus its description under 'desc'; elements are sorted by
# description. In scalar context an array reference is returned. If the
# file cannot be opened a warning is printed and the list is empty.
sub list_languages_local
{
    my ($data) = @_;
    my @languages;
    my $list_file = "$data->{'path'}/lang_list.txt";

    if (open(my $list_fh, '<', $list_file)) {
        while (my $line = <$list_fh>) {

            # Expected line format is `key=value,key=value Description`
            next if ($line !~ /^(.*=\w+)\s+(.*)/);
            my ($attributes, $description) = ($1, $2);
            my $language = { 'desc' => $description };
            foreach my $attribute (split(/,/, $attributes)) {
                if ($attribute =~ /^([^=]+)=(.*)$/) {
                    my ($key, $value) = ($1, $2);
                    $key   =~ s/^\s+|\s+$//g;
                    $value =~ s/^\s+|\s+$//g;
                    $language->{$key} = $value;
                }
            }
            push(@languages, $language);
        }
        close($list_fh);
    } else {
        warn "Cannot read $list_file: $!\n";
    }

    @languages = sort {$a->{'desc'} cmp $b->{'desc'}} @languages;
    return wantarray ? @languages : \@languages;
}

# Prepare the string that is going to be sent to translator
#
# The format is taken from $opt->{'translate-format'}, and forced to
# 'html' when $opt->{'translate-format-html'} is set. Returns the
# prepared string.
sub translate_substitute
{
    my ($value, $opt) = @_;
    my $format = $opt->{'translate-format'};
    $format = 'html' if ($opt->{'translate-format-html'});

    $value = language_transcode($value, 'utf-8', $opt);

    # Preserve un-quoted $1, $2, $3 in strings, which are broken in so different ways, in different languages differently
    if ($format eq 'text') {
        $value =~ s/(?<!['|"|`|“|«|=|?|&])(\$(\d+))/%$2/g;
    }

    # Preserve path in strings
    elsif ($format eq 'html') {
        $value =~ s/<tt>/<tt translate="no">/gm;
        $value =~ s/(?:(^(?<!<)(\/[\w+].*?),|^(?<!<)(\/[\w+].*?)\.|(?<!<)(\/[\w+].*?) |(?<!<)(\/[\w+].*?)$))/<span translate="no">"$1"<\/span>/g;

        # Wrap $1, $2 and etc into a tag with translate=no attr skipping already wrapped
        $value =~ s/(?<!["])(\$\d+)(?!["'])/<span translate="no">$1<\/span>/g;

        # Never translate email addresses, as translations to some languages break it
        $value =~ s/([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,63})/<span translate="no">$1<\/span>/g;
    }
    return $value;
}

# Perform white sorcery on returned string from translator,
# as it seems to be breaking quite a lot of things
#
# Parameters:
#  $translated - string as returned by the translator
#  $original   - source string, used to decide which fixes apply
#  $code       - target language code
#  $rtl        - true for right-to-left languages
#  $opt        - options hash reference (translate format is read)
# Returns the corrected translation. The order of the substitutions
# matters, as later ones work on the output of earlier ones.
sub translated_substitute
{
    my ($translated, $original, $code, $rtl, $opt) = @_;
    my $format = $opt->{'translate-format'};
    $format = 'html' if ($opt->{'translate-format-html'});

    my $remove_spaces = sub {
        my ($str) = @_;
        $str =~ s/[ ]+//g;
        return $str;
    };

    # Fix broken by translator HTML closing tags
    $translated =~ s/<\/[ ]*(\w+)>/<\/$1>/g;
    $translated =~ s/<[ ](.*?)[ ]/<$1/g;

    if ($format eq 'text') {

        # Unescape preserved, un-quoted $1, $2, $3
        $translated =~ s/\%[ ]+(\d+)/ \$$1 /gi;

        # If replacement is changed to localized version
        $translated =~ s/%(\d+)/ \$$1/gi;

        # If the string starts with $1 and contains a space
        $translated =~ s/(^[ ]+\$)/\$/gi;

        # Get rid from undesirable spaces in parentheses
        $translated =~ s/([ ]+\))/)/gi;
        $translated =~ s/([ ]+\))/)/gi;
        $translated =~ s/(\([ ]+)/(/gi;
        $translated =~ s/(\([ ]+)/(/gi;

        # Print actual values with percent sign
        $translated =~ s/\([ ]+\$(\d+)[ ]*%\)/(\$$1 %)/gi;

        # If translated $1 incorrectly replaced with %1
        if ($original !~ /%\d+/ && $translated =~ /%\d+/) {
            $translated =~ s/%(\d+)/\$$1/g;
        }

        # The following, is abnormal way to make things work, around of the bugs produced by Google Translate API for (bg).
        if ($code eq 'bg') {
            if ($original =~ /\$(\d+)/ && $translated !~ /\$(\d+)/) {
                $translated =~ s/(\d+)/\$$1/g;
            }
        }

        # For RTL substitute `\d ٪` with `$\d`
        if ($rtl) {
            $translated =~ s/(٪[ ]+(\d+))/\$$2/g;

            # Fix stuck, consecutive $2$1
            $translated =~ s/\$(\d+)\$(\d+)/\$$1 \$$2/g;

            # Urdu returned as ٪1
            if ($code eq 'ur') {
                $translated =~ s/((\d+)٪)/\$$2/g;
            }
        }

        # Language specific fixes
        if ($code eq 'th') {
            $translated =~ s/(([3-9])%)/\$$2/g;
        }
        if ($code eq 'ro') {
            $translated =~ s/ (([1-9])%) / \$$2 /g;
        }
    } elsif ($format eq 'html') {

        # Unescape path in strings
        $translated =~ s/<span.*?translate.*?>(.*?)<\/.*?>/$1/gi;

        # Remove any escapes as returned by translator, when run in HTML mode
        $translated =~ s/&quot;//gi;
        $translated =~ s/&#39;/'/gi;
        $translated =~ s/<tt translate="no">/<tt>/gm;
        $translated =~ s/<span translate="no">(.*?)<\/span>/$1/gm;
    }

    # Restore destroyed tags
    $translated =~ s/<[ ]*(?:(\?\w+|((?![br|hr|p|tt|pre|ul|li|ol])\w+).*?))[ ]*>(.*?)<\/.*?>/<$1>$3<\/$2>/gi;
    $translated =~ s/([ ]*)\$[ ]*(\d+)/$1\$$2/g;
    $translated =~ s/(['|"|`|“|«])[ ]*\$(\d+)[ ]*(['|"|`|“|»])/$1\$$2$3/g;
    $translated =~ s/(<.*?>)[ ]*(.*?)[ ]*(<\/.*?>)/$1$2$3/g;
    $translated =~ s/\$(\d+)[ ]*([:|:])[ ]*\$(\d+)[ ]*/ \$$1:\$$3 /g;
    $translated =~ s/\$(\d+)[ ]*:[ ]*\$(\d+)/\$$1:\$$2/g;
    $translated =~ s/(\p{L})\$(\d+)[ ]+/$1 \$$2 /g;
    $translated =~ s/(\p{L})[ ]+([:|:]){2}[ ]+(\p{L})/$1::$3/g;

    # Remove any whitespaces
    $translated =~ s/([ ]+)/ /g;

    # Fix trailing dot, which is separated from a word
    $translated =~ s/(.)[ ]+\./$1./g;

    # Fix trailing comma, which is separated from a word
    $translated =~ s/(.)[ ]+,/$1,/g;

    # Preserve inner formatting for tag's attrs
    $translated =~ s/<(\w+)[ ]+(\w+)(.*?)(=)[ ]*(.*?)[ ]*(\w+)/<$1 $2=$5$6/g;

    # Last .. should also be preceded by space
    $translated =~ s/[ ]*(\.\.)$/ ../g;

    # Preserve dates example formatting
    $translated =~ s/(\p{L}{2,4}) \/ (\p{L}{2,3}) \/ (\p{L}{2,4}) (\(.*?\))/$1\/$2\/$3 $4/g;

    # If initial value contains in the end of the string `text : ` or `text : $1`,
    # then print it as such and not as `text: ` or `text: $1`
    if ($original =~ /(?:([ ]+:[ ]*$|[ ]+:[ ]*\$\d+$))/) {
        $translated =~ s/(?:(:[ ]*[^:]+$|:$))/ $1/g;

        # Fix incorrectly positioned $1 :$2
        if ($translated =~ /\$(\d+)[ ]+:\$\d+/) {
            $translated =~ s/\$(\d+)[ ]+:\$(\d+)/\$$1 : \$$2/;
        }
    }

    # If initial string contains `/$1/` then format translated accordingly
    if ($original =~ /\/\$\d+\//) {
        $translated =~ s/\/[ ]*\$(\d+)[ ]*\//\/\$$1\//g;
    }

    # If initial string contains `($2 %)` then format translated accordingly
    if ($original =~ /\(\$(\d+)[ ]+%\)/) {
        $translated =~ s/\(\$(\d+)[ ]*%\)/(\$$1 %)/g;
    }

    # Always fix properly included path
    if ($original =~ /<tt>[\/|~\/].*?<\/tt>/) {
        $translated =~ s/(<tt>)([\/|~\/].*?)(<\/tt>)/$1@{[$remove_spaces->($2)]}$3/g;
    }

    # Always consider `/` to be a delimiter and replace ` / ` to `/`
    if ($original =~ /\w+\/\w+/) {
        $translated =~ s/[ ]+(\/)[ ]+/$1/g;
    }

    # Treat `+` the same way, so it is not left as ` + `
    if ($original =~ /\w+\+\w+/) {
        $translated =~ s/[ ]+(\+)[ ]+/$1/g;
    }

    # The following should have no spaces, e.g. `<tt>uid=joe,dc=my-domain,dc=com</tt>`
    $translated =~ s/(<tt>)(uid[ ]*=.*?)(<\/tt>)/$1@{[$remove_spaces->($2)]}$3/g;

    # Always fix glued URL and target (e.g. for de, ja, nl, zh, zh-TW)
    $translated =~ s/(href=)(.*?)(\/target=_)/$1$2\/ target=_/g;

    # If original string contains ` % ` make sure output also has it
    if ($original =~ / % /) {
        $translated =~ s/[ ]*%[ ]*/ % /g;
        $translated =~ s/[ ]*٪[ ]*/ ٪ /g;
    }

    # If original string contains `${VAR}`, fix the output
    if ($original =~ /\$\{/ || $original =~ /\{\$/) {
        $translated =~ s/\$ \{/\${/g;
        $translated =~ s/\{\$ /{\$/g;
        $translated =~ s/\.\.\. /.../g;
    }

    # If original ends with `..` fix translator changing it to `.`
    if ($original =~ /\.\.\s*$/) {
        my ($spaces) = $original =~ /(\s*)\.\.\s*$/;
        $translated =~ s/\s*[.]+$/$spaces../;
    }

    # If original doesn't end with `.` fix translator adding a dot
    if ($original !~ /\.$/ && $translated =~ /\.$/) {
        $translated =~ s/[.]+$//;
    }

    # If original value on template file contained escaped HTML entities, do the same on translated string
    if ($original =~ /&lt;|&gt;/ && $original !~ /<|>/ && $translated =~ /<|>/) {
        $translated =~ s/</&lt;/g;
        $translated =~ s/>/&gt;/g;
    }

    # If original string contains "\n" character, fix it, as it's broken in text mode translations
    if ($original =~ /\\n/) {
        $translated =~ s/(?|(\\\s*n\s*\\\s*n\s*\\\s*n\s*\\\s*n\s*\\\s*n)|(\\\s*n\s*\\\s*n\s*\\\s*n\s*\\\s*n)|(\\\s*n\s*\\\s*n\s*\\\s*n)|(\\\s*n\s*\\\s*n)|(\\\s*n))/@{[lc($remove_spaces->($1))]}/gi;
    }

    # Fix translator bug for Russian language, when the translation string
    # starting with <i></i>, and being replaced to <Я></? or to <Я> closed
    # by a tag holding a single broken character
    if ($code eq 'ru') {
        $translated =~ s/<Я>(.*?)<\/.>/<i>$1<\/i>/gi;
        $translated =~ s/<Я>(.*?)<\/\?/<i>$1<\/i>/gi;
    }

    # Fix altered escaped HTML, i.e. entities that came back escaped twice.
    # Happens, for example, in Japanese language translations
    $translated =~ s/&amp;lt;/&lt;/g;
    $translated =~ s/&amp;gt;/&gt;/g;
    $translated =~ s/&amp;nbsp;/&nbsp;/g;

    # Fix reserved words
    $translated = translated_substitute_reserved($translated);

    return $translated;
}

# Fix odd conversion for some languages
#
# Takes a translated string and returns it with the product name spelled
# `Virtualmin` and with the space between `«` and a following `<` removed.
sub translated_substitute_reserved
{
    my ($translated) = @_;
    $translated =~ s/VirtualMin/Virtualmin/gm;
    $translated =~ s/« </«</gim;
    return $translated;
}

# Make actual translation using Google Translate API
#
# Parameters:
#  $data   - shared data hash reference; 'token' and 'start_time' are
#            read, and updated when the token gets refreshed
#  $opt    - options hash reference (source language and format are read)
#  $target - target language code; the first `_` is replaced with `-`
#  $value  - string to translate
# Returns the translated text. A failed request is reported and repeated
# every 5 seconds until it succeeds.
sub translate
{
    my ($data, $opt, $target, $value) = @_;
    my $source = $opt->{'language-source'};
    my $format = $opt->{'translate-format'};
    $format = 'html' if ($opt->{'translate-format-html'});

    # Replace language code to match what translator expects
    $target =~ s/_/-/;

    # Let the JSON encoder do the quoting of the request body, rather
    # than escaping the value by hand
    my $rsp = "https://translation.googleapis.com/language/translate/v2";
    my $rsc = JSON::PP->new->canonical->encode(
                                               {  'source' => $source,
                                                  'target' => $target,
                                                  'format' => $format,
                                                  'q'      => $value,
                                               });

    # Retry in a loop, so a long outage does not build up nested calls
    while (1) {

        # Updating Google Translate API token to avoid expiration
        my $time = time();
        if ((($time - $data->{'start_time'}) / 60) > 10) {
            say CYAN, "Updating Google Translate API token..", RESET;
            $data->{'token'}      = get_google_translate_token();
            $data->{'start_time'} = $time;
        }
        my $token = $data->{'token'};

        my $rsh = { 'Authorization' => "Bearer \"@{[trim($token)]}\"",
                    'User-Agent'    => 'curl/7.29.1',
                    'Content-Type'  => 'application/json; charset=utf-8', };
        my $rs = HTTP::Tiny->new()->request('POST' => $rsp, { 'headers' => $rsh, 'content' => $rsc });
        my $tc = $rs->{'content'};

        # Extract translation on success
        if ($rs->{'success'}) {
            my $tr = JSON::PP->new->decode($tc);
            return $tr->{'data'}->{'translations'}[0]->{'translatedText'};
        }

        # On error just try again in 5 seconds
        say YELLOW, "Error: Stopped when translating `$target` language", RESET;
        print RED, "Error: Google Translator - $tc", RESET;
        say CYAN, "Retrying automatically in 5 seconds ..", RESET;
        sleep 5;
    }
}

# Run transcoding or translation of module(s) for the very first time
sub go
{
    my ($opt, $data) = @_;
    my ($module, $language);
    my $path                        = $data->{'path'};
    my $type                        = $data->{'type'};
    my $token                       = $data->{'token'};
    my $modules                     = $data->{'modules'};
    my $language_source             = $opt->{'language-source'};
    my $language_source_encoding    = $opt->{'language-source-encoding'};
    my $language_source_ignore_auto = $opt->{'language-source-ignore-auto'};
    my $language_target             = $opt->{'language-target'};
    my $language_target_exclude     = $opt->{'language-target-exclude'};
    my $keys_exclude                = $opt->{'keys-exclude'};
    my $keys_force_translate        = $opt->{'keys-force-translate'};
    my $keys_force_html             = $opt->{'keys-force-html'};
    my $keys_test                   = $opt->{'keys-test'};
    my $values_fix                  = $opt->{'values-fix'};
    my $git_commit                  = $opt->{'git-commit'};
    my $verbose                     = $opt->{'verbose'} || @{$keys_test};
    my $mode_sync                   = $opt->{'mode'} ne 'full';
    my $mode_transcode              = $opt->{'mode'} eq 'transcode';
    my $mode_clean                  = $opt->{'mode'} eq 'clean';
    my $allow_symlinks              = $opt->{'allow-symlinks'};
    my $verbose_silent_mode         = $mode_sync && $verbose != 2;

    if ($type eq 'help') {
        foreach $module (@{$modules}) {
            my @module_help;
            my @module_help_info;
            my $ext            = '.html';
            my $utf8           = '.UTF-8';
            my $old_map        = $language_source_encoding eq 'map';
            my $only_transcode = $opt->{'only-transcode'};
            my ($exists,
$help_path) = source_data($module, $data, $opt); # Check if there has been something to process, if not print a message my $output; # If cleaning called in this mode, throw an error if ($mode_clean) { say RED, "Error: Cleaning can only be performed when the target type is unset!", RESET; exit; } # Build targets first talk_log(("Transcoding/translating " . CYAN BOLD, $module, RESET . " module's help .."), $data, 1); talk_log(("" . CYAN . " .. Building list of help files to process" . RESET . ""), $data, 1); find( { wanted => sub { my $found = $File::Find::name; my $found_nonutf8 = $found =~ s/$utf8//r; my $found_nonutf8_big5 = $found_nonutf8 =~ s/$ext/.Big5$ext/r; my $found_nonutf8_euc = $found_nonutf8 =~ s/$ext/.euc$ext/r; my $found_relative_name = $found =~ s/$data->{'path'}\/$module\/$type\///r; # Check if file exists in both UTF-8 and original encoding, if so, keep original only my @found_nonutf8 = ($found_nonutf8, $found_nonutf8_big5, $found_nonutf8_euc); foreach my $found_nonutf8 (@found_nonutf8) { if (-r $found_nonutf8 && $found ne $found_nonutf8) { talk_log( ("" . RED . " .. Deleting duplicate file in UTF-8 encoding..\n - $found_relative_name" . RESET . "" ), $data, 1); unlink($found); } elsif (-f $found) { push(@module_help, $found); push(@module_help_info, $found_relative_name); } } }, follow => $allow_symlinks, }, $help_path); @module_help = sort(uniq(@module_help)); @module_help_info = sort(uniq(@module_help_info)); if (scalar(@module_help)) { talk_log( ("" . GREEN . " .. Found help files to process" . RESET . 
" \n - @{[join(\"\n - \", @module_help_info)]}" ), $data, 1); } # Store template files my @templates = (); # Store language codes, for which human translations have been done my @help_translated_language_codes = (); foreach $language (@{ $data->{'languages_source_list'} }) { # Get target language code and other attributes my $code = $language->{'lang'}; my $code_ = language_map($code); my $code__ = $code_ =~ s/\.(euc|Big5)//r; my $rtl = $language->{'rtl'}; # Skip translating source, base language next if ($code eq $language_source); # Process only user defined languages or do all; do not process excluded languages if ($only_transcode) { next if language_disallowed($code, $opt); } # Transcode each help file first foreach my $help_file (@module_help) { my $source; if ($help_file =~ /(?|\.($code__|$code).(UTF-8)\.html|\.($code__|$code).(euc)\.html|\.($code__|$code).(Big5)\.html|\.($code__|$code)\.html)/) { my $e_lang = $1; my $e_attr = $2 || ''; $e_lang = "$e_lang.$e_attr" if ($e_attr); # Final file name to be writtten my $help_file_write = $help_file =~ s/$e_lang/$code/r; my $help_file_write_short = $help_file_write =~ s/.*\/(.+)$/$1/r; my $help_file_short = $help_file =~ s/.*\/(.+)$/$1/r; # Search for old format files and encodings, and convert at first if ($old_map) { # Rename, if old style format if ($code ne $e_lang || $e_attr) { talk_log( ("" . YELLOW . " .. Renaming help file to new format - " . RED . "$help_file_short" . RESET . " --> " . GREEN . "$help_file_write_short" . RESET . "" . RESET . "" ), $data, 1); rename_file($help_file, $help_file_write); } # Transcode, if needed if ($e_attr ne "UTF-8") { talk_log( ("" . BRIGHT_MAGENTA . " .. Transcoding help file $help_file_write_short" . RESET . 
"" ), $data, 1); my $data_original = read_file_contents($help_file_write); my ($data_encoding) = language_file_encoding($code, $help_file_write_short); my $data_converted = Encode::encode('utf-8', Encode::decode($data_encoding, $data_original)); if ($data_encoding eq 'utf-8') { $data_converted = $data_original; } $data_converted = translated_substitute_reserved($data_converted); write_file_contents($help_file_write, $data_converted); chmod(0664, $help_file_write); $output++; } } push(@help_translated_language_codes, $code); } elsif ($help_file =~ /\/(?:[\w\-])+\.html$/) { push(@templates, $help_file); } } } @templates = sort(uniq(@templates)); @help_translated_language_codes = sort(uniq(@help_translated_language_codes)); # Translate help files that don't have human translations if (!$only_transcode) { my @help_untranslated_language_codes = grep {!language_disallowed($_, $opt)} @{ $data->{'languages_source_list_codes'} }; my $help_untranslated_language_codes = scalar(@help_untranslated_language_codes); my $help_translated_language_codes = scalar(@help_translated_language_codes); if ($help_untranslated_language_codes) { talk_log( ("" . GREEN . " .. Found help file(s) for potential translation to $help_untranslated_language_codes more language(s), aside from $help_translated_language_codes already translated language(s)" . RESET . 
" \n - @{[join(\"\n - \", map {my $v = $_ =~ s/$path\/$module\/$type\///r; $v} @templates)]}" ), $data, 1); foreach my $help_file_to_translate (@templates) { foreach my $untranslated_language_code (@help_untranslated_language_codes) { my $help_file_translated_auto = $help_file_to_translate =~ s/(.*?)(\.html)$/$1.$untranslated_language_code.auto$2/r; my $help_file_translated_human = $help_file_to_translate =~ s/(.*?)(\.html)$/$1.$untranslated_language_code$2/r; my $help_file_translated_auto_short = $help_file_translated_auto =~ s/$path\///r; my $help_file_translated_human_short = $help_file_translated_human =~ s/$path\///r; # Process only user defined languages or do all; do not process excluded languages next if language_disallowed($untranslated_language_code, $opt); # If a file has human translation variant already if (-r $help_file_translated_human) { if (-r $help_file_translated_auto) { talk_log( ("" . BRIGHT_MAGENTA . " .. human translation variant of file " . YELLOW . "$help_file_translated_human_short" . RESET . " already exists" . RESET . "" ), $data, 1); unlink($help_file_translated_auto); } next; } # If file is already translated, skip if (-r $help_file_translated_auto && !$language_source_ignore_auto) { talk_log( ("" . WHITE . " .. kept existing language file " . RED . "$help_file_translated_auto_short" . RESET . " intact" . RESET . 
"" ), $data, 1); next; } # Open template file that is going to be translated my $help_file_to_translate_content = read_file_contents($help_file_to_translate); # Tags that should not be translated $help_file_to_translate_content =~ s/<tt>/<tt translate="no">/gm if ($help_file_to_translate_content =~ /<tt>.*?(\(\)|[_%$@=.:\/\\]+).*?<\/tt>/gm); $help_file_to_translate_content =~ s/<code>/<code translate="no">/gm; $help_file_to_translate_content =~ s/<kbd>/<kbd translate="no">/gm; # Perform actual translation my $translated = translate($data, $opt, $untranslated_language_code, $help_file_to_translate_content); if ($translated) { $translated =~ s/<kbd translate="no">/<kbd>/gm; $translated =~ s/<code translate="no">/<code>/gm; $translated =~ s/<tt translate="no">/<tt>/gm; $translated = translated_substitute_reserved($translated); write_file_contents($help_file_translated_auto, $translated); talk_log( ("" . WHITE . " .. translated to " . GREEN . "$untranslated_language_code" . RESET . " language and stored to " . GREEN . "$help_file_translated_auto_short" . RESET . " file" . RESET . "" ), $data, 1); $output++; } } } } } if (!$output) { talk_log(("Nothing to do for " . CYAN . "$module" . RESET . " module .."), $data, 1); } } } else { foreach $module (@{$modules}) { my (%template); my ($exists, $mpath, $mfile) = source_data($module, $data, $opt); # Check if there has been something to process, if not print a message my $output; # Get source, base language file read_file($mfile, \%template); # Set message type my $message_type_s1 = 'Transcoding/translating'; $message_type_s1 = 'Searching/replacing in' if (@{$values_fix}); $message_type_s1 = 'Cleaning in' if ($mode_clean); talk_log(("$message_type_s1 " . BLUE BOLD, $module, RESET . 
" module .."), $data, 1); foreach $language (@{ $data->{'languages_source_list'} }) { # Check if there has been something to process, if not print a message my $output_; # Get target language code my $code = $language->{'lang'}; # Skip translating source, base language next if ($code eq $language_source); # Process only user defined languages or do all; do not process excluded languages next if language_disallowed($code, $opt); # Get other target language attributes my $name = $language->{'desc'}; my $rtl = $language->{'rtl'}; my %language; my %language_auto; # If in clean mode delete the file and go next if ($mode_clean) { # Language files my $cfile = "$mpath/$code"; my $cfileauto = "$cfile.auto"; unlink($cfile); unlink($cfileauto); # Module files foreach ('module', 'config', 'uconfig') { my %mdata = %{$data}; $mdata{'type'} = $_; my (undef, undef, $ffile) = source_data($module, \%mdata, $opt); $ffile =~ s/\/$_\//\//; my $cxfile = "$ffile.$code"; my $cxfileauto = "$cxfile.auto"; unlink($cxfile); unlink($cxfileauto); } # Help files my %hdata = %{$data}; $hdata{'type'} = 'help'; my (undef, $hpath) = source_data($module, \%hdata, $opt); my @hdelete_targets; if (-d $hpath) { find( { wanted => sub { my $found = $File::Find::name; if ($found =~ /\.$code\./) { push(@hdelete_targets, $found); } }, }, $hpath); unlink(@hdelete_targets); } # Go next, don't translate $output++; next; } my $message_type_s2 = "Processing"; $message_type_s2 = "Testing translations for selected keys with" if (@{$keys_test}); talk_log(("" . YELLOW . " .. $message_type_s2 $name ($code) language .." . RESET . 
""), $data, $opt->{'verbose'}); my ($language_source_file, $language_source_file_auto, $language_source_file_encoding, $language_source_file_target); if (!@{$keys_test}) { # Get pre-translated strings ($language_source_file, $language_source_file_auto, $language_source_file_encoding, $language_source_file_target) = language_source_file($opt, $data, ('language-name' => $name, 'language-code' => $code, 'module-path' => $mpath, 'module-name' => $module, 'translation-target' => $type )); if (-l $language_source_file_target && !$allow_symlinks) { talk_log( (" " . CYAN BOLD . ".. symlink detected - transcoding/translating skipped for this file $name ($code)" . RESET . " .." ), $data, 1); next; } } # Check if the module's language has already been transcoded/translated, to avoid potential waste if (!@{$keys_test} && !@{$values_fix} && !$language_source_ignore_auto && $language_source_encoding eq 'map' && -r $mpath . "/$code.auto") { talk_log((" .. transcoding/translating skipped for language " . RED BOLD, $code, RESET . 
" .."), $data, 1); next; } while (my ($key, $value) = each %template) { # Don't add special keys next if ($key eq '__norefs'); next if ($key =~ /^\#/); # If set to test certain keys only, skip all other if (@{$keys_test} && (!any {$_ =~ /^$key$/} @{$keys_test})) { next; } # Reset to default to translations in text format $opt->{'translate-format-html'} = 0; # Automatically detect if we can upgrade to HTML mode if ($value =~ /<tt>.*?(\(\)|[_%$@=.:\/\\]+).*?<\/tt>/) { $opt->{'translate-format-html'} = 1; } # Modify `$key` when type is set to "module", otherwise keep default my $key_ = $key; my $key__ = $key; # When processing `module.info` file, ignore most keys # and add suffix to allowed to return expected format if ($type eq 'module') { if ($key !~ /name|desc|longdesc/) { next; } my $code_ = $code; $key__ = "${key}_${code}"; if ($language_source_encoding eq 'map') { $code_ = language_map($code); $key_ = "${key}_${code_}"; } else { $key_ = $key__; } } # This feature just checks and fixes values on target files (translations) # based on template language (def. en). This option expects a param set to `S1:R1,S2:R2`. # Later, all key's values will be checked for `S1` and `S2` in template file and replaced # with `R1` and `R2` accordingly on correspondent target files (translations). It can # be used to check for `<` or `>` on source file and replace existing `<` and `>` on the targets, # with `<` and `>`. If the string contains both search and replace, a warnign will be printed. 
if (@{$values_fix}) { my @search_and_replace = @{$values_fix}; my $search_and_replace_delimiter = $opt->{'values-fix-delimiter'}; my $value_auto = $language_source_file_auto->{$key_} if ($language_source_file_auto && $language_source_file_auto->{$key_}); my $value_human = $language_source_file->{$key_} if ($language_source_file && $language_source_file->{$key_}); my $value_auto_ = $value_auto if ($value_auto); my $value_human_ = $value_human if ($value_human); foreach my $tackle (@search_and_replace) { my @search_and_replace = split($search_and_replace_delimiter, $tackle); my $search = $search_and_replace[0]; my $replace = $search_and_replace[1]; my $conflict_log = sub { talk_log( ("" . MAGENTA . " .. Replacing string \`$replace\` was also found in original string for \`$key\` key. Skipping.." . RESET . "" ), $data, $opt->{'verbose'}); }; if (!$search || !$replace) { say RED, "Error: Cannot parse search replace pair", RESET; exit; } # Check for conflicts if ($value =~ /$search/ && $value =~ /$replace/) { &$conflict_log(); } else { if ($value_auto && $value =~ /$search/ && $value_auto =~ /$replace/) { $value_auto =~ s/$replace/$search/g; } elsif ($value_human && $value =~ /$search/ && $value_human =~ /$replace/) { $value_human =~ s/$replace/$search/g; } } } # Store either fixed or original value if ($value_human) { $language{$key__} = $value_human; } elsif ($value_auto) { $language_auto{$key__} = $value_auto; } my $_value_human = $value_human && $value_human ne $value_human_; my $_value_auto = $value_auto && $value_auto ne $value_auto_; # Print log talk_log(("" . BRIGHT_YELLOW . " .. Replaced found matches:" . RESET . 
""), $data, 1) if ($_value_human || $_value_auto); talk_log(" — $key --> $value_human_", $data, 1), talk_log(" — $key <-- $value_human", $data, 1), $output++ if ($_value_human); talk_log(" — $key --> $value_auto_", $data, 1), talk_log(" — $key <-- $value_auto", $data, 1), $output++ if ($_value_auto); } else { # Previously translated strings from `$code.auto` file, e.g. `de.auto`, # should be considered, unless forced to be dropped if ($language_source_file_auto && $language_source_file_auto->{$key_} && !$language_source_file->{$key_} && (!@{$keys_force_translate} || any {$_ !~ /^$key_$/} @{$keys_force_translate}) && !@{$keys_test} && !$language_source_ignore_auto) { !$verbose_silent_mode && talk_log( ("" . MAGENTA . " .. ($code.auto) found and stored machine translated value for \`$key\` key to" . RESET . "" ), $data, $opt->{'verbose'}); # Add a string to main language file $language_auto{$key__} = $language_source_file_auto->{$key_}; $output++; } # Human translated strings must be added to original `$code` file, e.g. `de` after transcoding elsif ($language_source_file && $language_source_file->{$key_} && !@{$keys_test}) { !$verbose_silent_mode && talk_log( ("" . DARK . " .. ($code) found and transcoded human translated value for \`$key\` key to" . RESET . "" ), $data, $opt->{'verbose'}); # We need to re-encode the string first, depending on which human translated file is being # used (iso or UTF-8). We still need to convert all back and forth to get rid of ugly &#... escapes if (!$mode_sync) { $language_source_file->{$key_} = language_transcode($language_source_file->{$key_}, $language_source_file_encoding, $opt); talk_log(" — " . WHITE . " $language_source_file->{$key_}" . RESET . "", $data, $opt->{'verbose'}); } # If we need to transcode double encoded string, containing both &#... 
and utf-8 chars if ($mode_transcode) { $language_source_file->{$key_} = language_transcode($language_source_file->{$key_}, 'utf-8', $opt); $output_++; } # Add a string to main language file $language{$key__} = $language_source_file->{$key_}; $output++; } # Machine translated strings must be added to `$code.auto` file, e.g. `de.auto` else { # Skip steps below, and only transcode existing values if ($opt->{'only-transcode'} || $mode_transcode) { next; } # Add original value, from source language, to `$code.auto` file without # performing actual translation. It's useful when you want to translate it manually # and simply need to find missing language strings on targeted file. Besides, `config.info` # options are never translated (so far), just as languages that use right-to-left writing if (!@{$keys_test} && ($opt->{'only-diff'} || $type eq 'config' || $type eq 'uconfig' || ($code eq 'ur' && $value =~ /</) || (any {$_ =~ /^$key$/} @{$keys_exclude}))) { if (!$verbose_silent_mode) { talk_log( ("" . DARK . " .. ($code) stored original value for \`$key\` which is" . RESET . "" ), $data, $opt->{'verbose'}); talk_log(" — " . CYAN . " $value" . RESET . "", $data, $opt->{'verbose'}); } $language_auto{$key__} = $value; next; } # Force to use HTML on certain keys only # It's useful to preserve certain lines intact, like # "HIGH:!aNULL:!MD5" when in text mode by defaul if (any {$_ =~ /^$key$/} @{$keys_force_html}) { $opt->{'translate-format-html'} = 1; } # Add translated string talk_log(("" . BRIGHT_MAGENTA . " .. ($code) translated value for \`$key\` to" . RESET . 
""), $data, $verbose); # Prepare sent text my $_value = $value; $value = translate_substitute($value, $opt); # Run actual translation my $translated = translate($data, $opt, $code, $value); talk_log(" — $key --> $_value", $data, $verbose); my $translated_ = $translated; $translated = translated_substitute($translated, $_value, $code, $rtl, $opt); # Store translated string $language_auto{$key__} = $translated; talk_log(" — $key <-- $translated", $data, $verbose); # Print debug log if ($verbose == 2) { say CYAN, " --< $value", RESET; say CYAN, " >-- $translated_", RESET; } $output++; $output_++; } } } # If we are converting from old times, where there were different encodings and # file extensions for it, we need to delete/clear all old, no longer needed files # e.g. will be deleted: uk_UA, zh_TW.Big5, ja_JP.euc, ko_KR.euc and etc. my $file_deleted = ""; if (!@{$keys_test} && $language_source_encoding eq 'map') { my $found; find( { wanted => sub { $found = $File::Find::name; my $code_ = language_map($code); if ($code ne $code_ && ( $found eq "$mpath/$code_" || $found eq "$mpath/$code.UTF-8" || $found eq "$mpath/$code_.UTF-8" || $found eq "$mpath/ru_SU" || (defined($opt->{'type'}) && $opt->{'type'} && $opt->{'type'} =~ /config|uconfig|module/ && ($found eq "$mpath/$opt->{'type'}.info.$code_")))) { unlink($found); $file_deleted .= " $found"; $found =~ s/$mpath\///; talk_log( ("" . YELLOW . " .. Found no longer used language file ($found) and deleted .." . RESET . "" ), $data, 1); } }, follow => $allow_symlinks, }, $mpath); } # Write transcoded/translated file if (!@{$keys_test}) { # Prepend file name for `config.info` or `module.info` modes if (defined($opt->{'type'}) && $opt->{'type'} =~ /config|uconfig|module/) { $code = "$opt->{'type'}.info.$code"; } # Language suffix for automatics my $language_suffix = 'auto'; if ($opt->{'only-diff'} && !$opt->{'only-diff-auto'}) { $language_suffix = 'diff'; } my $file = $mpath . "/$code"; my $file_auto = $mpath . 
"/$code.$language_suffix"; write_file($file, \%language, undef, undef, $mfile, 1), chmod(0664, $file) if (%language); write_file($file_auto, \%language_auto, undef, undef, $mfile, 1), chmod(0664, $file_auto) if ((%language_auto || !%language_auto) && !$opt->{'only-transcode'}); # Remove empty files unlink($file) if (-z $file); unlink($file_auto) if (-z $file_auto); # Git auto-commit after language was transcoded/translated if ($git_commit) { # my $gs = "$module"; my $gf = trim("$file $file_auto $file_deleted"); say GREEN, ".. creating Git commit to current repo for " . BRIGHT_YELLOW . "$gf" . RESET . " files .."; system("git add $gf"); system("git commit $gf -m 'Add transcoding/translation for \`$module\` module of $name ($code)'"); } } if (!$output_ && $mode_sync && $language_source_encoding eq 'utf-8' && !@{$keys_test} && !@{$values_fix}) { talk_log( ("" . BRIGHT_CYAN . " .. All strings are in sync already for $name ($code) language" . RESET . ""), $data, 1); } if ($output_ && $mode_transcode) { talk_log( ("" . BRIGHT_GREEN . " .. All double-encoded strings were recovered for $name ($code) language" . RESET . "" ), $data, 1); } } if (!$output) { talk_log(("Nothing to do for " . BLUE . "$module" . RESET . " module .."), $data, 1); } say GREEN, ".. done ", RESET; } } }

# get_google_translate_token()
# Asks the `gcloud` command line tool for an application-default access token.
# Returns the token exactly as printed by `gcloud` on success, or 0 after
# reporting the problem through errors() when `gcloud` is missing or failed.
sub get_google_translate_token {
    my $gc = `gcloud -v 2>&1`;

    # The check must be a logical OR: with `//` the left side (`!$gc`) is always
    # defined, so the version banner was never actually inspected
    if (!$gc || $gc !~ /Google Cloud SDK/) {
        errors('gcloud-missing');
        return 0;
    }
    my $token = `gcloud auth application-default print-access-token`;

    # Backticks return undef if the command could not be run at all
    $token //= '';
    if ($token =~ /ERROR:/) {
        errors('gcloud-error', $token);
        return 0;
    }
    return $token;
}

# prompt(question, &options)
# Asks a yes/no question on the console and returns true only if the answer
# was "y". When the `assumeyes` option is set, returns 1 without asking. The
# special question 'next' asks "Do you want to proceed?". An answer other than
# "y" or "n" is asked for again, up to two more times.
sub prompt {
    my ($q, $opt) = @_;
    if ($opt->{'assumeyes'}) {
        return 1;
    }
    if ($q eq 'next') {
        return prompt("Do you want to proceed?", $opt);
    }

    # Prints given text unbuffered and returns lower-cased answer
    my $read_answer = sub {
        my ($text) = @_;
        local $| = 1;
        print DARK, "$text", RESET;
        my $answer = <STDIN>;

        # End of input is treated as an empty answer
        return '' if (!defined($answer));
        chomp($answer);
        return lc($answer);
    };
    my $answer = $read_answer->("$q [y/N]: ");
    if ($answer ne 'y' && $answer ne 'n') {
        my $repeat = ("" . BRIGHT_MAGENTA . "Please enter" . RESET . " " . YELLOW . "[y]" . RESET . " " . MAGENTA . "or" . RESET . " " . YELLOW . "[n]" . RESET . ": ");
        $answer = $read_answer->($repeat);
        if ($answer ne 'y' && $answer ne 'n') {
            $answer = $read_answer->($repeat);
        }
    }
    return $answer eq 'y';
}

# ask(question)
# Prints the question followed by a colon and returns the line typed by the
# user without the trailing newline, or undef if input has ended.
sub ask {
    my ($q) = @_;
    local $| = 1;
    print DARK, "$q: ", RESET;
    my $answer = <STDIN>;
    chomp($answer) if (defined($answer));
    return $answer;
}

# errors(code, [details])
# Prints in red the error message that belongs to the given error code. The
# second parameter is inserted into messages that quote an offending value.
sub errors {
    my ($e, $error) = @_;
    my $message;
    if ($e eq 'language-target') {
        $message =
"Error: Using \`$error\` as a target language(s) is refused. The value(s) should match to one of the language codes from the language list file.";
    } elsif ($e eq 'language-source') {
        $message =
"Error: Using \`$error\` as a source language is refused. The value should match to one of the language codes from the language list file.";
    } elsif ($e eq 'gcloud-missing') {
        $message =
"Error: Command \`gcloud\` to manage Google Cloud Platform resources and developer workflow is not available.";
    } elsif ($e eq 'gcloud-error') {
        $message = "Error: $error";
    } else {

        # Unknown code - still print something meaningful instead of undef
        $message = defined($error) ? "Error: $error" : "Error: Unknown error";
    }
    say RED, $message, RESET;
}

# talk_log(message, &data, output-to-console)
# Prints the message to the console if the third parameter is true, and also
# writes it to the log file handle stored in `out` key of data hash, if any.
sub talk_log {
    my ($what, $data, $output_to_console) = @_;

    # Output to console
    if ($output_to_console) {
        say $what;
    }

    # Store to log
    my $out = $data->{'out'};
    if ($out) {

        # Drop color sequences; the escape character is matched optionally,
        # so it is not left behind in the log
        $what =~ s/\e?\[\d+m//g;
        say {$out} $what;
    }
}

# talk(what, &options, &data)
# Prints one of the predefined console notices: 'affected' lists modules and
# languages about to be processed (and exits if there are no modules), while
# '*-pre' values print a warning shown before the corresponding operation.
sub talk {
    my ($what, $opt, $data) = @_;
    if ($what eq 'affected') {
        my $affected       = "@{$data->{'modules'}}";
        my $affected_count = scalar(split(' ', $affected));
        $affected =~ s/\b(lang)\b/(lang)/;
        if ($affected) {
            say GREEN, "Affected modules " . RESET, DARK . "[$affected_count]" . RESET . ": ", YELLOW BOLD, $affected, RESET;
        } else {
            say RED, "Error: No modules found to operate on", RESET;
            exit;
        }

        # Explicitly given targets win over complete languages list
        my $languages       = ("@{$opt->{'language-target'}}" || "@{$data->{'languages_source_list_codes'}}");
        my $languages_count = scalar(split(' ', $languages));
        say GREEN, "Affected languages" . RESET, DARK . " [$languages_count]" . RESET . ": ", YELLOW BOLD, "" . $languages . "", RESET;
    }
    if ($what eq 'clean-pre') {

        # Reset colors at the end, like all other notices do
        say RED, "Danger! ", RESET, WHITE,
"The following operation will delete all files related to the affected lang-\nuage, including machine-translated files in all the mentioned modules listed above.",
          RESET;
    }
    if ($what eq 'overwrite-pre') {
        say RED, "Warning! ", RESET, WHITE,
"The following operation will force-translate and overwrite mentioned\nlanguages in all mentioned modules listed above, using ",
          YELLOW BOLD, $opt->{'language-source'}, RESET,
          " as a template lang-\nuage. The following directory " . YELLOW BOLD, $data->{'path'}, RESET,
" with human translated\nmodule strings, will be used instead of machine translations. The operation\nlog will be written in " . YELLOW BOLD, $opt->{'log'}, RESET, " file.", RESET;
    }
    if ($what eq 'sync-pre') {
        say BRIGHT_MAGENTA, "Attention! ", RESET, WHITE,
"The following operation will synchronize mentioned languages,\nfor all mentioned modules listed above, using ",
          YELLOW BOLD, $opt->{'language-source'}, RESET,
" as a template language.\nThe keys found in target languages but missing on source, will be deleted,\nwhile newly added keys on source, will be translated for targets. The ope-\nration log will be written in " . YELLOW BOLD, $opt->{'log'}, RESET, " file.", RESET;
    }
    if ($what eq 'fix-pre') {
        say BRIGHT_BLUE, "Attention! ", RESET, WHITE,
          "The following operation will search for matching string in template\nfile ",
          YELLOW BOLD . "(" . $opt->{'language-source'} . ")" . RESET,
" and replaces matches on translation files accordingly. The operation\nlog will be written in " . YELLOW BOLD, $opt->{'log'}, RESET, " file.", RESET;
    }
    if ($what eq 'transcode-pre') {
        say BRIGHT_BLUE, "Attention! ", RESET, WHITE,
"The following operation will recover double-encoded strings in targeted lan-\nguages. The operation log will be written in " . YELLOW BOLD, $opt->{'log'}, RESET, " file.", RESET;
    }
}

# trim(string)
# Returns the string with leading and trailing whitespace removed. An
# undefined value is returned as is.
sub trim {
    my $s = shift;
    return $s if (!defined($s));
    $s =~ s/^\s+|\s+$//g;
    return $s;
}

=pod =head1 NAME language-manager =head1 DESCRIPTION Manage Webmin/Usermin module language files (lang|ulang|help|config|uconfig|module), to perform transcoding, translation, spell check and add/remove language strings. =head1 SYNOPSIS webmin language-manager [options] =head1 OPTIONS =over =item --help, -h Give this help list. Examples of usage: Synchronize all language keys for Apache module, based on template language. Newly added entries to Apache template language file (def. en), will be translated and inserted into all other machine translated language files, while deleted entries will also be removed on all targets (translations), including human translated files. The value for "defines_desc" will be force re-translated and translation will be done in HTML format. - webmin language-manager -m=apache -kft=defines_desc -kfh=defines_desc Synchronize all modules' "help/" language files. Newly added files will be translated and already translated, both human and machine translations, will be kept intact. - webmin language-manager -w=help Check for Software Packages module, all matching escaped HTML "<" and ">" entities in template file (def. en), and if found, make sure that translations (including machine translations) for the same key, contain exact escaped HTML entities, as on template string, rather than "<" or ">". Technically, it's possible to check and replace anything on language files using this command. - webmin language-manager -vf="<:<,>:>" -m=software Translate all available languages, using old-time encoding map, for BIND module, using as type "lang" directory, discarding human translations for Hebrew, keeping original value (not translating) for key "mass_desc", and printing verbose output.
- webmin language-manager -x=full -e=map -m=bind8 -w=lang -se=he -ke=mass_desc Transcode only and rename all modules' "help/" old-time format files. Old files, such as "ja_JP.euc.html", "ko_KR.euc.html", "zh_TW.Big5.html" and "ru_RU.html" will be automatically renamed and/or deleted. - webmin language-manager -w=help -e=map -ot Only transcode language files, using old-time encoding map, from files being in different encodings, to new style, where all language files are in "utf-8" encoding, for Apache module. No translations will be made, and no ".auto" files will be created. - webmin language-manager -x=full -e=map -m=apache -ot Repair human translated language files, which are stored in "utf-8" encoding already but still have HTML entities. - webmin language-manager -m=virtual-server -t=no,es -x=transcode Test translations for "index_stopmsg,trusted_warning" keys, in Russian and German languages, in BIND module, print on-screen results and exit. - webmin language-manager -m=bind8 -t=ru,de -kt=index_stopmsg,trusted_warning Clean build directory by removing all kinds of language files except English. - webmin language-manager --mode=clean =item --mode, -x <sync|full|transcode|clean> Mode can be either <sync> or <full> or <transcode> or <clean>. Default is set to "sync" and will only keep the keys found on template language file, while missing keys in target languages (translations), will be deleted, and newly added keys to template language file, will be translated. Mode "full" is meant to perform full translation, keeping human translated strings and overwriting all machine translations done in the past (not recommended to run). Mode "transcode" is useful to fix human translated language files, which are stored in "utf-8" encoding already, while still having "&iacute;" or "&eacute;" HTML entities. Mode "clean" is useful to clean build package by removing all kinds of language files except source language.
=item --type, -w <lang|ulang|help|config|uconfig|module> Type of target to use for operations. Either <lang>, <ulang>, <help> directories or language info file, as <config>, <uconfig> or <module>. Default is set to "lang" for Webmin and "ulang" for Usermin. =item --modules, -m Comma separated list of modules to operate on. Default is to operate on all available modules. =item --modules-exclude, -me Comma separated list of modules to exclude. =item --language-target, -t Comma separated list of affected language codes to operate on. Default is to operate on all languages defined in language list. =item --language-target-exclude, -te Comma separated list of target language codes that should be excluded from processing. =item --language-source, -s Language to use as a base language. Default is set to English. =item --language-source-exclude, -se Comma separated list of human translated language codes to exclude from adding to target file (always translate instead). It might be useful if human translated files are low quality or corrupted. =item --language-source-ignore-auto, -sia Drop all previous machine translations (those that are stored in ".auto" files) and re-run translations over again - using this option is strongly discouraged. =item --language-source-encoding, -e Encoding of human translated language files. Default is set to "utf-8". Available options are <utf-8>, <auto>, <map>. Option "auto" is used to detect encoding automatically, while option "map" is used to use old-time encoding map, where each language file had different encoding, and nowadays, should not be used, as all language files are going to be encoded in "utf-8" already. It's always best to avoid using this option. =item --only-diff, -od Use this option to extract a difference and find missing keys on target language (against template) and save currently untranslated strings to a separate file, with ".diff" extension.
=item --only-transcode, -ot Use this option to simply convert old-time style module language files (where each language had different encoding) into new format, with all files encoded in "utf-8". Old files, such as "ja_JP.euc", "ko_KR.euc", "zh_TW.Big5" and "ru_RU" will be automatically deleted. =item --keys-exclude, -ke Comma separated list of keys that shouldn't be translated and are stored with original values. =item --keys-force-translate, -kft Comma separated list of keys that will be force re-translated, even if they were already machine translated. Using this option will not affect human translated keys. =item --keys-force-html, -kfh Comma separated list of keys that will be translated in HTML format. It's useful to strictly preserve content inside <tt></tt> tags when translations are done in text mode by default. =item --keys-test, -kt Comma separated list of keys to test translation engine on; in this mode, translated strings are not saved. =item --values-fix, -vf Comma separated list of colon separated search/replace pairs (e.g. "S1:R1,S2:R2") to fix on translation files, applied only if the template file (def. en) contains the search value. =item --values-fix-delimiter, -vfd If not set, by default, colon is used as separator for search/replace values in fix command. =item --git-commit, -gc Create a separate commit to the current repo, after each language was transcoded/translated. =item --log, -l Saves complete operation log. By default, log file is saved to "/tmp/language-manager-{timestamp}.txt" file. =item --verbose, -v Verbosely print processed files and provide detailed output. By default, verbose output is enabled. =back =head1 LICENSE AND COPYRIGHT Copyright 2020 Ilia Rostovtsev <ilia@virtualmin.com>Private