Spaces:
Sleeping
Sleeping
| #!/usr/bin/perl -w | |
| # uroman Nov. 12, 2015 - July 25, 2016 | |
| # version v0.7 | |
| # Author: Ulf Hermjakob | |
| # Usage: uroman-quick.pl {-l [tur|uig|ukr|yid]} < STDIN | |
| # currently only for Arabic script languages, incl. Uyghur | |
| $|=1; | |
| use FindBin; | |
| use Cwd "abs_path"; | |
| use File::Basename qw(dirname); | |
| use File::Spec; | |
| my $bin_dir = abs_path(dirname($0)); | |
| my $root_dir = File::Spec->catfile($bin_dir, File::Spec->updir()); | |
| my $data_dir = File::Spec->catfile($root_dir, "data"); | |
| my $lib_dir = File::Spec->catfile($root_dir, "lib"); | |
| use lib "$FindBin::Bin/../lib"; | |
| use NLP::Romanizer; | |
| use NLP::UTF8; | |
| $romanizer = NLP::Romanizer; | |
| %ht = (); | |
| $lang_code = ""; | |
| while (@ARGV) { | |
| $arg = shift @ARGV; | |
| if ($arg =~ /^-+(l|lc|lang-code)$/) { | |
| $lang_code = lc (shift @ARGV || "") | |
| } else { | |
| print STDERR "Ignoring unrecognized arg $arg\n"; | |
| } | |
| } | |
| $romanization_table_arabic_block_filename = File::Spec->catfile($data_dir, "romanization-table-arabic-block.txt"); | |
| $romanization_table_filename = File::Spec->catfile($data_dir, "romanization-table.txt"); | |
| $romanizer->load_romanization_table(*ht, $romanization_table_arabic_block_filename); | |
| $romanizer->load_romanization_table(*ht, $romanization_table_filename); | |
| $line_number = 0; | |
| while (<>) { | |
| $line_number++; | |
| my $line = $_; | |
| print $romanizer->quick_romanize($line, $lang_code, *ht) . "\n"; | |
| if ($line_number =~ /0000$/) { | |
| print STDERR $line_number; | |
| } elsif ($line_number =~ /000$/) { | |
| print STDERR "."; | |
| } | |
| } | |
| print STDERR "\n"; | |
| exit 0; | |