langpacks.pl 13.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
#!/usr/bin/perl -w

# Copyright (C) 2010 Catalyst IT Ltd
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

18 19 20 21 22 23 24 25 26 27 28 29 30 31
###########################
# Mahara langpacks generation file.
#
# This script pulls the latest .po translation files from all the translation
# branches, processes them into Mahara language files, and compresses them
# into tarballs.
#
# NOTE: It contains multiple hard-coded lists of branch names to get translations
# from. Whenever there's a new Mahara series, you'll need to manually update those
# lists. Each one has been tagged with this comment:
#
#     # @UPDATE when there is a new series
#

32 33
use Data::Dumper;
use FindBin;
34
use File::Path qw(mkpath rmtree);
35
use LWP::UserAgent;
36

# Refuse to run unless every environment variable the script relies on is
# set to a non-empty value.  An empty $ENV{DATA} would otherwise turn the
# working directories built from it into paths directly under "/".
foreach my $c (qw(DATA DOCROOT SCRIPTS)) {
    ( defined $ENV{$c} && length $ENV{$c} ) or die ("\$ENV{$c} undefined");
}

# Locations supplied by the environment.
my $DATA      = $ENV{DATA};
my $DOCROOT   = $ENV{DOCROOT};
my $SCRIPTS   = $ENV{SCRIPTS};

# Helper commands; the helper scripts live next to this script.
my $CLEANCMD  = "/usr/bin/php $FindBin::Bin/clean-php.php";   # cleans stray php out of a php langpack
my $SYNTAXCMD = "/usr/bin/php -l";                            # php syntax check
my $UTF8CMD   = "/usr/bin/perl $FindBin::Bin/check-utf8.pl";  # utf8 check; prints only when it finds a problem
my $POCMD     = "/usr/bin/perl $FindBin::Bin/po-php.pl";      # builds a langpack from a .po file

# Working directories, all below $DATA.
my $GITDIR    = "${DATA}/git";        # one git clone per language
my $BZRDIR    = "${DATA}/bzr";        # one launchpad branch per Mahara series
my $DIRTY     = "${DATA}/old";        # php langpacks as copied from the repository
my $CLEAN     = "${DATA}/new";        # cleaned / generated langpacks
my $TARBALLS  = "${DATA}/tarballs";   # staging area for tarballs, diffs and the state file
my $MAHARA    = "${DATA}/mahara";     # Mahara checkout (provides en.utf8)

my $MAHARAREMOTE = 'https://git.mahara.org/mahara/mahara.git';
my $REPOLIST     = 'https://git.mahara.org/scripts/mahara-scripts/raw/master/mahara-langpacks/language-repos.txt';

# Make sure the working directories exist before anything writes to them.
mkpath $_ for ( $GITDIR, $DIRTY, $CLEAN, $TARBALLS );

# Log the start of the run with a local-time timestamp (YYYY-MM-DD HH:MM:SS),
# formatted in-process rather than by spawning date(1).
{
    my @now = localtime;
    printf STDERR "Checking langpacks for updates: %04d-%02d-%02d %02d:%02d:%02d\n",
        $now[5] + 1900, $now[4] + 1, @now[ 3, 2, 1, 0 ];
}

# A language repo list can be put in the $DATA dir for testing.  If there's not one
# there, try to get an up-to-date one out of the mahara-scripts repository
# (allows updates to the repo list without having to redeploy the package).
my $repolist;
if ( -f "$DATA/language-repos.txt" ) {
    print STDERR "Using repository list in $DATA/language-repos.txt\n";
    open my $repofh, '<', "$DATA/language-repos.txt" or die $!;
    # Read the whole file in one go; the change to $/ is confined to the do-block.
    $repolist = do { local $/ = undef; <$repofh> };
    close $repofh;
}
else {
    print STDERR "Retrieving repository list from $REPOLIST\n";

    my $ua = LWP::UserAgent->new;
    $ua->timeout(10);
    $ua->env_proxy;
    my $response = $ua->get($REPOLIST);

    if ( ! $response->is_success ) {
        print STDERR "Failed to get language list, exiting.\n";
        # Non-zero status so that callers can tell this run did not succeed.
        exit 1;
    }
    $repolist = $response->content;
}

# Build %langs from the repository list.  A usable line is a language code of
# 2-5 letters/underscores, whitespace, then the repository location; any line
# that does not have that shape is ignored.
my %langs = ();
if ( defined $repolist ) {
    foreach my $line ( split /\n/, $repolist ) {
        if ( $line =~ m/^([a-zA-Z_]{2,5})\s+(\S+)\s*$/ ) {
            $langs{$1} = { repo => $2 };
        }
    }
}

# Languages are processed in sorted order.  If the list yielded no usable
# entries, fall back to a built-in set of language codes.
my @langkeys = sort keys %langs;
if ( scalar @langkeys < 1 ) {
    @langkeys = qw(ar ca cs da de el en_GB en_us es eu fi fr he it ja ko mi nl nb ru sl zh_TW);
}

print STDERR "Languages: " . join(' ', @langkeys) . "\n";
# State carried over from the previous run: for each language its repository
# and, per branch, the last commit processed together with status and errors.
# The file is written at the end of the script with Data::Dumper as Perl
# source that assigns to $last.
my $last;
my $savefile = "$TARBALLS/mahara-langpacks.last";
if ( -f $savefile ) {
    if ( open my $lastfh, '<', $savefile ) {
        my $saved = do { local $/ = undef; <$lastfh> };
        close $lastfh;
        # NOTE: this executes the file's contents as Perl code, so the file
        # must only ever be writable by this script.
        eval $saved;
        print STDERR "Could not parse $savefile: $@" if $@;
    }
    else {
        print STDERR "Could not read $savefile: $!\n";
    }
}

# First run, or the saved state was unusable: start from scratch, which makes
# every branch look changed.
if ( ref($last) ne 'HASH' ) {
    $last = {};
}

# Mahara checkout, used for reading the en.utf8 langpack
# during php langpack sanitisation.
if ( ! -d $MAHARA ) {
    system 'git', 'clone', '--quiet', $MAHARAREMOTE, $MAHARA;
}

# Only fetch when we really are inside the checkout, so that git never
# operates on whatever directory the script happened to be started from.
if ( chdir $MAHARA ) {
    system 'git', 'fetch', '--quiet', 'origin';
}
else {
    print STDERR "Cannot chdir to $MAHARA: $!\n";
}


# For launchpad, all languages are in a single branch, so update the lot
system 'bzr', 'launchpad-login', 'dev-mahara';
if ( ! -d $BZRDIR ) {
    system 'bzr', 'init-repo', $BZRDIR;
}
# @UPDATE when there is a new series
my @branches = qw(15.04_STABLE 15.10_STABLE 16.04_STABLE 16.10_STABLE master);

# Create each series branch on first use, otherwise pull the latest revisions.
foreach my $branch (@branches) {
    my $branchdir = "$BZRDIR/$branch";
    if ( ! -d $branchdir ) {
        system 'bzr', 'branch', "lp:~mahara-lang/mahara-lang/${branch}-export", $branchdir;
    }
    elsif ( chdir $branchdir ) {
        system 'bzr', 'pull';
    }
    else {
        # Never pull in the wrong directory.
        print STDERR "Cannot chdir to $branchdir: $!\n";
    }
}

# Series for which langpacks are built.  Used both as the branch list for
# launchpad languages and to filter the remote branches of git languages.
# @UPDATE when there is a new series
my @supportedbranches = qw(15.04_STABLE 15.10_STABLE 16.04_STABLE 16.10_STABLE master);

# Main loop: for every language and every supported branch, rebuild the
# langpack when the upstream tip differs from the commit recorded by the
# previous run.  Results (type, status, errors, commit) are stored in
# $last->{$lang}->{branches}->{$branch}; a tarball is only produced when
# every check passed.
foreach my $lang (@langkeys) {

    # A language without saved state defaults to its lang-old repository.
    if ( ! defined $last->{$lang} ) {
        $last->{$lang} = { repo => "https://git.mahara.org/lang-old/$lang.git" };
    }

    # A location given in the repository list overrides the saved one.
    if ( defined $langs{$lang}->{repo} ) {
        $last->{$lang}->{repo} = $langs{$lang}->{repo};
    }

    my $repotype;
    my $remote       = $last->{$lang}->{repo};
    my $gitlangdir   = "$GITDIR/$lang";
    my $dirtylangdir = "$DIRTY/$lang";
    my $cleanlangdir = "$CLEAN/$lang";

    mkpath $dirtylangdir;
    mkpath $cleanlangdir;

    if ( $remote =~ m/^lp:mahara-lang/ ) {
        $repotype = 'launchpad';
        @branches = @supportedbranches;
    }
    elsif ( $remote =~ m{^https://git\.mahara\.org|^https://gitlab\.com} ) {
        $repotype = 'git';

        # $remote comes from the downloaded list, so keep it away from the shell.
        if ( ! -d $gitlangdir ) {
            system 'git', 'clone', '--quiet', $remote, $gitlangdir;
        }
        if ( ! chdir $gitlangdir ) {
            # Without this guard the git commands below would run against
            # whichever repository the previous language left us in.
            print STDERR "Cannot chdir to $gitlangdir: $!; skipping $lang\n";
            next;
        }
        system "git fetch --quiet";

        # Keep only the supported series that exist on the remote.  Lines of
        # "git branch -r" look like "  origin/<name>"; the "origin/HEAD -> ..."
        # alias line contains spaces and therefore never matches.
        my %wanted = map { $_ => 1 } @supportedbranches;
        @branches = ();
        foreach my $remotebranch (`git branch -r`) {
            if ( $remotebranch =~ m{^\s*origin/(\S+)\s*$} && $wanted{$1} ) {
                push @branches, $1;
            }
        }
    }
    else {
        print STDERR "Don't know what to do with $remote; skipping $lang\n";
        next;
    }

    foreach my $branch (@branches) {

        # Identify the upstream tip.  Always enter the repository first so the
        # log command is guaranteed to run against the right checkout.
        my $remotecommit;
        my $currentdir;
        if ( $repotype eq 'launchpad' ) {
            $currentdir = "$BZRDIR/$branch";
            chdir $currentdir;
            # Not every language has a translation in every series.
            next if ! -f "$currentdir/mahara/$lang.po";
            my $remotecommitcmd = "bzr log --line mahara/$lang.po | head -1";
            $remotecommit = `$remotecommitcmd`;
        }
        else {
            $currentdir = $gitlangdir;
            if ( ! chdir $currentdir ) {
                print STDERR "Cannot chdir to $currentdir: $!; skipping $lang $branch\n";
                next;
            }
            my $remotecommitcmd = 'git log --pretty=format:"%H %ai %an" origin/' . $branch . ' | head -1';
            $remotecommit = `$remotecommitcmd`;
        }
        chomp $remotecommit;

        if ( ! defined $last->{$lang}->{branches}->{$branch} ) {
            $last->{$lang}->{branches}->{$branch} = {};
        }
        # Shorthand for this branch's record; it is the same hash as the one
        # inside $last, so every write below ends up in the saved state.
        my $branchinfo = $last->{$lang}->{branches}->{$branch};

        my $filenamebase = "$lang-$branch";
        my $tarball = "$TARBALLS/$filenamebase.tar.gz";
        my $diff    = "$TARBALLS/$filenamebase.diff";

        # Remove leftovers from the staging area.
        -f $tarball && unlink $tarball;
        -f $diff && unlink $diff;

        my $lastruncommit = '';
        if ( defined $branchinfo->{commit} ) {
            $lastruncommit = $branchinfo->{commit};
        }

        # Nothing to do when upstream has not moved since the last run.
        next if "$remotecommit" eq "$lastruncommit";

        print STDERR "Updating $lang $branch\n";

        if ( $repotype eq 'git' ) {
            # Move the local branch to the remote tip, creating it on first
            # use.  show-ref checks for exactly this branch name.
            if ( system( 'git', 'show-ref', '--verify', '--quiet', "refs/heads/$branch" ) == 0 ) {
                system "git checkout --quiet $branch";
                system "git reset --hard -q origin/$branch";
            }
            else {
                system "git checkout --quiet -b $branch origin/$branch";
            }
        }

        $branchinfo->{status} = 0;
        $branchinfo->{errors} = '';

        # Start from an empty output directory.
        my $cleanbranchdir = "$cleanlangdir/$branch";
        -d "$cleanbranchdir/lang" && rmtree $cleanbranchdir;
        ! -d $cleanbranchdir && mkpath $cleanbranchdir;

        my $pofile = "$currentdir/mahara/$lang.po";

        if ( -f $pofile ) {

            $branchinfo->{type} = 'po';

            print STDERR "$lang $branch: using .po file\n";

            # Check utf8ness of .po file; any output counts as a failure.
            my $output = `$UTF8CMD $pofile`;
            if ( length $output ) {
                $branchinfo->{errors} = "$pofile\n$output";
                $branchinfo->{status} = -1;
            }

            # Create langpack from .po file; again any output counts as a failure.
            my $pocmd = "$POCMD $pofile $cleanbranchdir \"$lang.utf8\"";
            $output = `$pocmd`;

            if ( length $output ) {
                $branchinfo->{errors} .= "Failed to create langpack from .po file $pofile\n";
                $branchinfo->{errors} .= "$output";
                $branchinfo->{status} = -1;
            }

        }
        elsif ( $repotype eq 'git' ) {

            # .po is not available, so this is a php langpack

            my $langconfig = 0;

            # For all-lowercase xx_yy codes accept the directory with either a
            # lower-case or an upper-case country part.
            if ( $lang =~ m/^([a-z]{2})_([a-z]{2})$/ ) {
                $langconfig = -f "$currentdir/lang/${1}_" . lc($2) . '.utf8/langconfig.php'
                  || -f "$currentdir/lang/${1}_" . uc($2) . '.utf8/langconfig.php';
            }
            else {
                $langconfig = -f "$currentdir/lang/$lang.utf8/langconfig.php";
            }

            if ( ! $langconfig ) {
                print STDERR "$lang $branch: Couldn't find lang/$lang.utf8/langconfig.php in $currentdir; skipping\n";
                next;
            }

            $branchinfo->{type} = 'mahara';

            print STDERR "$lang $branch: sanitising\n";

            # sanitise langpack
            my $dirtybranchdir = "$dirtylangdir/$branch";
            ! -d $dirtybranchdir && mkpath $dirtybranchdir;

            # The glob has to be expanded by the shell.
            system("cp -r $currentdir/" . '[a-z]* ' . $dirtybranchdir);

            # Make en.utf8 available.  The hard reset is only issued from inside
            # the Mahara checkout.
            if ( chdir $MAHARA ) {
                system "git reset --hard -q origin/$branch";
            }
            else {
                print STDERR "Cannot chdir to $MAHARA: $!\n";
            }

            # Clean out stray php from the langpacks
            system "$CLEANCMD $MAHARA/htdocs $dirtybranchdir $cleanbranchdir";

            # Record what the cleaning step changed.
            chdir $DATA;
            system "diff -Bwr $dirtybranchdir $cleanbranchdir > $diff";

            # Check syntax of php files.  The verdict is taken from the exit
            # status of "php -l"; its output is only kept for the error report.
            chdir $cleanbranchdir;
            my $phpfiles = `find . -name \"\*.php\"`;
            foreach my $phpfile (split("\n", $phpfiles)) {
                $phpfile =~ s/^\s*(\S.*\S)\s*$/$1/;
                next if $phpfile !~ m/php$/;
                my $output = `$SYNTAXCMD $phpfile 2>&1`;
                if ( $? != 0 ) {
                    $branchinfo->{errors} .= "$phpfile\n$output";
                    $branchinfo->{status} = -1;
                }
            }

            my $allfiles = `find .`;

            # Check utf8ness of all files
            foreach my $file (split("\n", $allfiles)) {
                $file =~ s/^\s*(\S.*\S)\s*$/$1/;
                my $output = `$UTF8CMD $file`;
                if ( length $output ) {
                    $branchinfo->{errors} .= "$file\n$output";
                    $branchinfo->{status} = -1;
                }
            }
        }
        else {
            print STDERR "$lang $branch: Couldn't find mahara/$lang.po or lang/$lang.utf8/langconfig.php in $currentdir; skipping\n";
            next;
        }

        # Only package langpacks that passed every check.
        if ( $branchinfo->{status} == 0 ) {
            # Rename the top of the archive to "<lang>.utf8".  The leading "/"
            # of the path is swapped for a "^" anchor in the sed expression,
            # presumably because tar stores member names without it.
            my $strip = $cleanbranchdir;
            $strip =~ s{^/}{^};
            system "tar --transform \"s,$strip,$lang.utf8,\" -zcf $tarball $cleanbranchdir";
        }

        # Remember the commit that was just processed so that the next run can
        # skip this branch until upstream moves again.
        chdir $currentdir;

        my $localcommit;
        if ( $repotype eq 'git' ) {
            $localcommit = `git log --pretty=format:\"%H %ai %an\" $branch | head -1`;
        }
        else {
            $localcommit = `bzr log --line mahara/$lang.po | head -1`;
        }
        chomp $localcommit;
        $branchinfo->{commit} = $localcommit;
    }
}

# Move new tarballs & log files to web directory
foreach my $file (split /\n/, `find $TARBALLS -name \"\*.tar.gz\"`) {
    system 'mv', $file, $DOCROOT;
}

# Diffs are published as "<lang>-<branch>-diff.txt".
foreach my $file (split /\n/, `find $TARBALLS -name \"\*.diff\"`) {
    my $base = $file;
    $base =~ s{^.*/([^/\s]+)\.diff\s*$}{$1};
    system 'mv', $file, "$DOCROOT/${base}-diff.txt";
}

# Generate index.html
system '/usr/bin/perl', "$FindBin::Bin/generate-index.pl", $DOCROOT;

# Save latest commits.  The dump is Perl source assigning to $last, which is
# what the start of the script evaluates on the next run.  The handle is
# closed before the status page is generated so the file is complete on disk
# by then.
if ( open my $savefh, '>', $savefile ) {
    print $savefh Data::Dumper->Dump([$last], ['last']);
    close $savefh or print STDERR "Could not finish writing $savefile: $!\n";
}
else {
    print STDERR "Could not write $savefile: $!\n";
}

# Generate status.html
system '/usr/bin/perl', "$FindBin::Bin/generate-status.pl", $TARBALLS, $DOCROOT;

print STDERR "Done.\n";