#!/usr/bin/perl
use strict;
use warnings;

$| = 1;    # unbuffered STDOUT so progress lines appear immediately

use Unicode::String qw(latin1 utf8);

####################################################################
# Fix Umlaute
# Works on UTF _and_ ISO hosts :-)
#
# Scans the current directory for HTML files whose names start with a
# given prefix and rewrites every non-ASCII character as a numeric
# HTML entity (&#NNN;).  Lines may be UTF-8 or ISO-8859-1 encoded; the
# encoding is detected per line.
#
# Usage: fixumlaute.pl [PREFIX]      (PREFIX defaults to 'cms_doc_')
#
# -- http://www.candy.pgate.net/shop/.tpl/ger/.cms/fixumlaute_pl.txt
my $VERSION = '0.81.02';
####################################################################

##### GLOBAL
my $DEBUG      = 1;                          # 0, 1, 2 -- ZZ
my $KEEP_OWNER = 'nobody';                   # ZZ
my $PREFIX     = $ARGV[0] || 'cms_doc_';
my $FILE_COUNT = 0;

exit main();

##################################################
# main()
#
# Collects the candidate files in the current directory and runs
# checknfixit() on each of them.  Backup copies (*_bak<digits>.htm[l])
# are skipped.
#
# Returns 0 on success, 1 if the directory could not be read.
##################################################
sub main {
    my $dh;
    unless (opendir($dh, '.')) {
        print STDERR "ERR (opendir) $!\n";
        return 1;
    }

    # Read the complete listing before touching any file: checknfixit()
    # creates and renames entries in this very directory.
    # $PREFIX is matched literally (\Q...\E), not as a regex.
    my @candidates = grep {
             /^\Q$PREFIX\E.+\.html?$/
          && !/_bak\d+\.html?$/
    } readdir($dh);
    closedir($dh);

    for my $f (@candidates) {
        print "- $f\n";
        my $rc = checknfixit(file => $f);
        print STDERR "ERROR at $f\n" if $rc;
    }

    print "$FILE_COUNT files have been updated.\n";
    return 0;
}

##################################################
# checknfixit(file => $name)
#
# Copies $name line by line into ".tmp_$name", entity-encoding every
# line that contains non-ASCII bytes.  If at least one line was
# converted, the temp file replaces the original and is chown'ed to
# $KEEP_OWNER; otherwise the temp file is removed.
#
# Returns 0 on success, 1 on error (details are printed to STDERR).
##################################################
sub checknfixit {
    my %args = @_;
    my $file = $args{'file'};
    my $tmp  = ".tmp_$file";

    # Open the input first so a missing/unreadable source file does not
    # leave an empty temp file behind.
    my $in_fh;
    unless (open($in_fh, '<', $file)) {
        print STDERR "ERR E1000: (1) $!  ($file)\n";
        return 1;
    }
    my $out_fh;
    unless (open($out_fh, '>', $tmp)) {
        print STDERR "ERR E1000: (2) $!  ($file)\n";
        close $in_fh;
        return 1;
    }

    my $umlaute_found = 0;
    while (my $line = <$in_fh>) {
        if (is_ascii($line)) {
            print {$out_fh} $line;    # as is
            next;
        }
        $umlaute_found++;
        print {$out_fh} web_enc(_to_latin1($line));    # newline included
    }
    close $in_fh;

    # Buffered write errors only surface at close; never replace the
    # original with a possibly truncated copy.
    unless (close $out_fh) {
        print STDERR "ERR (close) $!\n";
        unlink $tmp;
        return 1;
    }

    unless ($umlaute_found) {
        unlink $tmp or print STDERR "ERR (unlink) $!\n";
        Debug(" Leaving $file unchanged.");
        return 0;
    }

    unless (rename $tmp, $file) {
        print STDERR "ERR (rename) $!\n";
        unlink $tmp;
        return 1;
    }

    # List form: no shell is involved, so odd file names cannot be
    # misinterpreted.  A failing chown is reported but does not undo
    # the conversion.
    my $status = system('chown', $KEEP_OWNER, $file);
    if ($status == -1) {
        print STDERR "ERR (chown) $!\n";
    }
    elsif ($status != 0) {
        print STDERR "ERR (chown) exit status " . ($status >> 8) . "\n";
    }

    Debug(" Converted Umlaute on $umlaute_found lines.");
    $FILE_COUNT++;
    return 0;
}

##################################################
# _to_latin1($line)
#
# Returns $line as ISO-8859-1 bytes.  The line is decoded as UTF-8 and
# re-encoded; if that round trip reproduces the input, the line was
# UTF-8 and its Latin-1 form is returned.  Otherwise the line is taken
# to be ISO-8859-1 already and is returned unchanged.
#
# NOTE(review): UTF-8 text with characters outside Latin-1 presumably
# fails the round trip and is then treated as ISO -- confirm.
##################################################
sub _to_latin1 {
    my ($line) = @_;

    # Silence warnings for the decoding attempt only (presumably raised
    # when the input is not valid UTF-8); everything else stays audible.
    local $SIG{'__WARN__'} = \&alarm_handler;

    my $text_iso  = utf8($line)->latin1;
    my $text_utf8 = latin1($text_iso)->utf8;    # reverse check

    return $line eq $text_utf8 ? $text_iso : $line;    # else: is ISO already!
}

##################################################
# is_ascii($string)
#
# Returns 1 if $string contains only code points 0..127, else 0.
##################################################
sub is_ascii {
    my ($in) = @_;
    return $in =~ /[^\x00-\x7F]/ ? 0 : 1;
}

##################################################
# web_enc($string)
#
# Returns a copy of $string in which every character above 127 is
# replaced by its decimal HTML entity (&#NNN;).  A single space at the
# very end of the line (before the newline, if any) is dropped.
##################################################
sub web_enc {
    my ($text) = @_;
    (my $enc = $text) =~ s{([^\x00-\x7F])}{sprintf('&#%d;', ord $1)}ge;
    $enc =~ s/ $//;
    return $enc;
}

##################################################
# Debug($message)
#
# Prints $message followed by a newline unless $DEBUG is 0.
##################################################
sub Debug {
    my ($message) = @_;
    print "$message\n" if $DEBUG;
    return;
}

##################################################
# alarm_handler()
#
# No-op __WARN__ handler; discards the warning it is given.
##################################################
sub alarm_handler {
    #print STDERR "alarm catched!\n";
    return;
}

__END__
E1001 -- /usr/local/apache2/htdocs/www.candy.pgate.net/shop/.tpl/ger/.cms/fixumlaute.pl