#!/usr/bin/perl
# a40402.pl 2008-12-4 by akebia
# Nippongo search string decoder for Webalizer
# http://nyan.co.uk/akebia/item/81
#
# Filter: reads log lines from STDIN and writes them to STDOUT.  Each line is
# split on double quotes; quoted fields 1, 3 and 5 are unescaped, and field 3
# (presumably the referrer of a combined-format access log -- confirm against
# the log format in use) is matched against @search_engine.  When an entry
# applies, the value of its search-key parameter is converted to UTF-8 with
# Jcode so the search string is readable.
#
# Options:
#   -n   additionally normalize field 3: half-width kana are widened (h2z) and
#        the characters in $zenkaku_moji are mapped to $hankaku_moji.

use strict;
use warnings;

use Jcode;

# Search engine table.  Each entry is
#   [ host/path regex, search-key (e.g. 'q='), op, arg, op, arg, ... ]
# The regex is matched right after "http://" plus an optional sub-domain
# prefix.  The op list is interpreted by check_encode():
#   '!' arg   skip this entry if the raw URL has a parameter matching arg
#   '=' arg   skip this entry unless the raw URL has a parameter matching arg
#   'n' arg   like '!', but tested against the decoded copy of the URL
#   'e' arg   like '=', but tested against the decoded copy of the URL
#   'h' arg   replace the host part of the URL with arg
#   's' arg   apply the substitution "s<arg>" to the URL (arg = '/pat/repl/')
#   'u'       percent-decode the URL once and refresh the decoded copy
#             (takes no argument)
#   '.'       stop: URL-decode the field but do not convert its encoding
#   ';'       skip to the next entry (keeping any 'h'/'s'/'u' rewrite)
# Reaching the end of the list means "convert the search string".
my @search_engine = (
    ['[^/]*/search\?', 'q=', '=', 'q=cache:.+\+.*', 'h', 'www.google.com', 's', '/([&\?])q=cache:.+\+/$1q=/', ';'],
    ['[^/]*/translate_c\?', 'q=', '=', 'prev=/.*', 'h', 'www.google.com', ';'],
    ['google\.', 'q=', '=', 'prev=/.*', 's', '/\/[^\/]*(\?|\?.*&)prev=\//\//', 'u', ';'],
    ['google\.', 'q=', '=', 'ie=[^uU].*'],
    ['google\.', 'q=', '=', 'ie=[uU].*', '.'],
    ['google\.', 'q=', 'e', 'btnG=.*\x8c\x9f\x8d\xf5.*'],
    ['google\.', 'q=', 'e', 'btnG=.*\xe6\xa4\x9c\xe7\xb4\xa2.*', '.'],
    ['goo\.ne\.jp/', 'MT=', '!', 'IE=UTF-8'],
    ['yahoo\.co\.jp/', 'p=', '!', 'ei=UTF-8'],
    ['yahoofs\.jp/', 'p=', '!', 'ei=UTF-8'],
    ['livedoor\.com/', 'q=', '!', 'ie=utf8'],
    ['fresheye\.com/', 'kw='],
    ['excite\.co\.jp/', 'search='],
    ['excite-cache\.jp/', 'search=', '=', 'preurl=.+', 's', '/.*[&\?]preurl=//', 'u'],
    ['nifty\.com/', 'Text='],
    ['biglobe\.ne\.jp/', 'q='],
    ['msn\.co\.jp/', 'q=', '=', 'cp=(932|50220|51932)'],
    ['ceek\.jp/', 'q='],
    ['[^/]*/search/cache\?', 'q=', 'h', 'search.yahoo.com', 's', '/\/search\/cache\?/\/search\?/', '.'],
    # The dot in "webcp.asp" is escaped so it only matches a literal dot,
    # consistent with the substitution pattern of the same entry.
    ['[^/]*/webcp\.asp\?', 'q=', 'h', 'ask.jp', 's', '/\/webcp\.asp\?/\/web\.asp\?/', '.'],
    ['.*', '', '.'],
);

# Half-width (ASCII) targets of the -n normalization.
my $hankaku_moji = q{ !#$'()*+,-./0123456789:;<>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}};

# Full-width counterparts, one per character of $hankaku_moji, in the same
# order.  Built from code points so the table does not depend on the encoding
# this file is saved in.
# NOTE(review): the full-width literal in the copy of the script this was
# taken from had been flattened to ASCII; this table is reconstructed from
# the half-width one (U+3000 for space, U+2019/U+2018 for the two quote
# marks, U+FF01..U+FF5D otherwise) -- confirm against the upstream script.
# Jcode's tr() is documented to accept flagged UTF-8 strings on perl 5.8+.
my $zenkaku_moji = join '', map {
      $_ eq ' '  ? "\x{3000}"
    : $_ eq q{'} ? "\x{2019}"
    : $_ eq '`'  ? "\x{2018}"
    :              chr(ord($_) + 0xFEE0)
} split //, $hankaku_moji;

my $normalize = (defined $ARGV[0] && $ARGV[0] eq '-n') ? 1 : 0;

# url_decode_x($s)
# Decodes a URL-encoded string IN PLACE (the caller's variable is modified).
# '+', double quotes, line breaks and the escapes %22 %25 %26 %3d %0d %0a are
# turned into spaces first, so decoding cannot introduce '"', '%', '&', '='
# or line breaks; every remaining %hh escape is replaced by its byte.
sub url_decode_x    #($s)
{
    $_[0] =~ s/\+|\x22|\x0d\x0a|\x0d|\x0a|%22|%25|%26|%3d|%0d%0a|%0d|%0a/ /gi;
    $_[0] =~ s/%([a-f\d]{2})/pack('H2',$1)/egi;
}

# check_encode($a, $s, \@e, $i)
# Runs the op list of search-engine entry \@e, starting at index $i.
#   $a  decoded copy of the URL (modified in place by the 'u' op)
#   $s  raw URL (modified in place by the 'h', 's' and 'u' ops)
# Returns 1 if the entry does not apply (the caller tries the next entry),
# 2 if the URL must only be URL-decoded, and 0 if the search string of this
# entry should be converted.
sub check_encode    #($a, $s, \@e, $i)
{
    # References to the caller's variables, so rewrites are visible there.
    my $decoded_ref = \$_[0];
    my $url_ref     = \$_[1];
    my $entry       = $_[2];
    my $i           = $_[3];

    while (defined $entry->[$i] && $entry->[$i] ne '') {
        my $op  = $entry->[$i];
        my $arg = $entry->[$i + 1];

        if ($op eq '!') {
            return 1 if $$url_ref =~ /[\?&]${arg}(&|\s*)/;
        }
        elsif ($op eq '=') {
            return 1 if $$url_ref !~ /[\?&]${arg}(&|\s*)/;
        }
        elsif ($op eq 'n') {
            return 1 if $$decoded_ref =~ /[\?&]${arg}(&|\s*)/;
        }
        elsif ($op eq 'e') {
            return 1 if $$decoded_ref !~ /[\?&]${arg}(&|\s*)/;
        }
        elsif ($op eq '.') {
            return 2;
        }
        elsif ($op eq ';') {
            return 1;
        }
        elsif ($op eq 'h') {
            $$url_ref =~ s/^http:\/\/[^\/]*\//http:\/\/$arg\//;
        }
        elsif ($op eq 's') {
            # $arg is a complete '/pattern/replacement/' taken from the
            # built-in table above (never from log data), so evaluating it
            # as code is acceptable here.
            eval("\$\$url_ref =~ s$arg");
        }
        elsif ($op eq 'u') {
            $$url_ref =~ s/%([a-f\d]{2})/pack('H2',$1)/egi;
            $$decoded_ref = $$url_ref;
            url_decode_x($$decoded_ref);
            $i--;    # 'u' has no argument: net advance is one slot, not two
        }
        $i += 2;
    }
    return 0;
}

# Main loop.  Lines are read from STDIN explicitly: the diamond operator
# would try to open the "-n" switch as a file, because @ARGV is never shifted.
while (my $line = <STDIN>) {
    my @fields = split(/\x22/, $line);

    # A bare newline after the last quoted field is padded with a space
    # (kept from the original; the reason is not evident from this file).
    if (defined $fields[6] && $fields[6] eq "\n") {
        $fields[6] = " \n";
    }

    # Undo backslash escapes (\xhh, \t, \\ ...) in the quoted fields by
    # evaluating each one as a double-quoted Perl string.
    for my $idx (1, 3, 5) {
        next if !defined $fields[$idx]
             || $fields[$idx] eq ''
             || $fields[$idx] eq '-';

        # SECURITY: this is a string eval over untrusted log data.  Only '$'
        # and '@' can start interpolation inside a double-quoted string, so
        # both are neutralized first -- but only when preceded by an even
        # number of backslashes; one that is already escaped must be left
        # alone, or the added backslash would re-enable interpolation.
        # The fields cannot contain '"' because the line was split on it.
        (my $quoted = $fields[$idx]) =~ s/(?<!\\)((?:\\\\)*)([\$\@])/$1\\$2/g;
        my $unescaped = do {
            no warnings;    # log data may hold escapes perl does not know
            eval('"' . $quoted . '"');
        };
        # Keep the raw field if it is not a valid string body (for example
        # a trailing backslash) instead of silently blanking it.
        $fields[$idx] = $unescaped if defined $unescaped;
        $fields[$idx] =~ s/\x22/%22/gi;
    }

    if (!defined $fields[3] || $fields[3] eq '' || $fields[3] eq '-') {
        print join("\x22", @fields);
        next;
    }

    # Decoded copy of the URL, used by the 'n' and 'e' table ops.
    my $decoded = $fields[3];
    url_decode_x($decoded);

    foreach my $engine (@search_engine) {
        my ($site_re, $key) = @{$engine}[0, 1];

        next if $fields[3] !~ /^http:\/\/([^\/]+\.|)$site_re/;

        my $verdict = check_encode($decoded, $fields[3], $engine, 2);
        next if $verdict == 1;

        url_decode_x($fields[3]);
        last if $verdict == 2;

        # Split around the search key, convert the value that follows its
        # first occurrence (up to the next '&') to UTF-8, and reassemble the
        # URL with the separator character that preceded the key.
        my @parts = split(/[&\?]$key/, $fields[3]);
        next if !defined $parts[1] || $parts[1] eq '';

        my @params = split(/&/, $parts[1]);
        $params[0] = Jcode->new(\$params[0])->utf8;
        $parts[1]  = join("&", @params);
        $fields[3] = join(substr($fields[3], length($parts[0]), 1) . $key, @parts);
        last;
    }

    if ($normalize) {
        $fields[3] = Jcode->new(\$fields[3], 'utf8')->h2z->tr($zenkaku_moji, $hankaku_moji)->utf8;
    }
    print join("\x22", @fields);
}