From: fredrik Date: Thu, 27 Aug 2009 00:03:21 +0000 (+0000) Subject: ANN: Updated HTML patterns X-Git-Url: http://git.dolda2000.com/gitweb/?a=commitdiff_plain;h=553e41ef5bc95f5edcce12ccf15beae3d0fdaddc;hp=8f7ee74010f36762474df1bf0b04010865fb19c0;p=utils.git ANN: Updated HTML patterns git-svn-id: svn+ssh://svn.dolda2000.com/srv/svn/repos/src/utils@1121 959494ce-11ee-0310-bf91-de5d638817bd --- diff --git a/ANN.pm b/ANN.pm index a9e8014..48bd9c4 100644 --- a/ANN.pm +++ b/ANN.pm @@ -49,7 +49,7 @@ sub getlist ($name) = @_; $name = ($name =~ /^(the\s+)?(.*)$/i)[1]; - $il = uc(($name =~ /^(.)/)[0]); + $il = uc(($name =~ /^\W*(.)/)[0]); $il = "9" if (!($il =~ /[A-Z]/)); if(!($html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il")) { return undef; @@ -74,7 +74,7 @@ sub getid ($name) = @_; $name = ($name =~ /^(the\s+)?(.*)$/i)[1]; - $il = uc(($name =~ /^(.)/)[0]); + $il = uc(($name =~ /^\W*(.)/)[0]); $il = "9" if (!($il =~ /[A-Z]/)); if(!($html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il")) { return undef; @@ -107,7 +107,7 @@ sub getthemes if($html =~ /$kind theme:<\/strong>\s*\n/igc) { my(@parts, $ct, $buf); - while($html =~ /\G\s*\
(([^<>]|\|<\/i>)+)<\/div>/igc) { + while($html =~ /\G\s*\
(([^<>]|\|<\/i>)+)(]*>[^<>]*]*>[^<>]*<\/span>)?<\/div>/igc) { $buf = $1; # 0 1 2 3 4 5 6 7 8 9 10 1112 if(@parts = ($buf =~ /(\#(\d+):)?\s*\"([^\"\(]+\S)(\s*\((\(.*)<\/i>( - \s*)?)?([^<>]+)?\))?\"\s+by\s+([^\(]*[^\(\s])(\s*\(eps? (\d+)(-(\d+))?\))?/i)) {