X-Git-Url: http://git.dolda2000.com/gitweb/?a=blobdiff_plain;ds=sidebyside;f=ANN.pm;h=6dd02e94fc951f0b1773cf7f5131f6ecf3359386;hb=1b3618666413f62448f5e63e13503f704c214516;hp=1406c4584045c8f4d636875d9ac481aa7166f38e;hpb=3e60094e9bdf9494bcddb4ebc036e39ae5d9b5ce;p=utils.git
diff --git a/ANN.pm b/ANN.pm
index 1406c45..6dd02e9 100644
--- a/ANN.pm
+++ b/ANN.pm
@@ -35,12 +35,12 @@ sub _get
$res = $ua->request(HTTP::Request->new("GET", "$uri"));
if(open CACHE, ">:utf8", $cname) {
- print CACHE $res->content;
+ print CACHE $res->decoded_content;
close CACHE;
}
return undef unless $res->is_success;
- return $res->content;
+ return $res->decoded_content;
}
sub getlist
@@ -48,7 +48,8 @@ sub getlist
my($name, $il, $html, @ret);
($name) = @_;
- $il = uc(($name =~ /^(.)/)[0]);
+ $name = ($name =~ /^(the\s+)?(.*)$/i)[1];
+ $il = uc(($name =~ /^\W*(.)/)[0]);
$il = "9" if (!($il =~ /[A-Z]/));
if(!($html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il")) {
return undef;
@@ -57,9 +58,9 @@ sub getlist
# The only way to recognize entries that seems sure is to look
# after the "HOVERLINE" class.
- while($html =~ /([^<]+)<\//ig) {
- if((substr "" . lc $2 , 0, length $name) eq lc $name) {
- push @ret, $2;
+ while($html =~ /]*>(]*>)?([^<]*<\/small>)?\s*([^<]+)<\//ig) {
+ if((substr "" . lc $4 , 0, length $name) eq lc $name) {
+ push @ret, $4;
}
}
# push @ret, $1 while $html =~ /.*([^<>]*$name[^<>]*)<\/FONT/ig;
@@ -72,7 +73,8 @@ sub getid
my($name, $il, $html, $url);
($name) = @_;
- $il = uc(($name =~ /^(.)/)[0]);
+ $name = ($name =~ /^(the\s+)?(.*)$/i)[1];
+ $il = uc(($name =~ /^\W*(.)/)[0]);
$il = "9" if (!($il =~ /[A-Z]/));
if(!($html = _get "http://www.animenewsnetwork.com/encyclopedia/anime.php?list=$il")) {
return undef;
@@ -81,8 +83,8 @@ sub getid
# The only way to recognize entries that seems sure is to look
# after the "HOVERLINE" class.
- while($html =~ /([^<]+)<\//ig) {
- if((substr "" . lc $2 , 0, length $name) eq lc $name) {
+ while($html =~ /]*>(]*>)?([^<]*<\/small>)?\s*([^<]+)<\//ig) {
+ if((substr "" . lc $4 , 0, length $name) eq lc $name) {
return ($1 =~ /id=(\d+)$/)[0];
}
}
@@ -103,12 +105,12 @@ sub getthemes
my($html, $kind, @ret);
($html, $kind) = @_;
- if($html =~ /$kind theme:<\/b>\n/igc) {
+ if($html =~ /$kind theme:<\/strong>\s*\n/igc) {
my(@parts, $ct, $buf);
- while($html =~ /\G\
(([^<>]|\|<\/i>)+)/igc) {
+ while($html =~ /\G\s*\(([^<>]|\
|<\/i>)+)(]*>[^<>]*]*>[^<>]*<\/span>)?<\/div>/igc) {
$buf = $1;
- # 0 1 2 3 4 5 6 7 8 9 10 11
- if(@parts = ($buf =~ /(\#(\d+):)?\s*\"([^\"\(]+\S)(\s*\((\(.*)<\/i>(;\s*)?)?([^<>]+)?\))?\"\s+by\s+([^\(]*[^\(\s])(\s*\(eps (\d+)-(\d+)?\))?/i)) {
+ # 0 1 2 3 4 5 6 7 8 9 10 1112
+ if(@parts = ($buf =~ /(\#(\d+):)?\s*\"([^\"\(]+\S)(\s*\((\(.*)<\/i>( - \s*)?)?([^<>]+)?\))?\"\s+by\s+([^\(]*[^\(\s])(\s*\(eps? (\d+)(-(\d+))?\))?/i)) {
$ct = {};
$ct->{"num"} = $parts[1] if defined $parts[1];
if(defined $parts[5]) {
@@ -120,7 +122,7 @@ sub getthemes
$ct->{"ent"} = decode_entities($parts[7]) if defined $parts[7];
$ct->{"prf"} = decode_entities($parts[8]) if defined $parts[8];
$ct->{"fep"} = $parts[10] if defined $parts[10];
- $ct->{"lep"} = $parts[11] if defined $parts[11];
+ $ct->{"lep"} = $parts[12] if defined $parts[12];
push @ret, $ct;
}
}
@@ -139,19 +141,19 @@ sub getseries
}
$ret{"url"} = geturl $id;
- ($buf) = ($html =~ /\Anime News Network - ([^<]*)<\/TITLE>/);
+ ($buf) = ($html =~ /\([^<]*) - Anime News Network<\/title>/);
if($buf =~ /\([^\)]+\)$/) {
($ret{"name"}, $ret{"type"}) = ($buf =~ /^(.*[^\s])\s*\(([^\)]+)\)$/);
} else {
$ret{"name"} = $buf;
}
- if(($buf) = ($html =~ /vintage:<\/b>\n([^<]+)\s*\n\s*([^<]+)\n([^<]+)\s*\n\s*([^<]+)\n([^<]+)\s*\n\s*([^<]+)