1 import os, hashlib, urllib.request, time, re, weakref
2 from urllib.parse import urljoin, urlencode
def soup(cont):
    """Parse *cont* as HTML and return the ``bs4.BeautifulSoup`` tree.

    The document is built with the ``"html.parser"`` tree builder.
    """
    return bs4.BeautifulSoup(cont, "html.parser")


# Root URL that the relative encyclopedia page names are joined onto.
base = "http://www.animenewsnetwork.com/encyclopedia/"
8 class error(Exception):
11 class incompatible(error):
13 super().__init__("ANN HTML has changed")
# Per-user cache directory.  HOME is honoured when it is set; otherwise fall
# back to expanduser("~"), because os.getenv("HOME") alone yields None on
# systems without HOME and os.path.join(None, ...) raises TypeError on import.
cachedir = os.path.join(os.getenv("HOME", os.path.expanduser("~")), ".ann", "cache")
17 if not os.path.isdir(cachedir):
26 d.update(url.encode("ascii"))
27 return os.path.join(cachedir, d.hexdigest())
31 cachefile = cachename(url)
32 if cachefile and os.path.exists(cachefile):
33 if time.time() - os.stat(cachefile).st_mtime < 86400:
34 with open(cachefile, "rb") as fp:
37 with urllib.request.urlopen(url) as fp:
40 co = open(cachefile, "wb")
48 m = re.search(rx, s, re.I)
50 return s[:m.start()] + rep + s[m.end():]
54 def afind(soup, *args, **kwargs):
55 ret = soup.find(*args, **kwargs)
61 if isinstance(soup, bs4.Tag) or isinstance(soup, list):
69 class cproperty(object):
72 def __init__(self, bk):
74 self.cache = weakref.WeakKeyDictionary()
76 def __get__(self, ins, cls):
77 if ins is None: return self
78 ret = self.cache.get(ins, self._default)
79 if ret is self._default:
84 def __set__(self, ins, val):
87 def __delete__(self, ins):
92 def __init__(self, id):
94 self.url = urljoin(base, "anime.php?id=%i" % self.id)
102 return afind(self._page, "div", id="maincontent")
107 for t in afind(self._main, "div", id="content-zone")("div", "encyc-info-type"):
109 ret[t.strong.text.lower().strip()[:-1]] = t.contents[t.contents.index(t.strong) + 1:]
114 afind(self._main, "h1", id="page_header").text
115 _nre = re.compile(r"^(.*\S)\s+\(([^\)]+)\)$")
118 m = self._nre.search(self.rawname)
120 return (self.rawname, None)
121 return m.groups()[0:2]
123 def name(self): return self._sname[0]
125 def type(self): return self._sname[1]
129 return int(cstr(self._info["number of episodes"]))
132 return "<ann.anime: %r (%i)>" % (self.name, self.id)
# Raw string so that \. \? and \d reach the regex engine verbatim instead of
# depending on "\d" being passed through as an unrecognised string escape.
linkpat = re.compile(r"^/encyclopedia/anime\.php\?id=(\d+)$")
# Raw string keeps \s as a regex whitespace class rather than an invalid
# string escape; the pattern text handed to s() is unchanged.
name = s(name, r"^the\s+", "")
141 raise error("list() needs a prefix of at least one character")
143 if 'a' <= fc <= 'z' or 'A' <= fc <= 'Z':
147 d = get(urljoin(base, "anime.php?" + urlencode({"list": fc})))
149 ldiv = afind(afind(d, "div", id="maincontent"), "div", "lst")
150 for link in ldiv("a", "HOVERLINE"):
153 if isinstance(el, str):
155 if mn.lower().startswith(name.lower()):
156 m = linkpat.match(link["href"])
159 found = anime(int(m.groups()[0]))