--- /dev/null
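+"""Client for the Anime News Network (ANN) encyclopedia, with a simple
+on-disk cache of fetched pages under ~/.ann/cache."""
+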
+import os, hashlib, urllib.request, time, re, weakref
+from urllib.parse import urljoin, urlencode
+import bs4
+def soup(cont):
+    return bs4.BeautifulSoup(cont, "html.parser")
+
+base = "http://www.animenewsnetwork.com/encyclopedia/"
+
+class error(Exception):
+ pass
+
+class incompatible(error):
+ def __init__(self):
+ super().__init__("ANN HTML has changed")
+
+try:
+    # Keep a local page cache under ~/.ann/cache; disable caching entirely
+    # if the directory cannot be created (e.g. $HOME is unset).
+    cachedir = os.path.join(os.getenv("HOME"), ".ann", "cache")
+    if not os.path.isdir(cachedir):
+        os.makedirs(cachedir)
+except Exception:
+    cachedir = None
+
+def cachename(url):
+    """Return the cache-file path for url, or None if caching is disabled."""
+    if not cachedir:
+        return None
+    d = hashlib.md5()
+    d.update(url.encode("ascii"))
+    return os.path.join(cachedir, d.hexdigest())
+
+def get(url):
+    """Fetch a URL and parse it, reusing a cached copy when it is fresh."""
+    data = None
+    cachefile = cachename(url)
+    if cachefile and os.path.exists(cachefile):
+        # Reuse cached responses for up to 24 hours.
+        if time.time() - os.stat(cachefile).st_mtime < 86400:
+            with open(cachefile, "rb") as fp:
+                data = fp.read()
+    if data is None:
+        with urllib.request.urlopen(url) as fp:
+            data = fp.read()
+        if cachefile:
+            with open(cachefile, "wb") as co:
+                co.write(data)
+    return soup(data)
+
+def s(text, rx, rep):
+    """Replace the first case-insensitive match of rx in text with rep."""
+    m = re.search(rx, text, re.I)
+    if m:
+        return text[:m.start()] + rep + text[m.end():]
+    else:
+        return text
+
+def afind(soup, *args, **kwargs):
+    """Like soup.find(), but raise incompatible rather than returning None."""
+    ret = soup.find(*args, **kwargs)
+    if ret is None:
+        raise incompatible()
+    return ret
+
+def cstr(soup):
+    """Recursively join the string content of a tag, or of a list of nodes."""
+    if isinstance(soup, (bs4.Tag, list)):
+        ret = ""
+        for el in soup:
+            ret += cstr(el)
+        return ret
+    else:
+        return soup.string
+
+class cproperty(object):
+    """Descriptor that computes a value once per instance and caches it.
+
+    The cache is a WeakKeyDictionary, so it does not keep instances alive.
+    """
+
+    _default = object()
+
+ def __init__(self, bk):
+ self.bk = bk
+ self.cache = weakref.WeakKeyDictionary()
+
+ def __get__(self, ins, cls):
+ if ins is None: return self
+ ret = self.cache.get(ins, self._default)
+ if ret is self._default:
+ ret = self.bk(ins)
+ self.cache[ins] = ret
+ return ret
+
+ def __set__(self, ins, val):
+ self.cache[ins] = val
+
+ def __delete__(self, ins):
+ if ins in self.cache:
+ del self.cache[ins]
+
+class anime(object):
+    """A single encyclopedia entry, parsed lazily from its ANN page."""
+
+    def __init__(self, id):
+        self.id = id
+        self.url = urljoin(base, "anime.php?id=%i" % self.id)
+
+ @cproperty
+ def _page(self):
+ return get(self.url)
+
+ @cproperty
+ def _main(self):
+ return afind(self._page, "div", id="maincontent")
+
+    @cproperty
+    def _info(self):
+        # Map each labeled info block to the nodes following its label.
+        ret = {}
+        for t in afind(self._main, "div", id="content-zone")("div", "encyc-info-type"):
+            if t.strong:
+                # Key: the <strong> label, lowercased, trailing colon stripped.
+                ret[t.strong.text.lower().strip()[:-1]] = t.contents[t.contents.index(t.strong) + 1:]
+        return ret
+
+    @cproperty
+    def rawname(self):
+        return afind(self._main, "h1", id="page_header").text
+    # Page headers look like "Title (TV)"; split into plain name and type.
+    _nre = re.compile(r"^(.*\S)\s+\(([^\)]+)\)$")
+ @cproperty
+ def _sname(self):
+ m = self._nre.search(self.rawname)
+ if not m:
+ return (self.rawname, None)
+ return m.groups()[0:2]
+ @property
+ def name(self): return self._sname[0]
+ @property
+ def type(self): return self._sname[1]
+
+    @cproperty
+    def eps(self):
+        # Raises KeyError for entries without a "Number of episodes" field.
+        return int(cstr(self._info["number of episodes"]))
+
+ def __repr__(self):
+ return "<ann.anime: %r (%i)>" % (self.name, self.id)
+
+ def __str__(self):
+ return self.name
+
+linkpat = re.compile(r"^/encyclopedia/anime\.php\?id=(\d+)$")
+def getlist(name):
+    """Return anime objects whose names begin with the given prefix.
+
+    ANN's listing is indexed by first letter (everything else under "9"),
+    so fetch the matching index page and filter it by prefix.
+    """
+    # A leading "the " is ignored for both indexing and matching.
+    name = s(name, r"^the\s+", "")
+    if len(name) < 1:
+        raise error("getlist() needs a prefix of at least one character")
+    fc = name[0]
+    if 'a' <= fc <= 'z' or 'A' <= fc <= 'Z':
+        fc = fc.upper()
+    else:
+        fc = '9'
+    d = get(urljoin(base, "anime.php?" + urlencode({"list": fc})))
+ ret = []
+ ldiv = afind(afind(d, "div", id="maincontent"), "div", "lst")
+    for link in ldiv("a", "HOVERLINE"):
+        # Collect the link's visible text (direct strings only, markup skipped).
+        mn = ""
+        for el in link.font:
+            if isinstance(el, str):
+                mn += el.strip()
+ if mn.lower().startswith(name.lower()):
+ m = linkpat.match(link["href"])
+ if not m:
+ raise incompatible()
+ found = anime(int(m.groups()[0]))
+ found.rawname = mn
+ ret.append(found)
+ return ret
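+
+# Minimal usage sketch (not part of the module proper), assuming network
+# access and ANN's current page layout: list the entries matching a prefix,
+# then fetch details for the first hit.
+if __name__ == "__main__":
+    import sys
+    matches = getlist(sys.argv[1] if len(sys.argv) > 1 else "Cowboy Bebop")
+    for found in matches:
+        print(repr(found))
+    if matches:
+        try:
+            print("episodes: %i" % matches[0].eps)
+        except KeyError:
+            # Not every entry lists a "Number of episodes" field.
+            pass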