| 1 | import os, hashlib, urllib.request, time, re, weakref |
| 2 | from urllib.parse import urljoin, urlencode |
| 3 | import bs4 |
def soup(content):
    """Parse *content* (HTML bytes or text) into a BeautifulSoup tree."""
    # PEP 8: a named lambda is better written as a def.
    return bs4.BeautifulSoup(content, "html.parser")
| 5 | |
# Public names exported by ``from <module> import *``.
__all__ = ["anime", "getlist",
           "error", "incompatible"]

# Root URL for all ANN encyclopedia requests.
base = "http://www.animenewsnetwork.com/encyclopedia/"
| 10 | |
class error(Exception):
    """Base class for all errors raised by this module."""


class incompatible(error):
    """Raised when ANN's HTML layout no longer matches what we parse."""

    def __init__(self):
        super().__init__("ANN HTML has changed")
| 17 | |
# Set up the on-disk cache directory (~/.ann/cache).  If HOME is unset
# or the directory cannot be created, caching is simply disabled.
# (Previously a bare ``except:`` swallowed *every* exception, including
# KeyboardInterrupt; now only filesystem errors disable the cache.)
try:
    _home = os.getenv("HOME")
    if _home:
        cachedir = os.path.join(_home, ".ann", "cache")
        os.makedirs(cachedir, exist_ok=True)
    else:
        cachedir = None
except OSError:
    cachedir = None
| 24 | |
def cachename(url):
    """Map *url* to its cache-file path, or None when caching is disabled."""
    if not cachedir:
        return None
    digest = hashlib.md5(url.encode("ascii")).hexdigest()
    return os.path.join(cachedir, digest)
| 31 | |
def get(url):
    """Fetch *url* through a 24-hour on-disk cache and parse it as HTML.

    Returns a BeautifulSoup document.  Network errors from
    ``urllib.request.urlopen`` propagate to the caller; a stale or
    missing cache entry triggers a fresh fetch which is then cached.
    """
    data = None
    cachefile = cachename(url)
    # Serve from cache only when the entry is younger than one day.
    if cachefile and os.path.exists(cachefile):
        if time.time() - os.stat(cachefile).st_mtime < 86400:
            with open(cachefile, "rb") as fp:
                data = fp.read()
    if data is None:
        with urllib.request.urlopen(url) as fp:
            data = fp.read()
        if cachefile:
            # ``with`` replaces the manual try/finally close.
            with open(cachefile, "wb") as out:
                out.write(data)
    return soup(data)
| 49 | |
def s(s, rx, rep):
    """Replace the first case-insensitive match of *rx* in *s* with *rep*.

    *rep* is inserted literally (no backslash/group-reference expansion,
    unlike ``re.sub``).  Returns *s* unchanged when nothing matches.
    """
    m = re.search(rx, s, re.I)
    if m is None:
        return s
    return s[:m.start()] + rep + s[m.end():]
| 56 | |
def afind(soup, *args, **kwargs):
    """Like ``soup.find(...)``, but raise incompatible() instead of None."""
    found = soup.find(*args, **kwargs)
    if found is None:
        raise incompatible()
    return found
| 62 | |
def cstr(soup):
    """Flatten a soup fragment (Tag, list, string or None) to plain text.

    Tags and lists are concatenated recursively; plain strings pass
    through; None maps to None; anything else yields its ``.string``.
    """
    if isinstance(soup, (bs4.Tag, list)):
        return "".join(cstr(child) for child in soup)
    if isinstance(soup, str):
        return soup
    if soup is None:
        return None
    return soup.string
| 75 | |
class cproperty(object):
    """Property-like descriptor that caches the first computed value.

    Values live in a per-descriptor WeakKeyDictionary keyed on the
    instance, so cache entries vanish with their instances.  Assignment
    overrides the cached value; deletion forces recomputation on the
    next access.
    """

    # Sentinel meaning "nothing cached yet" (None must stay cacheable).
    _default = object()

    def __init__(self, bk):
        self.bk = bk
        self.cache = weakref.WeakKeyDictionary()

    def __get__(self, ins, cls):
        if ins is None:
            return self
        val = self.cache.get(ins, self._default)
        if val is not self._default:
            return val
        val = self.bk(ins)
        self.cache[ins] = val
        return val

    def __set__(self, ins, val):
        self.cache[ins] = val

    def __delete__(self, ins):
        # Deleting an uncached value is a no-op, as in the original.
        self.cache.pop(ins, None)
| 97 | |
class anime(object):
    """One anime title in the ANN encyclopedia, fetched lazily by id.

    All page-derived attributes are cproperty instances: the encyclopedia
    page is downloaded (through the cache) only on first access.
    """

    def __init__(self, id):
        self.id = id
        self.url = urljoin(base, "anime.php?id=%i" % self.id)

    @cproperty
    def _page(self):
        # Parsed HTML of the title's encyclopedia page.
        return get(self.url)

    @cproperty
    def _main(self):
        return afind(self._page, "div", id="maincontent")

    def _info(self, nm):
        """Return the contents following the <strong> label *nm*.

        *nm* is matched lowercase without the trailing colon.  Returns
        None when the page has no such entry.
        """
        for t in afind(self._main, "div", id="content-zone")("div", "encyc-info-type"):
            if t.strong and t.strong.text.lower().strip()[:-1] == nm:
                return t.contents[t.contents.index(t.strong) + 1:]

    @cproperty
    def rawname(self):
        return afind(self._main, "h1", id="page_header").text

    # Splits "Some Name (TV)" into ("Some Name", "TV").
    _nre = re.compile(r"^(.*\S)\s+\(([^\)]+)\)$")

    @cproperty
    def _sname(self):
        m = self._nre.search(self.rawname)
        if not m:
            return (self.rawname, None)
        return m.groups()[0:2]

    @property
    def name(self):
        return self._sname[0]

    @property
    def type(self):
        return self._sname[1]

    @cproperty
    def names(self):
        """All known titles as (name, qualifier-or-None) tuples, primary first."""
        ret = []
        # BUGFIX: _info() returns None when the page lacks an
        # "Alternative title" section; treat that as empty instead of
        # raising TypeError (matches the guards in genres/themes).
        for el in self._info("alternative title") or []:
            if isinstance(el, bs4.Tag) and el.name == "div" and "tab" in el.get("class", []):
                m = self._nre.search(el.text)
                if m:
                    ret.append((m.groups()[0], m.groups()[1]))
                else:
                    ret.append((el.text, None))
        if (self.name, None) in ret:
            ret.remove((self.name, None))
        ret.insert(0, (self.name, None))
        return ret

    @cproperty
    def eps(self):
        """Episode count as int, or None when the page does not list one."""
        ret = cstr(self._info("number of episodes"))
        if ret is None:
            return ret
        return int(ret)

    @cproperty
    def vintage(self):
        """Vintage string, or None when not listed.

        BUGFIX: previously raised AttributeError (None.strip()) on pages
        without a vintage entry.
        """
        ret = cstr(self._info("vintage"))
        return ret.strip() if ret is not None else None

    @cproperty
    def genres(self):
        return [cstr(el) for x in (self._info("genres") or []) if isinstance(x, bs4.Tag) for el in x.findAll("a")]

    @cproperty
    def themes(self):
        return [cstr(el) for x in (self._info("themes") or []) if isinstance(x, bs4.Tag) for el in x.findAll("a")]

    def __repr__(self):
        return "<ann.anime: %r (%i)>" % (self.name, self.id)

    def __str__(self):
        return self.name

    @classmethod
    def byid(cls, id):
        """Alternate constructor; currently equivalent to anime(id)."""
        return cls(id)
| 174 | |
# Matches encyclopedia anime links and captures the numeric id.
# BUGFIX: raw string — the old pattern had a literal "\d" escape inside a
# non-raw string, which is a SyntaxWarning on modern Python.
linkpat = re.compile(r"^/encyclopedia/anime\.php\?id=(\d+)$")
def getlist(name):
    """Search the ANN encyclopedia index for titles starting with *name*.

    A leading article ("the"/"a") is stripped from the query.  Returns
    a list of `anime` objects with `rawname` pre-filled from the index
    page so the listing does not cost one page fetch per title.

    Raises `error` when the stripped query is empty and `incompatible`
    when the index HTML no longer matches expectations.
    """
    # BUGFIX: regex patterns are now raw strings ("\s" in a plain string
    # is an invalid-escape warning on modern Python).
    name = s(name, r"^(the|a)\s+", "")
    if len(name) < 1:
        raise error("list() needs a prefix of at least one character")
    # The index is split into one page per leading letter; everything
    # non-alphabetic is filed under "9".
    fc = name[0]
    if 'a' <= fc <= 'z' or 'A' <= fc <= 'Z':
        fc = fc.upper()
    else:
        fc = '9'
    d = get(urljoin(base, "anime.php?" + urlencode({"list": fc})))
    ret = []
    ldiv = afind(afind(d, "div", id="maincontent"), "div", "lst")
    for link in ldiv("a", "HOVERLINE"):
        rawname = ""
        for el in link.font:
            if isinstance(el, str):
                rawname += el.strip()
        mn = rawname.lower()
        # NOTE(review): only a leading "a " is stripped here, while the
        # query above also strips "the " — confirm the asymmetry is intended.
        mn = s(mn, r"^a\s+", "")
        # Fold long-vowel macrons to the romanization used in queries.
        mn = mn.replace("\u014d", "ou")
        mn = mn.replace("\u016b", "uu")
        if mn.lower().startswith(name.lower()):
            m = linkpat.match(link["href"])
            if not m:
                raise incompatible()
            found = anime.byid(int(m.groups()[0]))
            found.rawname = rawname
            ret.append(found)
    return ret