import os, hashlib, urllib.request, time, re, weakref
from urllib.parse import urljoin, urlencode
import bs4

# Parse HTML with BeautifulSoup's built-in parser.
def soup(content):
    return bs4.BeautifulSoup(content, "html.parser")

base = "http://www.animenewsnetwork.com/encyclopedia/"

class error(Exception):
    pass

class incompatible(error):
    def __init__(self):
        super().__init__("ANN HTML has changed")

# Cache fetched pages under ~/.ann/cache; disable caching entirely if
# the directory cannot be created (e.g. HOME is unset).
try:
    cachedir = os.path.join(os.getenv("HOME"), ".ann", "cache")
    if not os.path.isdir(cachedir):
        os.makedirs(cachedir)
except Exception:
    cachedir = None

def cachename(url):
    # Map a URL to its cache file name: the MD5 hex digest of the URL.
    if not cachedir:
        return None
    d = hashlib.md5()
    d.update(url.encode("utf-8"))
    return os.path.join(cachedir, d.hexdigest())

def get(url):
    # Return the parsed page for url, served from the cache when the
    # entry is less than 24 hours old; fresh fetches are written back.
    data = None
    cachefile = cachename(url)
    if cachefile and os.path.exists(cachefile):
        if time.time() - os.stat(cachefile).st_mtime < 86400:
            with open(cachefile, "rb") as fp:
                data = fp.read()
    if data is None:
        with urllib.request.urlopen(url) as fp:
            data = fp.read()
        if cachefile:
            with open(cachefile, "wb") as co:
                co.write(data)
    return soup(data)

def s(string, rx, rep):
    # sed-style helper: replace the first case-insensitive match of rx
    # in string with rep, or return string unchanged if nothing matches.
    # E.g. s("The Example", r"^the\s+", "") -> "Example".
    m = re.search(rx, string, re.I)
    if m:
        return string[:m.start()] + rep + string[m.end():]
    else:
        return string

def afind(soup, *args, **kwargs):
    # find() that treats a missing element as a scraper/markup mismatch.
    ret = soup.find(*args, **kwargs)
    if ret is None:
        raise incompatible()
    return ret

def cstr(soup):
    # Concatenate the text content of a tag or list of nodes; None is
    # passed through so callers can tell "missing" from "empty".
    if isinstance(soup, (bs4.Tag, list)):
        ret = ""
        for el in soup:
            ret += cstr(el)
        return ret
    elif soup is None:
        return None
    else:
        return soup.string

class cproperty(object):
    # A lazily computed property cached per instance. The cache is a
    # WeakKeyDictionary, so cached values die with their instances.
    _default = object()

    def __init__(self, bk):
        self.bk = bk
        self.cache = weakref.WeakKeyDictionary()

    def __get__(self, ins, cls):
        if ins is None:
            return self
        ret = self.cache.get(ins, self._default)
        if ret is self._default:
            ret = self.bk(ins)
            self.cache[ins] = ret
        return ret

    def __set__(self, ins, val):
        self.cache[ins] = val

    def __delete__(self, ins):
        if ins in self.cache:
            del self.cache[ins]

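# A minimal sketch of how cproperty behaves (the `demo` class below is
# hypothetical, not part of this module): the backing function runs once
# per instance, assignment overrides the cached value, and del clears it.
#
#     class demo(object):
#         @cproperty
#         def val(self):
#             print("computing")
#             return 42
#
#     d = demo()
#     d.val       # prints "computing", returns 42
#     d.val       # returns 42 again without recomputing
#     d.val = 7   # overrides the cache; `del d.val` recomputes next time
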
class anime(object):
    def __init__(self, id):
        self.id = id
        self.url = urljoin(base, "anime.php?id=%i" % self.id)

    @cproperty
    def _page(self):
        return get(self.url)

    @cproperty
    def _main(self):
        return afind(self._page, "div", id="maincontent")

    def _info(self, nm):
        # Find the info-box row labelled nm (sans trailing colon) and
        # return the contents after the label, or None if absent.
        for t in afind(self._main, "div", id="content-zone")("div", "encyc-info-type"):
            if t.strong and t.strong.text.lower().strip()[:-1] == nm:
                return t.contents[t.contents.index(t.strong) + 1:]

    @cproperty
    def rawname(self):
        return afind(self._main, "h1", id="page_header").text
    # Page headers look like "Name (type)", e.g. "… (TV)".
    _nre = re.compile(r"^(.*\S)\s+\(([^\)]+)\)$")
    @cproperty
    def _sname(self):
        m = self._nre.search(self.rawname)
        if not m:
            return (self.rawname, None)
        return m.groups()[0:2]
    @property
    def name(self): return self._sname[0]
    @property
    def type(self): return self._sname[1]

    @cproperty
    def names(self):
        # All alternative titles as (name, qualifier) pairs, with the
        # primary name first. The `or []` guards against _info returning
        # None when ANN lists no alternative titles.
        ret = []
        for el in self._info("alternative title") or []:
            if isinstance(el, bs4.Tag) and el.name == "div" and "tab" in el.get("class", []):
                m = self._nre.search(el.text)
                if m:
                    ret.append((m.groups()[0], m.groups()[1]))
                else:
                    ret.append((el.text, None))
        if (self.name, None) in ret:
            ret.remove((self.name, None))
        ret.insert(0, (self.name, None))
        return ret

    @cproperty
    def eps(self):
        # Episode count as an int, or None if ANN does not list one.
        ret = cstr(self._info("number of episodes"))
        if ret is None:
            return ret
        return int(ret)

    def __repr__(self):
        return "<ann.anime: %r (%i)>" % (self.name, self.id)

    def __str__(self):
        return self.name

    @classmethod
    def byid(cls, id):
        return cls(id)

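# A minimal usage sketch (needs network access; the ID below is
# hypothetical, chosen only for illustration):
#
#     a = anime.byid(123)
#     a.name, a.type, a.eps   # e.g. ("Some Title", "TV", 26)
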
linkpat = re.compile(r"^/encyclopedia/anime\.php\?id=(\d+)$")
def getlist(name):
    # Return all anime whose title starts with name (a leading "the" is
    # ignored), by scraping the encyclopedia's alphabetical index pages.
    name = s(name, r"^the\s+", "")
    if len(name) < 1:
        raise error("getlist() needs a prefix of at least one character")
    fc = name[0]
    if 'a' <= fc <= 'z' or 'A' <= fc <= 'Z':
        fc = fc.upper()
    else:
        fc = '9'    # non-alphabetic titles are indexed on the "9" page
    d = get(urljoin(base, "anime.php?" + urlencode({"list": fc})))
    ret = []
    ldiv = afind(afind(d, "div", id="maincontent"), "div", "lst")
    for link in ldiv("a", "HOVERLINE"):
        mn = ""
        if link.font is None:
            raise incompatible()
        for el in link.font:
            if isinstance(el, str):
                mn += el.strip()
        if mn.lower().startswith(name.lower()):
            m = linkpat.match(link["href"])
            if not m:
                raise incompatible()
            found = anime.byid(int(m.groups()[0]))
            # Pre-seed the rawname cache so listing does not refetch
            # each anime's page just to learn its name.
            found.rawname = mn
            ret.append(found)
    return ret
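
# A small, hedged demo of the lookup flow (network access required; runs
# only as a script, not on import). Output depends entirely on ANN's
# current data and markup.
if __name__ == "__main__":
    for a in getlist("Cowboy Bebop"):
        print(a.id, a.name, a.type)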