Commit | Line | Data |
---|---|---|
5b7914ac FT |
1 | import os, hashlib, urllib.request, time, re, weakref |
2 | from urllib.parse import urljoin, urlencode | |
3 | import bs4 | |
def soup(cont):
    """Parse HTML *cont* (bytes or str) into a BeautifulSoup tree.

    Named ``def`` rather than a lambda bound to a name (PEP 8 E731) so
    tracebacks and repr show a useful function name.
    """
    return bs4.BeautifulSoup(cont, "html.parser")
5 | ||
6602427b FT |
6 | __all__ = ["anime", "getlist", |
7 | "error", "incompatible"] | |
8 | ||
5b7914ac FT |
9 | base = "http://www.animenewsnetwork.com/encyclopedia/" |
10 | ||
class error(Exception):
    """Base exception for this module; all errors raised here derive from it."""
    pass
13 | ||
class incompatible(error):
    """Raised when ANN's HTML no longer matches what this scraper expects."""
    def __init__(self):
        super().__init__("ANN HTML has changed")
17 | ||
# Set up the per-user on-disk page cache; fall back to no caching when it
# cannot be created (e.g. HOME unset or the directory is not writable).
try:
    cachedir = os.path.join(os.getenv("HOME"), ".ann", "cache")
    # exist_ok avoids the isdir()/makedirs() race of the original check.
    os.makedirs(cachedir, exist_ok=True)
except (TypeError, OSError):
    # TypeError: os.getenv("HOME") returned None; OSError: mkdir failed.
    # A bare ``except:`` here would also have swallowed KeyboardInterrupt.
    cachedir = None
24 | ||
def cachename(url):
    """Return the cache-file path for *url*, or None when caching is disabled.

    The path is derived from the MD5 digest of the URL, so any URL maps to
    a flat, filesystem-safe filename inside ``cachedir``.
    """
    if not cachedir:
        return None
    digest = hashlib.md5()
    digest.update(url.encode("ascii"))
    return os.path.join(cachedir, digest.hexdigest())
31 | ||
def get(url):
    """Fetch *url* and return it parsed as a BeautifulSoup tree.

    Responses are cached on disk (when a cache directory is available) and
    a cached copy is reused while it is younger than 24 hours; otherwise
    the page is re-fetched and the cache entry rewritten.
    """
    data = None
    cachefile = cachename(url)
    if cachefile and os.path.exists(cachefile):
        # Only trust cache entries younger than one day.
        if time.time() - os.stat(cachefile).st_mtime < 86400:
            with open(cachefile, "rb") as fp:
                data = fp.read()
    if data is None:
        with urllib.request.urlopen(url) as fp:
            data = fp.read()
        if cachefile:
            # ``with`` replaces the original open/try/finally triplet.
            with open(cachefile, "wb") as co:
                co.write(data)
    return soup(data)
49 | ||
def s(s, rx, rep):
    """Replace the first case-insensitive match of *rx* in *s* with *rep*.

    *rep* is spliced in literally (no backreference expansion, unlike
    ``re.sub``).  The string is returned unchanged when *rx* does not match.
    """
    m = re.search(rx, s, re.I)
    if m is None:
        return s
    return s[:m.start()] + rep + s[m.end():]
56 | ||
def afind(soup, *args, **kwargs):
    """Like ``soup.find(...)`` but raise incompatible() on a missing match.

    Used so that unexpected ANN page structure surfaces as a clear error
    instead of a ``None`` that blows up somewhere downstream.
    """
    found = soup.find(*args, **kwargs)
    if found is None:
        raise incompatible()
    return found
62 | ||
def cstr(soup):
    """Collapse a soup node (or list of nodes) to its concatenated text.

    Tags and lists are flattened recursively; plain strings (including
    NavigableString, a str subclass) pass through; None maps to None; any
    other leaf falls back to its ``.string`` attribute.
    """
    if soup is None:
        return None
    if isinstance(soup, str):
        return soup
    if isinstance(soup, (bs4.Tag, list)):
        return "".join(cstr(child) for child in soup)
    return soup.string
75 | ||
class cproperty(object):
    """Data descriptor that computes a value once per instance and caches it.

    The cache is a WeakKeyDictionary keyed on the instance, so cached
    values never keep their owning instances alive.  Assignment overrides
    the cached value; deletion drops it so the next read recomputes.
    """

    # Sentinel distinguishing "nothing cached" from a cached None.
    _default = object()

    def __init__(self, bk):
        self.bk = bk  # the wrapped computation, called as bk(instance)
        self.cache = weakref.WeakKeyDictionary()

    def __get__(self, ins, cls):
        # Class-level access returns the descriptor itself.
        if ins is None:
            return self
        val = self.cache.get(ins, self._default)
        if val is not self._default:
            return val
        val = self.bk(ins)
        self.cache[ins] = val
        return val

    def __set__(self, ins, val):
        self.cache[ins] = val

    def __delete__(self, ins):
        self.cache.pop(ins, None)
97 | ||
class anime(object):
    """One anime entry on AnimeNewsNetwork, lazily scraped from its page.

    Only ``id``/``url`` are set eagerly; every other attribute is a
    ``cproperty`` that fetches and parses the encyclopedia page on first
    access.  All parsing assumes ANN's current HTML layout — helpers raise
    ``incompatible`` when the expected elements are missing.
    """

    def __init__(self, id):
        self.id = id
        self.url = urljoin(base, "anime.php?id=%i" % self.id)

    @cproperty
    def _page(self):
        # Full parsed page (fetched through the disk-backed cache).
        return get(self.url)

    @cproperty
    def _main(self):
        # Main content container of the page.
        return afind(self._page, "div", id="maincontent")

    def _info(self, nm):
        """Return the child nodes after the <strong> label matching *nm*.

        Scans the "encyc-info-type" divs in the content zone; the label is
        lowercased and its trailing character (presumably a colon — TODO
        confirm against live HTML) is stripped before comparison.  Returns
        None implicitly when no row matches.
        """
        for t in afind(self._main, "div", id="content-zone")("div", "encyc-info-type"):
            if t.strong and t.strong.text.lower().strip()[:-1] == nm:
                return t.contents[t.contents.index(t.strong) + 1:]

    @cproperty
    def rawname(self):
        # Page header text, e.g. "Title (TV)"; also assigned directly by
        # getlist() to pre-seed the cache.
        return afind(self._main, "h1", id="page_header").text

    # Splits "Some Title (TV)" into ("Some Title", "TV").
    _nre = re.compile(r"^(.*\S)\s+\(([^\)]+)\)$")

    @cproperty
    def _sname(self):
        # (name, type) pair; type is None when rawname has no "(...)" suffix.
        m = self._nre.search(self.rawname)
        if not m:
            return (self.rawname, None)
        return m.groups()[0:2]

    @property
    def name(self): return self._sname[0]

    @property
    def type(self): return self._sname[1]

    @cproperty
    def names(self):
        """All known titles as (name, language-or-None) tuples.

        The primary name is forced to the front of the list (with a None
        tag), deduplicated if the alternatives already contained it.
        """
        ret = []
        for el in self._info("alternative title"):
            if isinstance(el, bs4.Tag) and el.name == "div" and "tab" in el.get("class", []):
                m = self._nre.search(el.text)
                if m:
                    # "Title (lang)" form: split into (title, lang).
                    ret.append((m.groups()[0], m.groups()[1]))
                else:
                    ret.append((el.text, None))
        if (self.name, None) in ret:
            ret.remove((self.name, None))
        ret.insert(0, (self.name, None))
        return ret

    @cproperty
    def eps(self):
        # Episode count as int, or None when ANN lists none.
        ret = cstr(self._info("number of episodes"))
        if ret is None:
            return ret
        return int(ret)

    @cproperty
    def vintage(self):
        # NOTE(review): cstr() returns None when the "vintage" row is
        # missing, which would make .strip() raise AttributeError here.
        return cstr(self._info("vintage")).strip()

    @cproperty
    def genres(self):
        # Link texts of all <a> elements in the "genres" row ([] if absent).
        return [cstr(el) for x in (self._info("genres") or []) if isinstance(x, bs4.Tag) for el in x.findAll("a")]

    @cproperty
    def themes(self):
        # Link texts of all <a> elements in the "themes" row ([] if absent).
        return [cstr(el) for x in (self._info("themes") or []) if isinstance(x, bs4.Tag) for el in x.findAll("a")]

    def __repr__(self):
        return "<ann.anime: %r (%i)>" % (self.name, self.id)

    def __str__(self):
        return self.name

    @classmethod
    def byid(cls, id):
        """Alternate constructor: build an entry from its numeric ANN id."""
        return cls(id)
174 | ||
# Matches encyclopedia links and captures the numeric anime id.
# Raw string: the original mixed "\\." with a bare "\d", and invalid escape
# sequences in str literals are deprecated (a future SyntaxError).
linkpat = re.compile(r"^/encyclopedia/anime\.php\?id=(\d+)$")
def getlist(name):
    """Return anime objects whose title starts with the prefix *name*.

    A leading article ("the"/"a") is stripped from the prefix, the matching
    alphabetical index page is fetched, and each listed title is normalised
    (lowercased, leading "a " removed, long vowels romanised) before the
    prefix comparison.  Raises ``error`` for an empty prefix and
    ``incompatible`` when ANN's markup changes.
    """
    # Raw strings for the regexes: "\s" in a plain literal is a deprecated
    # invalid escape sequence (behavior otherwise unchanged).
    name = s(name, r"^(the|a)\s+", "")
    if len(name) < 1:
        raise error("list() needs a prefix of at least one character")
    # ANN indexes by first letter; everything non-alphabetic lives under '9'.
    fc = name[0]
    if 'a' <= fc <= 'z' or 'A' <= fc <= 'Z':
        fc = fc.upper()
    else:
        fc = '9'
    d = get(urljoin(base, "anime.php?" + urlencode({"list": fc})))
    ret = []
    ldiv = afind(afind(d, "div", id="maincontent"), "div", "lst")
    for link in ldiv("a", "HOVERLINE"):
        # Collect only the direct text pieces of the link's <font> element.
        rawname = ""
        for el in link.font:
            if isinstance(el, str):
                rawname += el.strip()
        # Normalised form used solely for the prefix comparison.
        mn = rawname.lower()
        mn = s(mn, r"^a\s+", "")
        mn = mn.replace("\u014d", "ou")  # ō
        mn = mn.replace("\u016b", "uu")  # ū
        if mn.lower().startswith(name.lower()):
            m = linkpat.match(link["href"])
            if not m:
                raise incompatible()
            found = anime.byid(int(m.groups()[0]))
            # Pre-seed the cproperty cache so the page isn't re-fetched
            # just to learn the name.
            found.rawname = rawname
            ret.append(found)
    return ret