Commit | Line | Data |
---|---|---|
5b7914ac FT |
1 | import os, hashlib, urllib.request, time, re, weakref |
2 | from urllib.parse import urljoin, urlencode | |
3 | import bs4 | |
def soup(cont):
    """Parse the markup `cont` with BeautifulSoup's "html.parser" backend."""
    return bs4.BeautifulSoup(cont, "html.parser")


# Root URL against which every encyclopedia page address is resolved.
base = "http://www.animenewsnetwork.com/encyclopedia/"
7 | ||
class error(Exception):
    """Common base class of the exceptions raised by this module."""
10 | ||
class incompatible(error):
    """Raised when a fetched page lacks the markup this module looks for."""

    def __init__(self):
        message = "ANN HTML has changed"
        super(incompatible, self).__init__(message)
14 | ||
# Directory holding cached page bodies ($HOME/.ann/cache), or None when no
# usable cache directory could be set up (caching is then disabled).
try:
    cachedir = os.path.join(os.getenv("HOME"), ".ann", "cache")
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(cachedir, exist_ok=True)
except (TypeError, OSError):
    # TypeError: HOME is unset, so getenv() returned None.
    # OSError: the directory could not be created (or the path is a file).
    cachedir = None
21 | ||
def cachename(url):
    """Return the cache file path for `url`, or None if caching is disabled.

    The file name is the hexadecimal MD5 digest of the ASCII-encoded URL,
    located inside `cachedir`.
    """
    if cachedir:
        digest = hashlib.md5(url.encode("ascii")).hexdigest()
        return os.path.join(cachedir, digest)
    return None
28 | ||
def _cachewrite(cachefile, data):
    """Store `data` in `cachefile` atomically; failures are ignored."""
    tmp = "%s.%i.tmp" % (cachefile, os.getpid())
    try:
        with open(tmp, "wb") as co:
            co.write(data)
        # Rename only once the body is complete, so a reader never sees a
        # truncated cache entry.
        os.replace(tmp, cachefile)
    except OSError:
        # The cache is only an optimisation: the download already succeeded,
        # so an unwritable cache must not fail the request.
        try:
            os.unlink(tmp)
        except OSError:
            pass


def get(url, maxage=86400):
    """Fetch `url` and return its body parsed by `soup()`.

    A cached copy is used when its modification time is less than `maxage`
    seconds old (default: one day). Otherwise the URL is downloaded with
    urllib and, when caching is enabled, the cache entry is rewritten.

    Errors from `urllib.request.urlopen` propagate to the caller; errors
    while reading or writing the cache do not.
    """
    data = None
    cachefile = cachename(url)
    if cachefile:
        try:
            if time.time() - os.stat(cachefile).st_mtime < maxage:
                with open(cachefile, "rb") as fp:
                    data = fp.read()
        except OSError:
            # Missing, vanished or unreadable entry: fall back to the network.
            data = None
    if data is None:
        with urllib.request.urlopen(url) as fp:
            data = fp.read()
        if cachefile:
            _cachewrite(cachefile, data)
    return soup(data)
46 | ||
def s(s, rx, rep):
    """Replace the first case-insensitive match of `rx` in `s` with `rep`.

    `rep` is inserted literally (no backreference expansion). When `rx`
    does not match, `s` is returned unchanged.
    """
    hit = re.search(rx, s, flags=re.I)
    if hit is None:
        return s
    head, tail = s[:hit.start()], s[hit.end():]
    return head + rep + tail
53 | ||
def afind(soup, *args, **kwargs):
    """Return `soup.find(*args, **kwargs)`, raising `incompatible` on None."""
    found = soup.find(*args, **kwargs)
    if found is not None:
        return found
    raise incompatible()
59 | ||
def cstr(soup):
    """Flatten `soup` into a single string.

    None is passed through as None. A bs4 Tag or a list yields the
    concatenation of `cstr()` applied to each of its elements. Anything
    else yields its `.string` attribute.
    """
    if soup is None:
        return None
    if not isinstance(soup, (bs4.Tag, list)):
        return soup.string
    return "".join(cstr(el) for el in soup)
70 | ||
class cproperty(object):
    """Descriptor that computes a value once per instance and caches it.

    Values are kept in a WeakKeyDictionary keyed by the owning instance.
    Assigning to the attribute stores the given value in the cache;
    deleting it drops the entry, so the next read calls `bk` again.
    """

    # Sentinel meaning "nothing cached"; lets None be a cacheable value.
    _default = object()

    def __init__(self, bk):
        self.bk = bk
        self.cache = weakref.WeakKeyDictionary()

    def __get__(self, ins, cls):
        # Access through the class returns the descriptor itself.
        if ins is None:
            return self
        hit = self.cache.get(ins, self._default)
        if hit is not self._default:
            return hit
        val = self.bk(ins)
        self.cache[ins] = val
        return val

    def __set__(self, ins, val):
        self.cache[ins] = val

    def __delete__(self, ins):
        if ins not in self.cache:
            return
        del self.cache[ins]
92 | ||
class anime(object):
    """One encyclopedia entry, identified by its numeric id.

    Nothing is downloaded on construction; the page is fetched through
    `get()` the first time a property that needs it is read, and the
    derived values are cached per instance by `cproperty`.
    """

    def __init__(self, id):
        self.id = id
        self.url = urljoin(base, "anime.php?id=%i" % self.id)

    @cproperty
    def _page(self):
        """The parsed page at `self.url`."""
        return get(self.url)

    @cproperty
    def _main(self):
        """The page's <div id="maincontent"> element."""
        return afind(self._page, "div", id="maincontent")

    def _info(self, nm):
        """Return the nodes that follow the label `nm` in its info box.

        Each "encyc-info-type" div inside the "content-zone" div is checked:
        the text of its <strong> child is lower-cased, stripped and has its
        last character dropped (presumably a trailing colon) before being
        compared with `nm`. Returns None when no box matches.
        """
        for t in afind(self._main, "div", id="content-zone")("div", "encyc-info-type"):
            if t.strong and t.strong.text.lower().strip()[:-1] == nm:
                return t.contents[t.contents.index(t.strong) + 1:]
        return None

    @cproperty
    def rawname(self):
        """Text of the page header, including any "(type)" suffix."""
        return afind(self._main, "h1", id="page_header").text

    # Splits "Some title (suffix)" into ("Some title", "suffix").
    _nre = re.compile(r"^(.*\S)\s+\(([^\)]+)\)$")

    @cproperty
    def _sname(self):
        """`rawname` split into (name, type); type is None without a suffix."""
        m = self._nre.search(self.rawname)
        if not m:
            return (self.rawname, None)
        return m.groups()[0:2]

    @property
    def name(self):
        """`rawname` without its trailing parenthesised part."""
        return self._sname[0]

    @property
    def type(self):
        """The trailing parenthesised part of `rawname`, or None."""
        return self._sname[1]

    @cproperty
    def names(self):
        """List of (title, annotation) tuples, the main name first.

        Alternative titles come from the "alternative title" info box; the
        annotation is the title's parenthesised suffix, or None.
        """
        ret = []
        # _info() returns None when the page has no such box; treat that as
        # "no alternative titles" rather than failing on iteration.
        for el in self._info("alternative title") or ():
            if isinstance(el, bs4.Tag) and el.name == "div" and "tab" in el.get("class", []):
                m = self._nre.search(el.text)
                if m:
                    ret.append((m.groups()[0], m.groups()[1]))
                else:
                    ret.append((el.text, None))
        # Make sure the main name appears exactly once, at the front.
        if (self.name, None) in ret:
            ret.remove((self.name, None))
        ret.insert(0, (self.name, None))
        return ret

    @cproperty
    def eps(self):
        """Value of the "number of episodes" box as an int, or None if absent."""
        ret = cstr(self._info("number of episodes"))
        if ret is None:
            return ret
        return int(ret)

    def __repr__(self):
        return "<ann.anime: %r (%i)>" % (self.name, self.id)

    def __str__(self):
        return self.name

    @classmethod
    def byid(cls, id):
        """Alternate constructor: return the entry with the given id."""
        return cls(id)
157 | ||
5b7914ac FT |
# Matches the href of an encyclopedia entry link and captures its numeric id.
# Raw string: "\d" in a normal string literal is an invalid escape sequence.
linkpat = re.compile(r"^/encyclopedia/anime\.php\?id=(\d+)$")
def getlist(name):
    """Return the `anime` entries whose listed title starts with `name`.

    A leading "the " or "a " is dropped from `name` first. The listing page
    is chosen by the first remaining character: its upper-cased form for an
    ASCII letter, "9" for anything else. Each listed title also has a
    leading "a " dropped before the case-insensitive prefix comparison, and
    that title is stored as the `rawname` of the returned entry.

    Raises `error` when nothing is left of `name`, and `incompatible` when
    the page does not have the expected structure.
    """
    name = s(name, r"^(the|a)\s+", "")
    if not name:
        raise error("list() needs a prefix of at least one character")
    fc = name[0]
    if 'a' <= fc <= 'z' or 'A' <= fc <= 'Z':
        fc = fc.upper()
    else:
        fc = '9'
    d = get(urljoin(base, "anime.php?" + urlencode({"list": fc})))
    prefix = name.lower()
    ret = []
    ldiv = afind(afind(d, "div", id="maincontent"), "div", "lst")
    for link in ldiv("a", "HOVERLINE"):
        # Only the bare text nodes of the <font> child form the title; a
        # missing <font> is reported as `incompatible`, like any other
        # unexpected markup.
        mn = "".join(el.strip() for el in afind(link, "font") if isinstance(el, str))
        mn = s(mn, r"^a\s+", "")
        if mn.lower().startswith(prefix):
            # A link without href fails the match and raises `incompatible`.
            m = linkpat.match(link.get("href", ""))
            if not m:
                raise incompatible()
            found = anime.byid(int(m.groups()[0]))
            found.rawname = mn
            ret.append(found)
    return ret