Commit | Line | Data |
---|---|---|
5b7914ac FT |
1 | import os, hashlib, urllib.request, time, re, weakref |
2 | from urllib.parse import urljoin, urlencode | |
3 | import bs4 | |
def soup(cont):
    """Parse *cont* (bytes or str of HTML) with BeautifulSoup's html.parser.

    Replaces the original ``soup = lambda ...`` assignment: PEP 8 (E731)
    prefers ``def`` for named callables, which also gives a useful repr
    and traceback name.
    """
    return bs4.BeautifulSoup(cont, "html.parser")
5 | ||
# Root URL for ANN's encyclopedia; all page requests are resolved relative to it.
base = "http://www.animenewsnetwork.com/encyclopedia/"
7 | ||
class error(Exception):
    """Base class for every exception raised by this ANN scraper module."""
10 | ||
class incompatible(error):
    """Raised when ANN's HTML layout no longer matches what the parser expects."""

    def __init__(self):
        # Fixed message: there is nothing more specific to report.
        super().__init__("ANN HTML has changed")
14 | ||
# Best-effort cache directory under $HOME/.ann/cache; caching is disabled
# (cachedir = None) when it cannot be created.  The original used a bare
# ``except:``, which also swallows KeyboardInterrupt/SystemExit; we narrow
# it to the failures that can actually occur here: TypeError when $HOME is
# unset (os.path.join receives None) and OSError from makedirs.
try:
    cachedir = os.path.join(os.getenv("HOME"), ".ann", "cache")
    if not os.path.isdir(cachedir):
        os.makedirs(cachedir)
except (TypeError, OSError):
    cachedir = None
21 | ||
def cachename(url):
    """Return the cache-file path for *url*, or None when caching is disabled.

    The file name is the MD5 hex digest of the URL (used only as a stable
    file-name hash, not for security).
    """
    if not cachedir:
        return None
    digest = hashlib.md5(url.encode("ascii")).hexdigest()
    return os.path.join(cachedir, digest)
28 | ||
def get(url):
    """Fetch *url* and return it parsed as a BeautifulSoup document.

    Responses are cached on disk (see ``cachename``) and reused while the
    cached copy is less than 24 hours old; a fresh download transparently
    rewrites the cache entry.
    """
    data = None
    cachefile = cachename(url)
    # Serve from cache only while the entry is younger than one day.
    if cachefile and os.path.exists(cachefile):
        if time.time() - os.stat(cachefile).st_mtime < 86400:
            with open(cachefile, "rb") as fp:
                data = fp.read()
    if data is None:
        with urllib.request.urlopen(url) as fp:
            data = fp.read()
        if cachefile:
            # Context manager replaces the original's manual
            # open/try/finally/close resource handling.
            with open(cachefile, "wb") as co:
                co.write(data)
    return soup(data)
46 | ||
def s(s, rx, rep):
    """Return *s* with the first case-insensitive match of *rx* replaced by *rep*.

    When the pattern does not match, *s* is returned unchanged.
    (sed-style helper; only the first occurrence is substituted.)
    """
    m = re.search(rx, s, re.I)
    if m is None:
        return s
    return "".join((s[:m.start()], rep, s[m.end():]))
53 | ||
def afind(soup, *args, **kwargs):
    """``soup.find(...)`` that raises ``incompatible`` instead of returning None.

    Used so that any unexpected change in ANN's markup surfaces as a single
    well-defined exception rather than an AttributeError downstream.
    """
    found = soup.find(*args, **kwargs)
    if found is None:
        raise incompatible()
    return found
59 | ||
def cstr(soup):
    """Recursively concatenate the text of a bs4 tag (or list of nodes).

    Tags and lists are flattened child-by-child; leaf nodes contribute
    their ``.string`` value.
    """
    if isinstance(soup, (bs4.Tag, list)):
        return "".join(cstr(child) for child in soup)
    return soup.string
68 | ||
class cproperty(object):
    """Per-instance caching property backed by a weak-key dictionary.

    The first read on an instance calls the wrapped function and memoizes
    the result; instances are held weakly, so cached values vanish when
    the owner is garbage-collected.  Assignment overrides the cached value
    and deletion forces recomputation on the next read.
    """

    # Sentinel distinguishing "not cached" from a cached None.
    _default = object()

    def __init__(self, bk):
        self.bk = bk
        self.cache = weakref.WeakKeyDictionary()

    def __get__(self, ins, cls):
        # Class-level access returns the descriptor itself.
        if ins is None:
            return self
        try:
            return self.cache[ins]
        except KeyError:
            val = self.bk(ins)
            self.cache[ins] = val
            return val

    def __set__(self, ins, val):
        self.cache[ins] = val

    def __delete__(self, ins):
        # Deleting an uncached value is a no-op, matching the original.
        self.cache.pop(ins, None)
90 | ||
class anime(object):
    """Lazily-loaded ANN encyclopedia anime entry, identified by numeric id.

    All page data is fetched on first attribute access via ``cproperty``,
    so constructing an instance performs no network I/O.
    """

    def __init__(self, id):
        self.id = id
        self.url = urljoin(base, "anime.php?id=%i" % self.id)

    @cproperty
    def _page(self):
        # Fetched (and disk-cached) only on first access.
        return get(self.url)

    @cproperty
    def _main(self):
        return afind(self._page, "div", id="maincontent")

    @cproperty
    def _info(self):
        """Map lower-cased field labels to the nodes following each label.

        Each info div looks like ``<strong>Label:</strong> value...``; the
        trailing ":" is stripped from the key and the value is everything
        after the <strong> element within the same div.
        """
        ret = {}
        for t in afind(self._main, "div", id="content-zone")("div", "encyc-info-type"):
            if t.strong:
                ret[t.strong.text.lower().strip()[:-1]] = t.contents[t.contents.index(t.strong) + 1:]
        return ret

    @cproperty
    def rawname(self):
        # BUG FIX: the original computed this expression but never returned
        # it, so rawname was always None and name/type/__repr__ broke.
        return afind(self._main, "h1", id="page_header").text

    # Matches "Name (TYPE)" page headers, e.g. "Cowboy Bebop (TV)".
    _nre = re.compile(r"^(.*\S)\s+\(([^\)]+)\)$")

    @cproperty
    def _sname(self):
        """Split rawname into (name, type); type is None when absent."""
        m = self._nre.search(self.rawname)
        if not m:
            return (self.rawname, None)
        return m.groups()[0:2]

    @property
    def name(self): return self._sname[0]

    @property
    def type(self): return self._sname[1]

    @cproperty
    def eps(self):
        """Episode count parsed from the "Number of episodes" info field."""
        return int(cstr(self._info["number of episodes"]))

    def __repr__(self):
        return "<ann.anime: %r (%i)>" % (self.name, self.id)

    def __str__(self):
        return self.name
136 | ||
# Matches encyclopedia anime links and captures the numeric id.
# Raw string fixes the invalid "\d" escape in the original non-raw literal
# (a SyntaxWarning on modern Python).
linkpat = re.compile(r"^/encyclopedia/anime\.php\?id=(\d+)$")

def getlist(name):
    """Return ``anime`` objects whose title starts with *name* (case-insensitive).

    A leading "the " is stripped from the prefix, matching ANN's own index
    ordering.  Raises ``error`` when the remaining prefix is empty and
    ``incompatible`` when the page layout is unrecognized.
    """
    # Raw string fixes the invalid "\s" escape in the original literal.
    name = s(name, r"^the\s+", "")
    if len(name) < 1:
        raise error("list() needs a prefix of at least one character")
    fc = name[0]
    if 'a' <= fc <= 'z' or 'A' <= fc <= 'Z':
        fc = fc.upper()
    else:
        # ANN groups all non-alphabetic titles under the "9" index page.
        fc = '9'
    d = get(urljoin(base, "anime.php?" + urlencode({"list": fc})))
    ret = []
    ldiv = afind(afind(d, "div", id="maincontent"), "div", "lst")
    for link in ldiv("a", "HOVERLINE"):
        mn = ""
        for el in link.font:
            if isinstance(el, str):
                mn += el.strip()
        if mn.lower().startswith(name.lower()):
            m = linkpat.match(link["href"])
            if not m:
                raise incompatible()
            found = anime(int(m.groups()[0]))
            # Seed the cproperty cache so the name needs no second fetch.
            found.rawname = mn
            ret.append(found)
    return ret
162 | return ret |