acmecert: Initial commit.
[utils.git] / ann.py
CommitLineData
5b7914ac
FT
1import os, hashlib, urllib.request, time, re, weakref
2from urllib.parse import urljoin, urlencode
3import bs4
def soup(cont):
    """Parse *cont* with Beautiful Soup using the "html.parser" backend."""
    return bs4.BeautifulSoup(cont, "html.parser")
5
6602427b
FT
# Names exported by ``from ann import *``.
__all__ = ["anime", "getlist",
           "error", "incompatible"]
8
5b7914ac
FT
# Root URL that the relative page names used in this module are joined onto.
base = "http://www.animenewsnetwork.com/encyclopedia/"
10
class error(Exception):
    """Base exception type for this module."""
13
class incompatible(error):
    """Raised when a page does not have the structure this module expects."""

    def __init__(self):
        message = "ANN HTML has changed"
        super().__init__(message)
17
# Directory holding cached page downloads, or None when caching is unavailable.
try:
    cachedir = os.path.join(os.getenv("HOME"), ".ann", "cache")
    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(cachedir, exist_ok=True)
except (OSError, TypeError):
    # TypeError: HOME is unset, so os.path.join() received None.
    # OSError: the directory could not be created (or the path is not a directory).
    cachedir = None
24
def cachename(url):
    """Return the cache file path for *url*, or None when caching is disabled.

    The file name is the hexadecimal MD5 digest of the ASCII-encoded URL, so
    a URL containing non-ASCII characters raises UnicodeEncodeError.
    """
    if not cachedir:
        return None
    digest = hashlib.md5(url.encode("ascii")).hexdigest()
    return os.path.join(cachedir, digest)
31
def get(url):
    """Fetch *url* and return its content parsed by ``soup``.

    A cached copy is used when one exists and was modified less than 86400
    seconds (one day) ago. Otherwise the page is downloaded and, when caching
    is enabled, the raw bytes are written to the cache file.
    """
    data = None
    cachefile = cachename(url)
    if cachefile:
        # Stat once instead of exists() + stat(): a cache file that vanishes
        # in between is then simply treated as a cache miss.
        try:
            mtime = os.stat(cachefile).st_mtime
        except OSError:
            mtime = None
        if mtime is not None and time.time() - mtime < 86400:
            try:
                with open(cachefile, "rb") as fp:
                    data = fp.read()
            except FileNotFoundError:
                # Removed between stat() and open(); fall through to download.
                data = None
    if data is None:
        with urllib.request.urlopen(url) as fp:
            data = fp.read()
        if cachefile:
            with open(cachefile, "wb") as co:
                co.write(data)
    return soup(data)
49
def s(s, rx, rep):
    """Replace the first case-insensitive match of *rx* in *s* with *rep*.

    *rep* is inserted literally (no backreference expansion). When *rx* does
    not match, *s* is returned unchanged.
    """
    hit = re.search(rx, s, re.I)
    if hit is None:
        return s
    head, tail = s[:hit.start()], s[hit.end():]
    return head + rep + tail
56
def afind(soup, *args, **kwargs):
    """Call ``soup.find(*args, **kwargs)`` and insist on a result.

    Returns whatever ``find`` returned; raises ``incompatible`` when that is
    None.
    """
    found = soup.find(*args, **kwargs)
    if found is not None:
        return found
    raise incompatible()
62
def cstr(soup):
    """Flatten *soup* into a string.

    - None is returned as None.
    - A str is returned as-is.
    - A ``bs4.Tag`` or a list is flattened by concatenating ``cstr`` of each
      element it yields when iterated.
    - Anything else yields its ``.string`` attribute.
    """
    if soup is None:
        return None
    if isinstance(soup, str):
        return soup
    if isinstance(soup, (bs4.Tag, list)):
        return "".join(cstr(child) for child in soup)
    return soup.string
75
class cproperty(object):
    """Descriptor that computes a value once per instance and caches it.

    The wrapped function ``bk`` is called with the instance on first access;
    the result is kept in a ``WeakKeyDictionary`` keyed by the instance.
    Assigning to the attribute overrides the cached value, and deleting it
    discards the cached value so the next access recomputes it.
    """

    # Sentinel meaning "nothing cached"; distinguishes a miss from a cached None.
    _default = object()

    def __init__(self, bk):
        self.bk = bk
        self.cache = weakref.WeakKeyDictionary()

    def __get__(self, ins, cls):
        # Accessed on the class itself: hand back the descriptor.
        if ins is None:
            return self
        cached = self.cache.get(ins, self._default)
        if cached is not self._default:
            return cached
        value = self.bk(ins)
        self.cache[ins] = value
        return value

    def __set__(self, ins, val):
        self.cache[ins] = val

    def __delete__(self, ins):
        # Deleting an attribute that was never computed is a silent no-op.
        if ins not in self.cache:
            return
        del self.cache[ins]
97
class anime(object):
    """One encyclopedia entry, identified by its numeric id.

    The page is fetched lazily: constructing an instance performs no I/O, and
    each ``cproperty`` below is computed on first access and then cached.
    """

    def __init__(self, id):
        self.id = id
        self.url = urljoin(base, "anime.php?id=%i" % self.id)

    @cproperty
    def _page(self):
        # Parsed page for this entry, as returned by get().
        return get(self.url)

    @cproperty
    def _main(self):
        # The <div id="maincontent"> element; afind() raises if it is missing.
        return afind(self._page, "div", id="maincontent")

    def _info(self, nm):
        """Return the nodes following the label *nm* in its info box.

        Scans the ``encyc-info-type`` divs inside ``content-zone`` for one
        whose ``<strong>`` text, lower-cased, stripped and with its last
        character removed (presumably a trailing colon), equals *nm*.
        Returns the list of sibling nodes after that ``<strong>``, or None
        when no such box exists.
        """
        for t in afind(self._main, "div", id="content-zone")("div", "encyc-info-type"):
            if t.strong and t.strong.text.lower().strip()[:-1] == nm:
                return t.contents[t.contents.index(t.strong) + 1:]
        return None

    @cproperty
    def rawname(self):
        # Page header text, e.g. a title optionally followed by "(type)".
        return afind(self._main, "h1", id="page_header").text

    # Splits "Title (Type)" into the title and the parenthesised suffix.
    _nre = re.compile(r"^(.*\S)\s+\(([^\)]+)\)$")

    @cproperty
    def _sname(self):
        # (name, type) pair; type is None when rawname has no "(...)" suffix.
        m = self._nre.search(self.rawname)
        if not m:
            return (self.rawname, None)
        return m.groups()[0:2]

    @property
    def name(self):
        return self._sname[0]

    @property
    def type(self):
        return self._sname[1]

    @cproperty
    def names(self):
        """List of ``(title, annotation)`` pairs, primary name first.

        Alternative titles are split with the same "Title (Annotation)"
        pattern as the page header; the annotation is None when absent.
        """
        ret = []
        # _info() returns None when the page has no alternative titles;
        # treat that as an empty list, as genres/themes do.
        for el in self._info("alternative title") or []:
            if isinstance(el, bs4.Tag) and el.name == "div" and "tab" in el.get("class", []):
                m = self._nre.search(el.text)
                if m:
                    ret.append((m.groups()[0], m.groups()[1]))
                else:
                    ret.append((el.text, None))
        # Make sure the primary name appears exactly once, at the front.
        if (self.name, None) in ret:
            ret.remove((self.name, None))
        ret.insert(0, (self.name, None))
        return ret

    @cproperty
    def eps(self):
        """Number of episodes as an int, or None when the page has no such box."""
        ret = cstr(self._info("number of episodes"))
        if ret is None:
            return ret
        return int(ret)

    @cproperty
    def vintage(self):
        """Stripped text of the "vintage" box, or None when the page has none."""
        ret = cstr(self._info("vintage"))
        # cstr(None) is None; calling .strip() on it would raise AttributeError.
        if ret is None:
            return None
        return ret.strip()

    @cproperty
    def genres(self):
        # Text of every <a> found inside the tags of the "genres" box.
        return [cstr(el) for x in (self._info("genres") or []) if isinstance(x, bs4.Tag) for el in x.findAll("a")]

    @cproperty
    def themes(self):
        # Text of every <a> found inside the tags of the "themes" box.
        return [cstr(el) for x in (self._info("themes") or []) if isinstance(x, bs4.Tag) for el in x.findAll("a")]

    def __repr__(self):
        return "<ann.anime: %r (%i)>" % (self.name, self.id)

    def __str__(self):
        return self.name

    @classmethod
    def byid(cls, id):
        """Alternate constructor: return the entry with numeric id *id*."""
        return cls(id)
174
5b7914ac
FT
# Matches the href of an encyclopedia entry link and captures its numeric id.
# Raw string: "\d" in a plain literal is an invalid escape sequence.
linkpat = re.compile(r"^/encyclopedia/anime\.php\?id=(\d+)$")
def getlist(name):
    """Return the ``anime`` entries whose listed title starts with *name*.

    A leading "the " or "a " is removed from *name* first. The listing page
    is chosen by the first remaining character: its upper-cased form for
    ASCII letters, "9" for anything else. Listed titles are compared
    case-insensitively after dropping a leading "a " and replacing the
    macron vowels U+014D / U+016B with "ou" / "uu".

    Raises ``error`` when *name* is empty after stripping the article, and
    ``incompatible`` when the page does not have the expected structure.
    """
    name = s(name, r"^(the|a)\s+", "")
    if len(name) < 1:
        raise error("getlist() needs a prefix of at least one character")
    fc = name[0]
    if 'a' <= fc <= 'z' or 'A' <= fc <= 'Z':
        fc = fc.upper()
    else:
        fc = '9'
    d = get(urljoin(base, "anime.php?" + urlencode({"list": fc})))
    ldiv = afind(afind(d, "div", id="maincontent"), "div", "lst")
    prefix = name.lower()  # loop-invariant, so computed once
    ret = []
    for link in ldiv("a", "HOVERLINE"):
        # Only the bare text nodes of <font> form the title; nested tags are skipped.
        rawname = "".join(el.strip() for el in link.font if isinstance(el, str))
        mn = s(rawname.lower(), r"^a\s+", "")
        mn = mn.replace("\u014d", "ou").replace("\u016b", "uu")
        if mn.startswith(prefix):
            m = linkpat.match(link["href"])
            if not m:
                raise incompatible()
            found = anime.byid(int(m.groups()[0]))
            # Pre-seed the cached property so reading the name needs no page fetch.
            found.rawname = rawname
            ret.append(found)
    return ret