anndl: Replaced with new Python version.
[utils.git] / ann.py
... / ...
CommitLineData
1import os, hashlib, urllib.request, time, re, weakref
2from urllib.parse import urljoin, urlencode
3import bs4
def soup(cont):
    """Parse the markup *cont* into a BeautifulSoup tree using "html.parser"."""
    return bs4.BeautifulSoup(cont, "html.parser")


# Every encyclopedia URL built in this module is joined onto this root.
base = "http://www.animenewsnetwork.com/encyclopedia/"
7
class error(Exception):
    """Base class for the exceptions raised by this module."""
10
class incompatible(error):
    """Raised when a fetched page lacks an element this module relies on."""

    def __init__(self):
        # The message is fixed; callers distinguish the case by type.
        message = "ANN HTML has changed"
        super(incompatible, self).__init__(message)
14
# Locate (and create if needed) the on-disk page cache under $HOME/.ann/cache.
# The cache is optional: when it cannot be set up, cachedir is None and
# cachename() reports "no cache file" for every URL.
try:
    cachedir = os.path.join(os.getenv("HOME"), ".ann", "cache")
    # exist_ok avoids the check-then-create race of a separate isdir() test;
    # it still raises if the path exists but is not a directory.
    os.makedirs(cachedir, exist_ok=True)
except (TypeError, OSError):
    # TypeError: HOME is unset, so os.getenv() returned None.
    # OSError: the directory could not be created.
    cachedir = None
21
def cachename(url):
    """Return the cache file path for *url*, or None when caching is off.

    The file name is the hex MD5 digest of the ASCII-encoded URL, placed
    inside ``cachedir``.
    """
    if cachedir:
        digest = hashlib.md5(url.encode("ascii")).hexdigest()
        return os.path.join(cachedir, digest)
    return None
28
def get(url):
    """Fetch *url* and return it parsed by ``soup()``, using the disk cache.

    A cached copy is used when its modification time is less than 86400
    seconds (24 hours) old; otherwise the URL is downloaded and the cache
    entry is rewritten.  The cache is best-effort: an unreadable entry is
    treated as a miss and a failed cache write does not fail the fetch.

    Errors from ``urllib.request.urlopen`` propagate to the caller.
    """
    data = None
    cachefile = cachename(url)
    if cachefile:
        try:
            # stat() directly instead of exists()+stat(): the entry may
            # vanish between the two calls.
            if time.time() - os.stat(cachefile).st_mtime < 86400:
                with open(cachefile, "rb") as fp:
                    data = fp.read()
        except OSError:
            # Missing or unreadable entry: fall through to the network.
            data = None
    if data is None:
        with urllib.request.urlopen(url) as fp:
            data = fp.read()
        if cachefile:
            # Write to a sibling temp file and rename it into place so an
            # interrupted write never leaves a truncated entry that would
            # be served as fresh for the next 24 hours.
            tmpname = "%s.%i.tmp" % (cachefile, os.getpid())
            try:
                with open(tmpname, "wb") as co:
                    co.write(data)
                os.replace(tmpname, cachefile)
            except OSError:
                # The download already succeeded; just drop the temp file.
                try:
                    os.unlink(tmpname)
                except OSError:
                    pass
    return soup(data)
46
def s(s, rx, rep):
    """Replace the first case-insensitive match of *rx* in *s* with *rep*.

    *rep* is inserted literally (no backreference expansion).  When *rx*
    does not match, *s* is returned unchanged.
    """
    hit = re.search(rx, s, flags=re.I)
    if hit is None:
        return s
    head = s[:hit.start()]
    tail = s[hit.end():]
    return head + rep + tail
53
def afind(soup, *args, **kwargs):
    """Call ``soup.find(*args, **kwargs)`` and insist on a result.

    Returns whatever ``find`` returned; raises ``incompatible`` when that
    is None.
    """
    found = soup.find(*args, **kwargs)
    if found is not None:
        return found
    raise incompatible()
59
def cstr(soup):
    """Flatten *soup* into a single string.

    A ``bs4.Tag`` or a list is flattened by concatenating ``cstr()`` of
    each of its items; None yields None; anything else yields its
    ``.string`` attribute.
    """
    if soup is None:
        return None
    if not isinstance(soup, (bs4.Tag, list)):
        return soup.string
    return "".join(cstr(child) for child in soup)
70
class cproperty(object):
    """Descriptor that computes a value once per instance and caches it.

    The wrapped function ``bk`` is called on first access.  Results are
    kept in a ``WeakKeyDictionary`` keyed by the owning instance.
    Assigning to the attribute overwrites the cached value; deleting it
    discards the cached value so the next access recomputes.
    """

    # Unique marker meaning "nothing cached", so that None can be cached.
    _default = object()

    def __init__(self, bk):
        self.cache = weakref.WeakKeyDictionary()
        self.bk = bk

    def __get__(self, ins, cls):
        # Class-level access returns the descriptor itself.
        if ins is None:
            return self
        cached = self.cache.get(ins, self._default)
        if cached is not self._default:
            return cached
        computed = self.bk(ins)
        self.cache[ins] = computed
        return computed

    def __set__(self, ins, val):
        self.cache[ins] = val

    def __delete__(self, ins):
        # Deleting a value that was never cached is a no-op.
        if ins not in self.cache:
            return
        del self.cache[ins]
92
class anime(object):
    """One anime entry of the ANN encyclopedia, scraped lazily.

    Constructing an instance only records the id and builds the URL; the
    page is fetched (through ``get()``) the first time a scraped
    attribute is read, and each scraped attribute is cached by
    ``cproperty``.

    Attributes:
        id: the numeric encyclopedia id passed to the constructor.
        url: the ``anime.php?id=<id>`` page URL under ``base``.
    """

    def __init__(self, id):
        self.id = id
        self.url = urljoin(base, "anime.php?id=%i" % self.id)

    @cproperty
    def _page(self):
        """The parsed page for this entry."""
        return get(self.url)

    @cproperty
    def _main(self):
        """The ``<div id="maincontent">`` element of the page."""
        return afind(self._page, "div", id="maincontent")

    def _info(self, nm):
        """Return the nodes following the label of info field *nm*.

        Looks through the ``encyc-info-type`` divs inside
        ``<div id="content-zone">`` for one whose ``<strong>`` label,
        lower-cased, stripped and with its last character removed,
        equals *nm*.  Returns the list of sibling nodes after that label,
        or None when no such field exists.
        """
        for t in afind(self._main, "div", id="content-zone")("div", "encyc-info-type"):
            if t.strong and t.strong.text.lower().strip()[:-1] == nm:
                return t.contents[t.contents.index(t.strong) + 1:]
        return None

    @cproperty
    def rawname(self):
        """The text of the ``<h1 id="page_header">`` element."""
        return afind(self._main, "h1", id="page_header").text

    # Splits "Title (suffix)" into the title and the parenthesised suffix.
    _nre = re.compile(r"^(.*\S)\s+\(([^\)]+)\)$")

    @cproperty
    def _sname(self):
        """``(name, type)`` split out of ``rawname``; type may be None."""
        m = self._nre.search(self.rawname)
        if not m:
            return (self.rawname, None)
        return m.groups()[0:2]

    @property
    def name(self):
        """The title without its trailing parenthesised suffix."""
        return self._sname[0]

    @property
    def type(self):
        """The trailing parenthesised suffix of the title, or None."""
        return self._sname[1]

    @cproperty
    def names(self):
        """List of ``(title, suffix)`` pairs, the primary name first.

        The alternative titles come from the ``<div class="tab">``
        elements of the "alternative title" info field; *suffix* is the
        parenthesised part of each title, or None.  ``(name, None)`` is
        always the first element.
        """
        ret = []
        # _info() returns None when the page has no "alternative title"
        # field; treat that as "no alternatives" instead of failing.
        for el in self._info("alternative title") or ():
            if isinstance(el, bs4.Tag) and el.name == "div" and "tab" in el.get("class", []):
                m = self._nre.search(el.text)
                if m:
                    ret.append((m.groups()[0], m.groups()[1]))
                else:
                    ret.append((el.text, None))
        if (self.name, None) in ret:
            ret.remove((self.name, None))
        ret.insert(0, (self.name, None))
        return ret

    @cproperty
    def eps(self):
        """The "number of episodes" field as an int, or None if absent.

        Raises ValueError when the field text is not an integer.
        """
        ret = cstr(self._info("number of episodes"))
        if ret is None:
            return ret
        return int(ret)

    def __repr__(self):
        return "<ann.anime: %r (%i)>" % (self.name, self.id)

    def __str__(self):
        return self.name

    @classmethod
    def byid(cls, id):
        """Alternate constructor: return the entry with encyclopedia id *id*."""
        return cls(id)
157
# Matches the href of an anime entry link and captures its numeric id.
# Raw string: "\d" in a plain literal is an invalid escape sequence.
linkpat = re.compile(r"^/encyclopedia/anime\.php\?id=(\d+)$")


def getlist(name):
    """Return the anime whose listed title starts with *name*.

    A leading "the" followed by whitespace is dropped from *name*
    (case-insensitively).  The listing page requested is selected by the
    first remaining character: its upper-cased form for an ASCII letter,
    '9' for anything else.  Matching of titles against *name* is
    case-insensitive.

    Returns a list of ``anime`` objects whose ``rawname`` is preset to the
    title text taken from the listing.

    Raises:
        error: *name* is empty after dropping the leading "the".
        incompatible: the listing page does not have the expected layout.
    """
    name = s(name, r"^the\s+", "")
    if len(name) < 1:
        raise error("list() needs a prefix of at least one character")
    fc = name[0]
    if 'a' <= fc <= 'z' or 'A' <= fc <= 'Z':
        fc = fc.upper()
    else:
        fc = '9'
    d = get(urljoin(base, "anime.php?" + urlencode({"list": fc})))
    ldiv = afind(afind(d, "div", id="maincontent"), "div", "lst")
    prefix = name.lower()  # loop-invariant
    ret = []
    for link in ldiv("a", "HOVERLINE"):
        if link.font is None:
            # Same policy as for an unparsable href below: an unexpected
            # layout is reported as incompatible, not as a TypeError.
            raise incompatible()
        # Only the direct string children of <font> make up the title;
        # nested tags are skipped.
        mn = "".join(el.strip() for el in link.font if isinstance(el, str))
        if not mn.lower().startswith(prefix):
            continue
        m = linkpat.match(link["href"])
        if not m:
            raise incompatible()
        found = anime.byid(int(m.groups()[0]))
        # Seed the cached rawname so reading the name needs no page fetch.
        found.rawname = mn
        ret.append(found)
    return ret