Commit | Line | Data |
---|---|---|
c39028a4 | 1 | import urllib.request, urllib.parse, http.cookiejar, re, bs4, os, time |
81be6921 FT |
2 | from . import profile, lib, htcache |
# Short alias for the BeautifulSoup constructor.
soup = bs4.BeautifulSoup

def soupify(cont):
    """Parse *cont* into a BeautifulSoup tree with the "html.parser" backend."""
    return soup(cont, "html.parser")
5 | ||
class pageerror(Exception):
    """Exception for a page that does not have the expected contents.

    The page object passed to the constructor is stored in the ``page``
    attribute so that whoever catches the error can inspect it.
    """
    def __init__(self, message, page):
        Exception.__init__(self, message)
        self.page = page
08e259d7 | 10 | |
c39028a4 FT |
def iterlast(itr, default=None):
    """Return the last item produced by *itr*.

    If *itr* produces nothing at all, *default* is returned instead
    (``None`` unless the caller supplies something else).
    """
    # Always seed the result with the default, so that an empty iterator
    # combined with default=None returns None rather than hitting an
    # unbound local variable.
    ret = default
    for ret in itr:
        pass
    return ret
19 | ||
def find1(el, *args, **kwargs):
    """Like ``el.find(*args, **kwargs)``, but never returns None.

    When nothing matches, a pageerror is raised whose ``page`` is the
    last entry of ``el.parents`` (or *el* itself if it has no parents).
    """
    found = el.find(*args, **kwargs)
    if found is not None:
        return found
    raise pageerror("could not find expected element", iterlast(el.parents, el))
25 | ||
08e259d7 FT |
def byclass(el, name, cl):
    """Return the first *name* tag under *el* whose class list contains *cl*.

    Returns None when no such tag is found.
    """
    for cand in el.findAll(name):
        if isinstance(cand, bs4.Tag) and cl in cand.get("class", []):
            return cand
    return None
33 | ||
def nextel(el):
    """Return the first sibling following *el* that is a ``bs4.Tag``.

    Siblings that are not tags are skipped. There is no special handling
    for running out of siblings.
    """
    sib = el.nextSibling
    while not isinstance(sib, bs4.Tag):
        sib = sib.nextSibling
    return sib
39 | ||
c39028a4 FT |
def fetchreader(lib, readerid, page):
    """Fetch and parse the "areader" document for one page of a chapter.

    *lib* supplies the base URL (``lib.base``) and the session used for
    the request (``lib.sess``); *readerid* and *page* are sent as the
    ``id`` and ``p`` query parameters. Returns the parsed document.
    """
    query = urllib.parse.urlencode({"id": readerid, "p": str(page)})
    body = lib.sess.fetch(lib.base + "areader?" + query,
                          headers={"Referer": "http://bato.to/reader"})
    return soupify(body)
44 | ||
class page(lib.page):
    """A single page of a chapter; its image URL is looked up on demand."""

    def __init__(self, chapter, stack, readerid, n):
        """Create page number *n* of *chapter*.

        *stack* is stored as given; *readerid* is the id passed to
        fetchreader() when the image URL is needed.
        """
        self.stack = stack
        self.lib = chapter.lib
        self.chapter = chapter
        self.n = n
        self.id = str(n)
        self.name = "Page %s" % n
        self.readerid = readerid
        # Cached image URL; filled in by the first call to iurl().
        self.ciurl = None

    def iurl(self):
        """Return the image URL of this page, fetching it on first use.

        Raises pageerror (via find1) if the reader document has no
        ``<img id="comic_page">`` element.
        """
        if self.ciurl is None:
            pg = fetchreader(self.lib, self.readerid, self.n)
            img = find1(pg, "img", id="comic_page")
            self.ciurl = img["src"]
        return self.ciurl

    def open(self):
        """Return ``lib.stdimgstream`` for this page's image URL."""
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    # Was spelled "__repr" (no trailing underscores), which Python
    # name-mangles into a private method that repr() never calls.
    def __repr__(self):
        return "<batoto.page %r.%r.%r.%r>" % (self.chapter.manga.name, self.chapter.group.name, self.chapter.name, self.name)
08e259d7 FT |
71 | |
class chapter(lib.pagelist):
    """A chapter: an indexable list of page objects, loaded on demand."""

    def __init__(self, group, stack, id, name, readerid):
        """Create a chapter belonging to *group*.

        The manga and library are taken from *group*; *readerid* is the
        id passed to fetchreader() when the page list is loaded.
        """
        self.stack = stack
        self.group = group
        self.manga = group.manga
        self.lib = self.manga.lib
        self.id = id
        self.name = name
        self.readerid = readerid
        # Cached list of page objects; filled in by the first pages() call.
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    # Matches the label of an entry in the reader's page selector.
    pnre = re.compile(r"page (\d+)")
    def pages(self):
        """Return the list of pages in this chapter, fetching it on first use.

        The list is built from the ``<option>`` entries of the
        ``<select id="page_select">`` element of the first reader page.

        Raises pageerror if that element is missing or if an option label
        does not match ``pnre``.
        """
        if self.cpag is None:
            pg = fetchreader(self.lib, self.readerid, 1)
            cpag = []
            for opt in find1(pg, "select", id="page_select").findAll("option"):
                label = opt.string
                # Report an unexpected label as a pageerror carrying the
                # page, rather than failing on a None match object.
                m = self.pnre.match(label) if label is not None else None
                if m is None:
                    raise pageerror("Got weird page option: %r" % (label,), pg)
                n = int(m.group(1))
                cpag.append(page(self, self.stack + [(self, len(cpag))], self.readerid, n))
            self.cpag = cpag
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.chapter %r.%r.%r>" % (self.manga.name, self.group.name, self.name)
105 | ||
class group(lib.pagelist):
    """A named group of chapters within a manga.

    The chapter list ``ch`` starts out empty and is appended to by
    whoever creates the group.
    """

    def __init__(self, manga, stack, id, name):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.ch = []

    def __getitem__(self, i):
        return self.ch[i]

    def __len__(self):
        return len(self.ch)

    def __str__(self):
        return self.name

    def __repr__(self):
        # The closing ">" was missing from the format string.
        return "<batoto.group %r.%r>" % (self.manga.name, self.name)
08e259d7 FT |
125 | |
class manga(lib.manga):
    """A manga: an indexable list of chapter groups, loaded on demand."""

    def __init__(self, lib, id, name, url):
        """Create a manga handle.

        *lib* is the owning library (its session is reused for requests),
        *url* is the address of the manga's page.
        """
        self.lib = lib
        self.sess = lib.sess
        self.id = id
        self.name = name
        self.url = url
        # Cached list of groups; filled in by the first call to ch().
        self.cch = None
        self.stack = []
        # Cached list of alternative names; filled in by altnames().
        self.cnames = None

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    @staticmethod
    def vfylogin(page):
        """Check used with session.lfetch() to decide whether *page* is usable.

        Returns False if the page contains ``<div id="register_notice">``
        or lacks a table of class "chapters_list"; True otherwise.
        """
        if page.find("div", id="register_notice"):
            return False
        if not byclass(page, "table", "chapters_list"):
            return False
        return True

    # Extracts the reader id from a chapter link.
    cure = re.compile(r"/reader#([a-z0-9]+)")
    def ch(self):
        """Return the list of chapter groups, fetching it on first use.

        Only table rows carrying both the "row" class and the class for
        the library's language are considered. Raises pageerror if a
        chapter link does not match ``cure``.
        """
        if self.cch is None:
            page = self.sess.lfetch(self.url, self.vfylogin)
            cls = byclass(page, "table", "chapters_list")
            if cls.tbody is not None:
                cls = cls.tbody
            scl = "lang_" + self.lib.lang
            cch = []
            for ch in cls.childGenerator():
                if isinstance(ch, bs4.Tag) and ch.name == "tr":
                    cll = ch.get("class", [])
                    if "row" in cll and scl in cll:
                        url = ch.td.a["href"]
                        m = self.cure.search(url)
                        if m is None: raise pageerror("Got weird chapter URL: %r" % url, page)
                        readerid = m.group(1)
                        name = ch.td.a.text
                        # The group name is read from the second tag
                        # following the first cell of the row.
                        gname = nextel(nextel(ch.td)).text.strip()
                        cch.append((readerid, name, gname))
            # Chapters are collected in page order and then reversed.
            cch.reverse()
            # Bucket chapters by group name, remembering for each group the
            # index of its first chapter so groups can be ordered by it.
            groups = {}
            for n, (readerid, name, gname) in enumerate(cch):
                groups.setdefault(gname, [n, []])[1].append((readerid, name))
            groups = sorted(groups.items(), key=lambda o: o[1][0])
            rgrp = []
            for n, (gname, (_, gch)) in enumerate(groups):
                ngrp = group(self, [(self, n)], gname, gname)
                for m, (readerid, name) in enumerate(gch):
                    ngrp.ch.append(chapter(ngrp, ngrp.stack + [(ngrp, m)], readerid, name, readerid))
                rgrp.append(ngrp)
            self.cch = rgrp
        return self.cch

    def altnames(self):
        """Return the manga's alternative names, fetching them on first use.

        Looks through tables of class "ipb_table" for a row whose first
        cell contains "Alt Names:" and collects the text of the ``<span>``
        elements in the following cell.

        Raises pageerror if no such row exists or if it is malformed.
        """
        if self.cnames is None:
            page = soupify(self.sess.fetch(self.url))
            cnames = None
            for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
                if tbl.tbody is not None: tbl = tbl.tbody
                for tr in tbl.findAll("tr"):
                    # Rows without any <td> (e.g. header-only rows) cannot
                    # be the one we are looking for; skip them instead of
                    # failing on a None cell.
                    if tr.td is None:
                        continue
                    if "Alt Names:" in tr.td.text:
                        nls = nextel(tr.td)
                        if nls.name != "td" or nls.span is None:
                            raise pageerror("Weird altnames table in " + self.id, page)
                        cnames = [nm.text.strip() for nm in nls.findAll("span")]
                        break
                if cnames is not None:
                    break
            if cnames is None:
                raise pageerror("Could not find altnames for " + self.id, page)
            self.cnames = cnames
        return self.cnames

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.manga %r>" % self.name
210 | ||
81be6921 FT |
class credentials(object):
    """A username/password pair used for logging in."""

    def __init__(self, username, password):
        self.username = username
        self.password = password

    @classmethod
    def fromfile(cls, path):
        """Build credentials from the profile file at *path*.

        Recognised entries are "username", "password" and "pass64" (a
        base64-encoded, UTF-8 password). A later entry overrides an
        earlier one. Raises ValueError unless both a username and a
        password were found.
        """
        found = {"username": None, "password": None}
        with open(path) as fp:
            for words in profile.splitlines(fp):
                key = words[0]
                if key in ("username", "password"):
                    found[key] = words[1]
                elif key == "pass64":
                    import binascii
                    found["password"] = binascii.a2b_base64(words[1]).decode("utf8")
        if found["username"] is None or found["password"] is None:
            raise ValueError("Incomplete profile: " + path)
        return cls(found["username"], found["password"])

    @classmethod
    def default(cls):
        """Load credentials from the "batoto" file in ``profile.confdir``.

        Returns None when that file does not exist.
        """
        path = os.path.join(profile.confdir, "batoto")
        if not os.path.exists(path):
            return None
        return cls.fromfile(path)
238 | ||
class session(object):
    """An HTTP session with its own cookie jar that can log in on demand."""

    def __init__(self, base, credentials):
        """Create a session for the site rooted at *base*.

        *credentials* provides the ``username`` and ``password`` used by
        dologin().
        """
        self.base = base
        self.creds = credentials
        self.jar = http.cookiejar.CookieJar()
        self.web = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.jar))
        # time.time() of the last successful login; 0 means "never".
        self.lastlogin = 0

    # Extracts the user name from the text of the user link.
    rlre = re.compile(r"Welcome, (.*) ")
    def dologin(self, pre=None):
        """Make sure the session is logged in as the configured user.

        *pre*, if given, is an already parsed page to inspect instead of
        fetching the base page. If that page has no login form, the
        procedure is retried from the base page.

        Raises Exception if the last successful login was less than 60
        seconds ago, and pageerror if the logout link or the login form
        cannot be found or the login is not confirmed by the site.
        """
        now = time.time()
        if now - self.lastlogin < 60:
            raise Exception("Too soon since last login attempt")
        if pre is None:
            with self.web.open(self.base) as hs:
                page = soupify(hs.read())
        else:
            page = pre

        cur = page.find("a", id="user_link")
        if cur:
            m = self.rlre.search(cur.text)
            if not m or m.group(1) != self.creds.username:
                # Logged in as somebody else (or unrecognisably): sign out
                # first, then reload the base page to get a login form.
                outurl = None
                nav = page.find("div", id="user_navigation")
                if nav:
                    for li in nav.findAll("li"):
                        if li.a and "Sign Out" in li.a.string:
                            outurl = li.a["href"]
                if not outurl:
                    raise pageerror("Could not find logout URL", page)
                # Was "self.wep", an attribute that does not exist.
                with self.web.open(outurl) as hs:
                    hs.read()
                with self.web.open(self.base) as hs:
                    page = soupify(hs.read())
            else:
                # Already logged in as the right user.
                return
        # Without a user link there is nothing to sign out of; continue
        # straight to the login form.

        form = page.find("form", id="login")
        if not form and pre:
            return self.dologin()
        if not form:
            raise pageerror("Could not find login form", page)
        # Carry over the form's hidden fields and add the credentials.
        values = {}
        for el in form.findAll("input", type="hidden"):
            values[el["name"]] = el["value"]
        values["ips_username"] = self.creds.username
        values["ips_password"] = self.creds.password
        values["rememberMe"] = "1"
        values["anonymous"] = "1"
        req = urllib.request.Request(form["action"], urllib.parse.urlencode(values).encode("ascii"))
        with self.web.open(req) as hs:
            page = soupify(hs.read())
        for resp in page.findAll("p", attrs={"class": "message"}):
            if resp.strong and "You are now signed in" in resp.strong.string:
                break
        else:
            raise pageerror("Could not log in", page)
        self.lastlogin = now

    def open(self, url):
        """Open *url* (a URL string or Request object) using the session's opener."""
        return self.web.open(url)

    def fetch(self, url, headers=None):
        """Return the raw response body for *url*.

        *headers*, if given, is a mapping of extra request headers.
        """
        req = urllib.request.Request(url)
        if headers is not None:
            for k, v in headers.items():
                req.add_header(k, v)
        with self.open(req) as hs:
            return hs.read()

    def lfetch(self, url, ck):
        """Fetch and parse *url*, logging in if the result fails the check *ck*.

        *ck* is called with the parsed page and returns true when the page
        is acceptable. If it is not, dologin() is run and the page is
        fetched once more; pageerror is raised if it still fails *ck*.
        """
        page = soupify(self.fetch(url))
        if not ck(page):
            self.dologin(pre=page)
            page = soupify(self.fetch(url))
            if not ck(page):
                raise pageerror("Could not verify login status despite having logged in", page)
        return page
317 | ||
class library(lib.library):
    """Entry point for looking up manga on bato.to."""

    def __init__(self, *, creds=None):
        """Create the library.

        *creds* is the credentials object for the session; when omitted,
        ``credentials.default()`` is used (which may itself be None).
        """
        if creds is None:
            creds = credentials.default()
        self.base = "http://bato.to/"
        self.sess = session(self.base, creds)
        self.lang = "English"

    def byid(self, id):
        """Return the manga with the given *id*.

        Raises KeyError if the fetched page has no
        ``<h1 class="ipsType_pagetitle">`` element.
        """
        url = self.base + "comic/_/comics/" + id
        page = soupify(self.sess.fetch(url))
        title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
        if title is None:
            raise KeyError(id)
        return manga(self, id, title.string.strip(), url)

    def _search(self, pars):
        """Yield a manga for every result of a search with parameters *pars*.

        Result pages are requested one after another (parameter "p") for
        as long as a page contains a row with id "show_more_row".
        Raises Exception if a result link does not match ``rure``.
        """
        p = 1
        while True:
            _pars = dict(pars)
            _pars["p"] = str(p)
            with urllib.request.urlopen(self.base + "search?" + urllib.parse.urlencode(_pars)) as resp:
                page = soupify(resp.read())
            rls = page.find("div", id="comic_search_results").table
            if rls.tbody is not None:
                rls = rls.tbody
            hasmore = False
            for child in rls.findAll("tr"):
                # Skip header rows and rows whose id starts with "comic_rowo_".
                if child.th is not None: continue
                if child.get("id", "")[:11] == "comic_rowo_": continue
                if child.get("id") == "show_more_row":
                    hasmore = True
                    continue
                link = child.td.strong.a
                url = link["href"]
                m = self.rure.search(url)
                if m is None: raise Exception("Got weird manga URL: %r" % url)
                id = m.group(1)
                name = link.text.strip()
                yield manga(self, id, name, url)
            p += 1
            if not hasmore:
                break

    # Extracts the manga id from a search-result link.
    rure = re.compile(r"/comic/_/([^/]*)$")
    def search(self, expr):
        """Yield manga found by a name search for *expr* with name_cond "c"."""
        return self._search({"name": expr, "name_cond": "c"})

    def byname(self, prefix):
        """Yield manga whose name or an alternative name starts with *prefix*.

        The comparison is case-insensitive. When only an alternative name
        matches, the yielded manga carries that alternative name; at most
        one entry is yielded per search result.
        """
        lprefix = prefix.lower()
        plen = len(prefix)
        for res in self._search({"name": prefix, "name_cond": "s"}):
            if res.name[:plen].lower() == lprefix:
                yield res
            else:
                for aname in res.altnames():
                    if aname[:plen].lower() == lprefix:
                        yield manga(self, res.id, aname, res.url)
                        break