1 import urllib.request, urllib.parse, http.cookiejar, re, bs4, os, time
2 from . import profile, lib, htcache
# Shorthand for the BeautifulSoup constructor used throughout this module.
3 soup = bs4.BeautifulSoup
# Parse the markup `cont` with BeautifulSoup's "html.parser" backend.
4 soupify = lambda cont: soup(cont, "html.parser")
# Exception raised by this module when a fetched page does not contain what
# the scraper expects. Callers pass the offending page next to the message.
6 class pageerror(Exception):
7 def __init__(self, message, page):
# Only `message` is forwarded to Exception. What happens to `page` is on a
# line not shown in this listing (presumably it is stored on the instance --
# confirm).
8 super().__init__(message)
# NOTE(review): only the signature and the first test of iterlast are visible
# in this listing. Going by the name and by the call iterlast(el.parents, el)
# in find1, it presumably returns the last item produced by `itr`, with
# `default` as the fallback -- confirm against the full body.
11 def iterlast(itr, default=None):
12 if default is not None:
# Wrapper around el.find(*args, **kwargs) that can raise pageerror with the
# message "could not find expected element".
20 def find1(el, *args, **kwargs):
21 ret = el.find(*args, **kwargs)
# The condition guarding this raise is not shown here (presumably
# "ret is None" -- confirm). The object attached to the error is
# iterlast(el.parents, el): derived from el's ancestors, with el itself
# passed as the default.
23 raise pageerror("could not find expected element", iterlast(el.parents, el))
# Scan the `name` elements found under `el` and read each one's "class"
# attribute.
# NOTE(review): the comparison against `cl` and the return statements are
# elided from this listing. Callers use the result both as a truth value and
# as an element, so presumably the first element whose class list contains
# `cl` is returned -- confirm.
26 def byclass(el, name, cl):
27 for ch in el.findAll(name):
# Skip anything findAll yields that is not an actual tag.
28 if not isinstance(ch, bs4.Tag): continue
# A missing "class" attribute is treated as an empty list.
29 cll = ch.get("class", [])
# NOTE(review): this test is on `el`, not `ch`, and the listing's own
# numbering jumps from 29 to 37 here. It most likely belongs to a different
# helper whose "def" line is elided -- confirm before editing.
37 if isinstance(el, bs4.Tag):
# Request page number `page` of the reader identified by `readerid` and parse
# the response with soupify. The URL is lib.base + "areader?" followed by the
# url-encoded "id" and "p" parameters, and a fixed Referer header is sent.
# `lib` is the library object handed in by the caller; inside this function
# it shadows the `lib` module imported at the top of the file.
# NOTE(review): the return statement is elided here; callers treat the result
# as the parsed page.
40 def fetchreader(lib, readerid, page):
41 pg = soupify(lib.sess.fetch(lib.base + "areader?" + urllib.parse.urlencode({"id": readerid, "p": str(page)}),
42 headers={"Referer": "http://bato.to/reader"}))
# Methods of the page class; its "class" line is not part of this listing.
# A page is created for a chapter with a reader id and a page number `n`.
46 def __init__(self, chapter, stack, readerid, n):
# The owning library is taken from the chapter.
48 self.lib = chapter.lib
49 self.chapter = chapter
52 self.name = "Page %s" % n
53 self.readerid = readerid
# Image URL lookup, cached in self.ciurl: the reader page is fetched only
# while no URL has been stored, and the URL is the "src" of the <img> whose
# id is "comic_page". (The enclosing "def" line is elided; line 64 calls
# self.iurl(), so presumably this is that method -- confirm.)
57 if self.ciurl is None:
58 page = fetchreader(self.lib, self.readerid, self.n)
59 img = find1(page, "img", id="comic_page")
60 self.ciurl = img["src"]
# Pass the image URL to lib.stdimgstream and return its result.
64 return lib.stdimgstream(self.iurl())
# Debug representation: manga name, chapter name and page name.
70 return "<batoto.page %r.%r.%r>" % (self.chapter.manga.name, self.chapter.name, self.name)
# A chapter of a manga (subclass of lib.pagelist). Its pages are discovered
# from the reader page identified by `readerid`.
72 class chapter(lib.pagelist):
73 def __init__(self, manga, stack, id, name, readerid):
79 self.readerid = readerid
# Indexing and len() are both answered from the sequence returned by
# self.pages().
82 def __getitem__(self, i):
83 return self.pages()[i]
86 return len(self.pages())
# Pulls the page number out of an option label of the form "page <digits>".
88 pnre = re.compile(r"page (\d+)")
# Page discovery: load reader page 1 and create one page object per <option>
# of the <select id="page_select"> element.
91 pg = fetchreader(self.lib, self.readerid, 1)
93 for opt in find1(pg, "select", id="page_select").findAll("option"):
# pnre.match anchors at the start of the label; a label that does not begin
# with "page <digits>" makes match() return None and this line fail with
# AttributeError.
94 n = int(self.pnre.match(opt.string).group(1))
# Each page's stack is the chapter's stack plus (chapter, index of the page).
95 cpag.append(page(self, self.stack + [(self, len(cpag))], self.readerid, n))
# Debug representation: manga name and chapter name.
103 return "<batoto.chapter %r.%r>" % (self.manga.name, self.name)
# A manga on the site (subclass of lib.manga). Visible in this listing:
# the chapter list scraped from the "chapters_list" table and filtered by
# language, and the lookup of alternative names.
105 class manga(lib.manga):
106 def __init__(self, lib, id, name, url):
116 def __getitem__(self, i):
# Length is the number of chapters returned by self.ch().
120 return len(self.ch())
# Checks run on a fetched manga page: is there a "register_notice" div, and
# is the chapters table present?
# NOTE(review): the "def" line and the result of each test are elided;
# presumably this is the vfylogin check handed to sess.lfetch below --
# confirm.
124 if page.find("div", id="register_notice"):
126 if not byclass(page, "table", "chapters_list"):
# Extracts the reader id from a chapter link of the form "/reader#<id>".
130 cure = re.compile(r"/reader#([a-z0-9]+)")
# Chapter list construction. The page is fetched through sess.lfetch with
# self.vfylogin as the check; lfetch can log in and fetch the page again.
133 page = self.sess.lfetch(self.url, self.vfylogin)
134 cls = byclass(page, "table", "chapters_list")
135 if cls.tbody is not None:
# Only rows carrying the class for the library's language are kept.
137 scl = "lang_" + self.lib.lang
139 for ch in cls.childGenerator():
140 if isinstance(ch, bs4.Tag) and ch.name == "tr":
141 cll = ch.get("class", [])
142 if "row" in cll and scl in cll:
# The chapter link is the first <a> inside the row's first cell.
143 url = ch.td.a["href"]
144 m = self.cure.search(url)
145 if m is None: raise pageerror("Got weird chapter URL: %r" % url, page)
146 readerid = m.group(1)
148 cch.append((readerid, name))
# The reader id is passed twice: as the chapter's id and as its readerid.
151 for n, (readerid, name) in enumerate(cch):
152 rch.append(chapter(self, [(self, n)], readerid, name, readerid))
# Alternative names, cached in self.cnames: the manga page is fetched only
# while no names have been stored.
157 if self.cnames is None:
158 page = soupify(self.sess.fetch(self.url))
# Search every "ipb_table" table for the row whose first cell contains
# "Alt Names:".
160 for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
161 if tbl.tbody is not None: tbl = tbl.tbody
162 for tr in tbl.findAll("tr"):
163 if "Alt Names:" in tr.td.text:
# `nls` is assigned on an elided line. It has to be a <td> containing at
# least one <span>; each <span> holds one name.
165 if nls.name != "td" or nls.span is None:
166 raise pageerror("Weird altnames table in " + self.id, page)
167 cnames = [nm.text.strip() for nm in nls.findAll("span")]
169 if cnames is not None:
# Raised when no names were found (the guarding lines are elided).
172 raise pageerror("Could not find altnames for " + self.id, page)
# Debug representation: the manga name.
180 return "<batoto.manga %r>" % self.name
# Username/password pair for the site, optionally loaded from a profile file.
182 class credentials(object):
183 def __init__(self, username, password):
184 self.username = username
185 self.password = password
# Build credentials from the file at `path`, read as lines of words through
# profile.splitlines. The first word of a line selects the setting:
# "username", "password", or "pass64" for a base64-encoded password.
# NOTE(review): the decorator line is elided; the `cls` parameter suggests a
# classmethod -- confirm.
188 def fromfile(cls, path):
189 username, password = None, None
190 with open(path) as fp:
191 for words in profile.splitlines(fp):
192 if words[0] == "username":
194 elif words[0] == "password":
196 elif words[0] == "pass64":
# NOTE(review): binascii is not in the import line at the top of the file;
# check that it is imported on the elided line just above this one.
198 password = binascii.a2b_base64(words[1]).decode("utf8")
# Both a username and a password must have been found.
199 if None in (username, password):
200 raise ValueError("Incomplete profile: " + path)
201 return cls(username, password)
# Default lookup: the file named "batoto" in profile.confdir, used when it
# exists. (The "def" line and the fallback for a missing file are elided.)
205 path = os.path.join(profile.confdir, "batoto")
206 if os.path.exists(path):
207 return cls.fromfile(path)
210 class session(object):
211 def __init__(self, base, credentials):
213 self.creds = credentials
214 self.jar = http.cookiejar.CookieJar()
215 self.web = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.jar))
218 rlre = re.compile(r"Welcome, (.*) ")
219 def dologin(self, pre=None):
221 if now - self.lastlogin < 60:
222 raise Exception("Too soon since last login attempt")
224 with self.web.open(self.base) as hs:
225 page = soupify(hs.read())
229 cur = page.find("a", id="user_link")
231 m = self.rlre.search(cur.text)
232 if not m or m.group(1) != self.creds.username:
234 nav = page.find("div", id="user_navigation")
236 for li in nav.findAll("li"):
237 if li.a and "Sign Out" in li.a.string:
238 outurl = li.a["href"]
240 raise pageerror("Could not find logout URL", page)
241 with self.wep.open(outurl) as hs:
243 with self.web.open(self.base) as hs:
244 page = soupify(hs.read())
249 form = page.find("form", id="login")
251 return self.dologin()
253 for el in form.findAll("input", type="hidden"):
254 values[el["name"]] = el["value"]
255 values["ips_username"] = self.creds.username
256 values["ips_password"] = self.creds.password
257 values["rememberMe"] = "1"
258 values["anonymous"] = "1"
259 req = urllib.request.Request(form["action"], urllib.parse.urlencode(values).encode("ascii"))
260 with self.web.open(req) as hs:
261 page = soupify(hs.read())
262 for resp in page.findAll("p", attrs={"class": "message"}):
263 if resp.strong and "You are now signed in" in resp.strong.string:
266 raise pageerror("Could not log in", page)
270 return self.web.open(url)
272 def fetch(self, url, headers=None):
273 req = urllib.request.Request(url)
274 if headers is not None:
275 for k, v in headers.items():
277 with self.open(req) as hs:
280 def lfetch(self, url, ck):
281 page = soupify(self.fetch(url))
283 self.dologin(pre=page)
284 page = soupify(self.fetch(url))
286 raise pageerror("Could not verify login status despite having logged in", page)
# Entry point of the bato.to backend (subclass of lib.library): holds the
# session and the chapter language, and offers lookup by id, search and
# prefix search.
289 class library(lib.library):
290 def __init__(self, *, creds=None):
# Fall back to credentials.default() (the guard for this line is elided).
292 creds = credentials.default()
293 self.base = "http://bato.to/"
294 self.sess = session(self.base, creds)
# manga selects chapter rows by the class "lang_" + this value.
295 self.lang = "English"
# Look up a manga by id: fetch its page under "comic/_/comics/" and take the
# name from <h1 class="ipsType_pagetitle">. (The "def" line and the handling
# of a missing title are elided.)
298 url = self.base + "comic/_/comics/" + id
299 page = soupify(self.sess.fetch(url))
300 title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
303 return manga(self, id, title.string.strip(), url)
# Generator over the manga objects found by a search with parameters `pars`.
305 def _search(self, pars):
# NOTE(review): this request uses urllib.request.urlopen directly instead of
# self.sess, so it does not go through the session's cookie jar. `_pars` is
# built on elided lines.
310 resp = urllib.request.urlopen(self.base + "search?" + urllib.parse.urlencode(_pars))
312 page = soupify(resp.read())
# Result rows live in the table inside <div id="comic_search_results">.
315 rls = page.find("div", id="comic_search_results").table
316 if rls.tbody is not None:
319 for child in rls.findAll("tr"):
# Skip header rows and rows whose id starts with "comic_rowo_".
320 if child.th is not None: continue
321 if child.get("id", "")[:11] == "comic_rowo_": continue
# The "show_more_row" row gets its own handling (elided here).
322 if child.get("id") == "show_more_row":
325 link = child.td.strong.a
327 m = self.rure.search(url)
328 if m is None: raise Exception("Got weird manga URL: %r" % url)
330 name = link.text.strip()
331 yield manga(self, id, name, url)
# Captures the last path component of a URL ending in "/comic/_/<id>".
336 rure = re.compile(r"/comic/_/([^/]*)$")
# Search by name with name_cond "c" (presumably "contains" -- confirm).
337 def search(self, expr):
338 return self._search({"name": expr, "name_cond": "c"})
# Search with name_cond "s" (presumably "starts with" -- confirm), then
# compare each result's name and alternative names with `prefix`,
# case-insensitively, over the first len(prefix) characters.
340 def byname(self, prefix):
341 for res in self._search({"name": prefix, "name_cond": "s"}):
342 if res.name[:len(prefix)].lower() == prefix.lower():
345 for aname in res.altnames():
346 if aname[:len(prefix)].lower() == prefix.lower():
# A match on an alternative name is reported as a new manga object that
# carries that alternative name.
347 yield manga(self, res.id, aname, res.url)
# Diagnostic output printed for a result that is being dropped.
351 print("eliding " + res.name)
352 print(res.altnames())