| 1 | import urllib, re, BeautifulSoup |
| 2 | import lib, htcache |
| 3 | soup = BeautifulSoup.BeautifulSoup |
| 4 | |
def byclass(el, name, cl):
    """Return the first `name` descendant tag of `el` whose class
    attribute contains the word `cl`, or None if none matches."""
    for tag in el.findAll(name):
        if not isinstance(tag, BeautifulSoup.Tag):
            continue
        classes = tag.get("class", "").split()
        if cl in classes:
            return tag
    return None
| 12 | |
def nextel(el):
    """Return the next sibling of `el` that is an actual tag, skipping
    intervening text nodes and other non-tag siblings."""
    cur = el.nextSibling
    while not isinstance(cur, BeautifulSoup.Tag):
        cur = cur.nextSibling
    return cur
| 18 | |
class page(lib.page):
    """A single page of a chapter; the image URL is resolved lazily."""

    def __init__(self, chapter, stack, n, url):
        self.stack = stack
        self.chapter = chapter
        self.n = n
        self.id = str(n)
        self.name = u"Page %s" % n
        self.url = url
        # Cached full-size image URL; filled in by the first iurl() call.
        self.ciurl = None

    def iurl(self):
        """Fetch this page's reader HTML and return the URL of the
        full-size image, caching the result for subsequent calls."""
        if self.ciurl is None:
            page = soup(htcache.fetch(self.url))
            img = nextel(page.find("div", id="full_image")).img
            self.ciurl = img["src"].encode("us-ascii")
        return self.ciurl

    def open(self):
        """Open and return a standard image stream for this page."""
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    # BUGFIX: was misspelled `__repr`, so repr() fell back to the default
    # object representation and this method was never invoked.
    def __repr__(self):
        return "<batoto.page %r.%r.%r>" % (self.chapter.manga.name, self.chapter.name, self.name)
| 44 | |
class chapter(lib.pagelist):
    """One chapter of a manga; its pages are scraped lazily from the
    reader page's page-selection dropdown."""

    def __init__(self, manga, stack, id, name, url):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        # Cached page list; populated by the first pages() call.
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    pnre = re.compile(r"page (\d+)")
    def pages(self):
        """Scrape, cache and return the list of page objects."""
        if self.cpag is not None:
            return self.cpag
        doc = soup(htcache.fetch(self.url))
        plist = []
        for opt in doc.find("select", id="page_select").findAll("option"):
            purl = opt["value"].encode("us-ascii")
            pnum = int(self.pnre.match(opt.string).group(1))
            plist.append(page(self, self.stack + [(self, len(plist))], pnum, purl))
        self.cpag = plist
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.chapter %r.%r>" % (self.manga.name, self.name)
| 77 | |
class manga(lib.manga):
    """A manga on Batoto; the chapter list and alternate names are
    scraped lazily from the comic's overview page."""

    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        # Cached chapter list; populated by the first ch() call.
        self.cch = None
        self.stack = []
        # Cached alternate names; populated by the first altnames() call.
        self.cnames = None

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    cure = re.compile(r"/read/_/(\d+)/[^/]*")
    def ch(self):
        """Scrape, cache and return this manga's chapters in reading
        order, restricted to the library's configured language."""
        if self.cch is not None:
            return self.cch
        doc = soup(htcache.fetch(self.url))
        tbl = byclass(doc, u"table", u"chapters_list")
        if tbl.tbody is not None:
            tbl = tbl.tbody
        langcl = u"lang_" + self.lib.lang
        found = []
        for row in tbl.childGenerator():
            if not isinstance(row, BeautifulSoup.Tag) or row.name != u"tr":
                continue
            classes = row.get("class", "").split()
            if u"row" not in classes or langcl not in classes:
                continue
            url = row.td.a["href"].encode("us-ascii")
            m = self.cure.search(url)
            if m is None:
                raise Exception("Got weird chapter URL: %r" % url)
            cid = m.group(1)
            # Normalize to a canonical reader URL built from the ID.
            url = self.lib.base + "read/_/" + cid
            found.append((cid, row.td.a.text, url))
        # The site lists newest chapters first; flip to reading order.
        found.reverse()
        self.cch = [chapter(self, [(self, n)], cid, name, url)
                    for n, (cid, name, url) in enumerate(found)]
        return self.cch

    def altnames(self):
        """Scrape, cache and return the manga's alternate names from the
        "Alt Names:" row of its info table."""
        if self.cnames is not None:
            return self.cnames
        doc = soup(htcache.fetch(self.url))
        names = None
        for tbl in doc.findAll("table", attrs={"class": "ipb_table"}):
            if tbl.tbody is not None:
                tbl = tbl.tbody
            for tr in tbl.findAll("tr"):
                if u"Alt Names:" in tr.td.text:
                    nls = nextel(tr.td)
                    if nls.name != u"td" or nls.span is None:
                        raise Exception("Weird altnames table in " + self.id)
                    names = [nm.text.strip() for nm in nls.findAll("span")]
                    break
            if names is not None:
                break
        if names is None:
            raise Exception("Could not find altnames for " + self.id)
        self.cnames = names
        return self.cnames

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.manga %r>" % self.name
| 146 | |
class library(lib.library):
    """Front-end for browsing and searching batoto.net."""

    def __init__(self):
        # Site root; all request URLs below are built relative to this.
        self.base = "http://www.batoto.net/"
        # Chapter-language filter consumed by manga.ch().
        self.lang = u"English"

    def byid(self, id):
        """Return the manga with comic ID `id`; raise KeyError when the
        fetched page lacks the expected title element."""
        url = self.base + "comic/_/comics/" + id
        page = soup(htcache.fetch(url))
        title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
        if title is None:
            raise KeyError(id)
        return manga(self, id, title.string.strip(), url)

    # Matches forum-search result links and captures the comic ID.
    mure = re.compile(r"/comic/_/comics/([^/]*)$")
    def search(self, expr):
        """Run a forum search for `expr`, scoped to the comics database,
        and return a (possibly empty) list of matching manga objects."""
        resp = urllib.urlopen(self.base + "forums/index.php?app=core&module=search&do=search&fromMainBar=1",
                              urllib.urlencode({"search_term": expr, "search_app": "ccs:database:3"}))
        try:
            page = soup(resp.read())
        finally:
            resp.close()
        # The site reports "no results" via a dedicated message paragraph.
        none = page.find("p", attrs={"class": "no_messages"})
        if none is not None and u"No results" in none.text:
            return []
        ret = []
        for child in page.find("div", id="search_results").ol.childGenerator():
            if isinstance(child, BeautifulSoup.Tag) and child.name == u"li":
                info = child.find("div", attrs={"class": "result_info"})
                url = info.h3.a["href"].encode("us-ascii")
                m = self.mure.search(url)
                if m is None: raise Exception("Got weird manga URL: %r" % url)
                id = m.group(1)
                name = info.h3.a.string.strip()
                ret.append(manga(self, id, name, url))
        return ret

    # Matches title-search result links and captures the comic ID.
    rure = re.compile(r"/comic/_/([^/]*)$")
    def byname(self, prefix):
        """Yield manga whose name — or one of their alternate names —
        starts with `prefix` (case-insensitive), paging through the
        site's title search until it stops reporting more results."""
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        p = 1
        while True:
            resp = urllib.urlopen(self.base + "search?" + urllib.urlencode({"name": prefix.encode("utf8"), "name_cond": "s", "p": str(p)}))
            try:
                page = soup(resp.read())
            finally:
                resp.close()
            rls = page.find("div", id="comic_search_results").table
            if rls.tbody is not None:
                rls = rls.tbody
            hasmore = False
            for child in rls.findAll("tr"):
                # Skip the header row.
                if child.th is not None: continue
                # A "show more" row signals that a further page exists.
                if child.get("id") == u"show_more_row":
                    hasmore = True
                    continue
                link = child.td.strong.a
                url = link["href"].encode("us-ascii")
                m = self.rure.search(url)
                if m is None: raise Exception("Got weird manga URL: %r" % url)
                id = m.group(1)
                name = link.text.strip()
                if name[:len(prefix)].lower() != prefix.lower():
                    # Primary name does not match: check the alternate
                    # names and, if one matches, present the manga under it.
                    m = manga(self, id, name, url)
                    for aname in m.altnames():
                        if aname[:len(prefix)].lower() == prefix.lower():
                            name = aname
                            break
                    else:
                        # No alternate name matched either: drop this result.
                        # (The guarded prints are disabled debug output.)
                        if False:
                            print "eliding " + name
                            print m.altnames()
                        continue
                yield manga(self, id, name, url)
            p += 1
            if not hasmore:
                break