1 import urllib, re, BeautifulSoup
# Short alias for the BeautifulSoup parser class; the rest of the module calls
# soup(<html text>) to parse fetched pages.
3 soup = BeautifulSoup.BeautifulSoup
# Helper: scan the elements returned by el.findAll(name) and inspect each one's
# "class" attribute (read with a default of "" when the attribute is absent).
# NOTE(review): this listing is an excerpt -- the embedded source numbers jump
# from 8 to 16, so the comparison against `cl` and the return value are not
# visible here; presumably it returns the first tag whose classes include `cl`
# -- confirm against the full file.
5 def byclass(el, name, cl):
6 for ch in el.findAll(name):
# Skip results that are not tag objects.
7 if not isinstance(ch, BeautifulSoup.Tag): continue
8 cll = ch.get("class", "")
# Fragment of a helper whose def line is not part of this excerpt (presumably
# nextel(), which the page code calls -- confirm). Checks whether `el` is a
# BeautifulSoup.Tag; the action taken when it is lies on an elided line.
16 if isinstance(el, BeautifulSoup.Tag):
# Constructor of the page object (the enclosing class header is not part of
# this excerpt). Receives the owning chapter, a `stack`, the page number `n`
# and the page URL.
# NOTE(review): source lines 21, 23-24 and 26+ are elided; other attributes
# read elsewhere (self.url, self.ciurl) are presumably assigned there -- confirm.
20 def __init__(self, chapter, stack, n, url):
22 self.chapter = chapter
# Display name is built from the page number, e.g. u"Page 3".
25 self.name = u"Page %s" % n
# Body fragment of the image-URL lookup (its def line is elided from this
# excerpt). The URL is resolved only while self.ciurl is still None, and the
# result is stored back into self.ciurl so later calls skip the fetch.
30 if self.ciurl is None:
# Fetch the page through htcache and parse it.
31 page = soup(htcache.fetch(self.url))
# Start from the div with id "full_image", hand it to nextel(), and take the
# <img> found under whatever element nextel() returns.
32 img = nextel(page.find("div", id="full_image")).img
# Store the image's src attribute encoded as us-ascii.
33 self.ciurl = img["src"].encode("us-ascii")
# Return statement of a method whose def line is elided (presumably the page's
# open() -- confirm). Passes this page's image URL, obtained from self.iurl(),
# to lib.stdimgstream and returns the result.
37 return lib.stdimgstream(self.iurl())
# Return statement of a method whose def line is elided (presumably __repr__ --
# confirm). Formats the manga name, chapter name and page name with %r.
43 return "<batoto.page %r.%r.%r>" % (self.chapter.manga.name, self.chapter.name, self.name)
# A chapter: a lib.pagelist subclass whose page objects are built from the
# page selector on the chapter's reader page.
# NOTE(review): this listing is an excerpt -- the embedded source numbers skip,
# so several statements (and some def lines) of this class are not shown.
45 class chapter(lib.pagelist):
# Takes the owning manga, a `stack` list, the chapter id, its display name and
# its URL; the assignments themselves (source lines 47-53) are elided.
46 def __init__(self, manga, stack, id, name, url):
# Indexing delegates to the list returned by self.pages().
54 def __getitem__(self, i):
55 return self.pages()[i]
# Length likewise delegates to self.pages() (def line elided; presumably
# __len__ -- confirm).
58 return len(self.pages())
# Pulls the page number out of an option label of the form "page <digits>".
60 pnre = re.compile(r"page (\d+)")
# Fetch the chapter page through htcache and parse it.
63 pg = soup(htcache.fetch(self.url))
# Each <option> of the select with id "page_select" describes one page.
65 for opt in pg.find("select", id="page_select").findAll("option"):
# The option's value attribute is used as the page URL.
66 url = opt["value"].encode("us-ascii")
# .match() anchors at the start of the label; if the label does not begin with
# "page <digits>" the match is None and .group(1) raises AttributeError.
67 n = int(self.pnre.match(opt.string).group(1))
# The new page's stack is this chapter's stack plus (chapter, index), where the
# index is the number of pages collected so far in cpag.
68 cpag.append(page(self, self.stack + [(self, len(cpag))], n, url))
# Return statement of an elided def (presumably __repr__ -- confirm).
76 return "<batoto.chapter %r.%r>" % (self.manga.name, self.name)
# A manga title: a lib.manga subclass that scrapes its chapter list and its
# alternative names from the title's page.
# NOTE(review): this listing is an excerpt -- the embedded source numbers skip,
# so several statements (and some def lines) of this class are not shown.
78 class manga(lib.manga):
# Takes the owning library object, the manga id, its name and its URL; the
# assignments (source lines 80-87) are elided.
79 def __init__(self, lib, id, name, url):
# Body elided from this excerpt.
88 def __getitem__(self, i):
# Matches "/read/_/<digits>/<one path segment>" inside a chapter link; group 1
# is the run of digits.
94 cure = re.compile(r"/read/_/(\d+)/[^/]*")
# Fetch the manga page through htcache and parse it.
97 page = soup(htcache.fetch(self.url))
# Look up the <table> carrying the "chapters_list" class via byclass().
98 cls = byclass(page, u"table", u"chapters_list")
# The branch body (source line 100) is elided; presumably it descends into the
# <tbody>, as the altnames code below does -- confirm.
99 if cls.tbody is not None:
# CSS class marking rows in the library's configured language, e.g. "lang_English".
101 scl = u"lang_" + self.lib.lang
# Walk the direct children and keep only <tr> tags whose class list contains
# both "row" and the language class.
103 for ch in cls.childGenerator():
104 if isinstance(ch, BeautifulSoup.Tag) and ch.name == u"tr":
105 cll = ch.get("class", "").split()
106 if u"row" in cll and scl in cll:
# The chapter link is the <a> inside the row's first <td>.
107 url = ch.td.a["href"].encode("us-ascii")
108 m = self.cure.search(url)
# A link that does not look like a reader URL aborts the scrape.
109 if m is None: raise Exception("Got weird chapter URL: %r" % url)
# `cid` is assigned on the elided source line 110 (presumably m.group(1) --
# confirm); the URL is rebuilt from the library base and that id.
111 url = self.lib.base + "read/_/" + cid
113 cch.append((cid, name, url))
# Turn the collected tuples into chapter objects; each chapter's stack is the
# single entry (this manga, position in cch).
116 for n, (cid, name, url) in enumerate(cch):
117 rch.append(chapter(self, [(self, n)], cid, name, url))
# Alternative names are looked up only while self.cnames is still None.
122 if self.cnames is None:
123 page = soup(htcache.fetch(self.url))
# Search every table with class "ipb_table", descending into <tbody> when the
# parser produced one.
125 for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
126 if tbl.tbody is not None: tbl = tbl.tbody
127 for tr in tbl.findAll("tr"):
# The wanted row is the one whose first cell contains "Alt Names:".
128 if u"Alt Names:" in tr.td.text:
# `nls` is assigned on the elided source line 129; it must be a <td> that
# contains at least one <span>, otherwise the layout is treated as unexpected.
130 if nls.name != u"td" or nls.span is None:
131 raise Exception("Weird altnames table in " + self.id)
# One name per <span>, with surrounding whitespace stripped.
132 cnames = [nm.text.strip() for nm in nls.findAll("span")]
134 if cnames is not None:
# Raised from an elided branch, with the manga id appended to the message.
137 raise Exception("Could not find altnames for " + self.id)
# Return statement of an elided def (presumably __repr__ -- confirm).
145 return "<batoto.manga %r>" % self.name
# Site backend: a lib.library subclass that looks up manga by id, by
# full-text search, and by name prefix.
# NOTE(review): this listing is an excerpt -- the embedded source numbers skip,
# so several statements (and some def lines) of this class are not shown.
147 class library(lib.library):
# Root URL every request in this class is built from.
149 self.base = "http://www.batoto.net/"
# Language name; manga.ch builds its "lang_<name>" row filter from it.
150 self.lang = u"English"
# Fragment of an elided def (presumably the by-id lookup -- confirm): build the
# comic page URL from `id`, fetch it through htcache and parse it.
153 url = self.base + "comic/_/comics/" + id
154 page = soup(htcache.fetch(url))
# The title is taken from the <h1> with class "ipsType_pagetitle".
155 title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
158 return manga(self, id, title.string.strip(), url)
# Captures the last path segment of a URL ending in "/comic/_/comics/<segment>".
160 mure = re.compile(r"/comic/_/comics/([^/]*)$")
# Search for `expr`; matches are collected as manga objects in `ret`
# (its creation and the return statement are on elided lines).
161 def search(self, expr):
# The urlencoded form is passed as urlopen's second (data) argument.
162 resp = urllib.urlopen(self.base + "forums/index.php?app=core&module=search&do=search&fromMainBar=1",
163 urllib.urlencode({"search_term": expr, "search_app": "ccs:database:3"}))
165 page = soup(resp.read())
# A <p class="no_messages"> containing "No results" is detected here; the
# branch body (source lines 170-171) is elided.
168 none = page.find("p", attrs={"class": "no_messages"})
169 if none is not None and u"No results" in none.text:
# Each <li> that is a direct child of the <ol> in div#search_results is one hit.
172 for child in page.find("div", id="search_results").ol.childGenerator():
173 if isinstance(child, BeautifulSoup.Tag) and child.name == u"li":
174 info = child.find("div", attrs={"class": "result_info"})
175 url = info.h3.a["href"].encode("us-ascii")
176 m = self.mure.search(url)
# A link that does not look like a comic URL aborts the search.
177 if m is None: raise Exception("Got weird manga URL: %r" % url)
# `id` is assigned on the elided source line 178 (presumably m.group(1) -- confirm).
179 name = info.h3.a.string.strip()
180 ret.append(manga(self, id, name, url))
# Captures the last path segment of a URL ending in "/comic/_/<segment>".
183 rure = re.compile(r"/comic/_/([^/]*)$")
# Generator that yields manga objects for results related to `prefix`.
184 def byname(self, prefix):
# Byte strings are decoded as UTF-8 so the comparisons below work on unicode.
185 if not isinstance(prefix, unicode):
186 prefix = prefix.decode("utf8")
# Query the site search; `p` is set on an elided line (presumably a page
# counter -- confirm).
189 resp = urllib.urlopen(self.base + "search?" + urllib.urlencode({"name": prefix.encode("utf8"), "name_cond": "s", "p": str(p)}))
191 page = soup(resp.read())
# Results live in the <table> inside div#comic_search_results; the tbody
# branch body (source lines 196-197) is elided.
194 rls = page.find("div", id="comic_search_results").table
195 if rls.tbody is not None:
198 for child in rls.findAll("tr"):
# Rows containing a <th> are skipped.
199 if child.th is not None: continue
# The row with id "show_more_row" is special-cased; its handling is elided.
200 if child.get("id") == u"show_more_row":
203 link = child.td.strong.a
204 url = link["href"].encode("us-ascii")
205 m = self.rure.search(url)
206 if m is None: raise Exception("Got weird manga URL: %r" % url)
# `id` is assigned on the elided source line 207 (presumably m.group(1) -- confirm).
208 name = link.text.strip()
# When the primary name does not start with the prefix (case-insensitively),
# the alternative names are checked for the prefix instead.
209 if name[:len(prefix)].lower() != prefix.lower():
# Note: `m` is rebound here from the regex match to a manga object.
210 m = manga(self, id, name, url)
211 for aname in m.altnames():
212 if aname[:len(prefix)].lower() == prefix.lower():
# Prints the name to stdout; presumably for results that are being dropped
# (the surrounding branch is elided) -- confirm.
217 print "eliding " + name
220 yield manga(self, id, name, url)