import urllib, re, BeautifulSoup
import lib, htcache
soup = BeautifulSoup.BeautifulSoup

def soupify(cont):
    """Parse `cont` with BeautifulSoup, converting HTML entities to unicode.

    Central parser entry point so every fetch in this module decodes
    entities the same way.
    """
    return soup(cont, convertEntities=soup.HTML_ENTITIES)
# Find descendant tags of `el` with tag name `name` filtered by CSS class
# `cl`.  NOTE(review): the loop body (the class-matching / return logic) is
# elided in this diff view -- cannot document the matching rule from here.
def byclass(el, name, cl):
for ch in el.findAll(name):
def iurl(self):
    """Return the image URL for this page, fetching it lazily.

    The page HTML is fetched through htcache and parsed once; the result is
    memoized in self.ciurl so repeated calls do not re-fetch.
    """
    if self.ciurl is None:
        page = soupify(htcache.fetch(self.url))
        # The full-size image is in the element following <div id="full_image">.
        img = nextel(page.find("div", id="full_image")).img
        self.ciurl = img["src"].encode("us-ascii")
    return self.ciurl
# Extracts the numeric index from an option label such as "page 3"
# (presumably consumed by pages() below -- confirm against caller).
pnre = re.compile(r"page (\d+)")
# Build the cached list of page URLs for a chapter by reading the
# "page_select" drop-down on the reader page.
# NOTE(review): this fragment is truncated by the diff view -- the code that
# stores/returns self.cpag is not visible.  The -/+ lines below are
# unresolved diff residue switching soup() to soupify().
def pages(self):
if self.cpag is None:
- pg = soup(htcache.fetch(self.url))
+ pg = soupify(htcache.fetch(self.url))
cpag = []
for opt in pg.find("select", id="page_select").findAll("option"):
url = opt["value"].encode("us-ascii")
# Constructor tail (the enclosing __init__ header is elided in this view):
# remember the object's URL and initialize the lazy caches consumed by
# ch() and altnames(), plus the reader navigation stack.
self.url = url
self.cch = None
self.stack = []
+ self.cnames = None
def __getitem__(self, i):
    """Index into the (lazily built) chapter list produced by ch()."""
    chapters = self.ch()
    return chapters[i]
# Captures the numeric chapter id from reader URLs of the form
# "/read/_/<digits>/<slug>".
cure = re.compile(r"/read/_/(\d+)/[^/]*")
# Build and cache the chapter list by parsing the "chapters_list" table on
# the manga page.  NOTE(review): the middle of this method (construction of
# `rch` from the table rows) is elided in this diff view; the -/+ lines are
# unresolved diff residue switching soup() to soupify().
def ch(self):
if self.cch is None:
- page = soup(htcache.fetch(self.url))
+ page = soupify(htcache.fetch(self.url))
cls = byclass(page, u"table", u"chapters_list")
if cls.tbody is not None:
cls = cls.tbody
self.cch = rch
return self.cch
def altnames(self):
    """Return the list of alternative names for this manga (cached).

    Scrapes the "Alt Names:" row out of the ipb_table info tables on the
    manga page.  Raises Exception when the row is missing or its layout is
    not the expected <td><span>...</span>...</td> shape.
    """
    if self.cnames is None:
        page = soupify(htcache.fetch(self.url))
        cnames = None
        for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
            if tbl.tbody is not None:
                tbl = tbl.tbody
            for tr in tbl.findAll("tr"):
                if u"Alt Names:" not in tr.td.text:
                    continue
                # The names live in the <td> immediately after the label cell.
                nls = nextel(tr.td)
                if nls.name != u"td" or nls.span is None:
                    raise Exception("Weird altnames table in " + self.id)
                cnames = [nm.text.strip() for nm in nls.findAll("span")]
                break
            if cnames is not None:
                break
        if cnames is None:
            raise Exception("Could not find altnames for " + self.id)
        self.cnames = cnames
    return self.cnames
+
def __str__(self):
    """Use the manga's display name as its string form."""
    return self.name
def byid(self, id):
url = self.base + "comic/_/comics/" + id
- page = soup(htcache.fetch(url))
+ page = soupify(htcache.fetch(url))
title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
if title is None:
raise KeyError(id)
return manga(self, id, title.string.strip(), url)
def _search(self, pars):
    """Generate manga(...) objects for a site search.

    `pars` is a dict of query parameters for the <base>/search endpoint.
    Walks the paginated result table by incrementing the `p` parameter and
    yields one manga per result row; stops when no "show_more_row" marker
    is present on a page.  Raises Exception on a result URL that does not
    match self.rure.
    """
    p = 1
    while True:
        _pars = dict(pars)
        _pars["p"] = str(p)
        resp = urllib.urlopen(self.base + "search?" + urllib.urlencode(_pars))
        try:
            page = soupify(resp.read())
        finally:
            resp.close()
        rls = page.find("div", id="comic_search_results").table
        if rls.tbody is not None:
            rls = rls.tbody
        hasmore = False
        for child in rls.findAll("tr"):
            # Skip header rows and the alternate-name helper rows.
            if child.th is not None:
                continue
            if child.get("id", u"")[:11] == u"comic_rowo_":
                continue
            if child.get("id") == u"show_more_row":
                # This marker row means another page of results exists.
                hasmore = True
                continue
            link = child.td.strong.a
            url = link["href"].encode("us-ascii")
            m = self.rure.search(url)
            if m is None:
                raise Exception("Got weird manga URL: %r" % url)
            id = m.group(1)
            name = link.text.strip()
            yield manga(self, id, name, url)
        p += 1
        if not hasmore:
            break
+
# Extracts the manga id from a search-result URL like ".../comic/_/<id>".
rure = re.compile(r"/comic/_/([^/]*)$")
def search(self, expr):
    """Search the site for manga whose name contains `expr`.

    `expr` may be a byte or unicode string; it is normalized to a UTF-8
    encoded query value.  name_cond "c" selects a "contains" match.
    Returns the generator produced by _search().
    """
    if not isinstance(expr, unicode):
        expr = expr.decode("utf8")
    return self._search({"name": expr.encode("utf8"), "name_cond": "c"})
+
+ def byname(self, prefix):
+ if not isinstance(prefix, unicode):
+ prefix = prefix.decode("utf8")
+ for res in self._search({"name": prefix.encode("utf8"), "name_cond": "s"}):
+ if res.name[:len(prefix)].lower() == prefix.lower():
+ yield res
+ else:
+ for aname in res.altnames():
+ if aname[:len(prefix)].lower() == prefix.lower():
+ yield manga(self, res.id, aname, res.url)
+ break
+ else:
+ if False:
+ print "eliding " + res.name
+ print res.altnames()