import urllib, re, BeautifulSoup
import lib, htcache
soup = BeautifulSoup.BeautifulSoup
+soupify = lambda cont: soup(cont, convertEntities=soup.HTML_ENTITIES)
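# soupify centralizes parsing so every page is built with
# convertEntities=soup.HTML_ENTITIES, decoding entities like &amp;
# once at parse time instead of leaving them raw in the tree.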
def byclass(el, name, cl):
    for ch in el.findAll(name):
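# byclass is cut off in this hunk; a minimal sketch of the matching it
# presumably performs (an assumption, not the project's exact code):
def byclass_sketch(el, name, cl):
    # Return the first <name> descendant whose class attribute
    # contains the token cl, or None if nothing matches.
    for ch in el.findAll(name):
        cls = ch.get("class")
        if cls is not None and cl in cls.split():
            return ch
    return None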
def iurl(self):
    if self.ciurl is None:
-        page = soup(htcache.fetch(self.url))
+        page = soupify(htcache.fetch(self.url))
        img = nextel(page.find("div", id="full_image")).img
        self.ciurl = img["src"].encode("us-ascii")
    return self.ciurl
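# nextel is not defined in this hunk; a plausible sketch, assuming it
# steps to the next sibling that is a real tag, skipping text nodes:
def nextel_sketch(el):
    el = el.nextSibling
    while el is not None and not isinstance(el, BeautifulSoup.Tag):
        el = el.nextSibling
    return el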
pnre = re.compile(r"page (\d+)")
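# pnre presumably matches labels of the form "page 3" so the page
# number can be pulled out with .group(1).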
def pages(self):
    if self.cpag is None:
-        pg = soup(htcache.fetch(self.url))
+        pg = soupify(htcache.fetch(self.url))
        cpag = []
        for opt in pg.find("select", id="page_select").findAll("option"):
            url = opt["value"].encode("us-ascii")
cure = re.compile(r"/read/_/(\d+)/[^/]*")
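# cure captures the numeric id from reader URLs; with a hypothetical
# input, cure.search("/read/_/12345/ch1").group(1) would yield "12345".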
def ch(self):
    if self.cch is None:
-        page = soup(htcache.fetch(self.url))
+        page = soupify(htcache.fetch(self.url))
        cls = byclass(page, u"table", u"chapters_list")
        if cls.tbody is not None:
            cls = cls.tbody
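# Parsers differ on whether table rows get wrapped in an implicit
# <tbody>, so descending into cls.tbody when present keeps the row
# iteration uniform either way.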
def altnames(self):
    if self.cnames is None:
-        page = soup(htcache.fetch(self.url))
+        page = soupify(htcache.fetch(self.url))
        cnames = None
        for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
            if tbl.tbody is not None: tbl = tbl.tbody
def byid(self, id):
    url = self.base + "comic/_/comics/" + id
-    page = soup(htcache.fetch(url))
+    page = soupify(htcache.fetch(url))
    title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
    if title is None:
        raise KeyError(id)
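# A missing pagetitle <h1> is read as "no such comic" and surfaced as a
# KeyError, giving byid dict-style lookup semantics.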
_pars["p"] = str(p)
resp = urllib.urlopen(self.base + "search?" + urllib.urlencode(_pars))
try:
- page = soup(resp.read())
+ page = soupify(resp.read())
finally:
resp.close()
rls = page.find("div", id="comic_search_results").table
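# The try/finally guarantees resp.close() runs even if parsing fails;
# rls is the search-results table, and the surrounding (elided) loop
# presumably steps the "p" parameter through successive result pages.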