-import urllib
-import BeautifulSoup, urlparse
-import lib, htcache
-soup = BeautifulSoup.BeautifulSoup
-
-class imgstream(lib.imgstream):
- def __init__(self, url):
- self.bk = urllib.urlopen(url)
- ok = False
- try:
- if self.bk.getcode() != 200:
- raise IOError("Server error: " + str(self.bk.getcode()))
- self.ctype = self.bk.info()["Content-Type"]
- self.clen = int(self.bk.info()["Content-Length"])
- ok = True
- finally:
- if not ok:
- self.bk.close()
-
- def fileno(self):
- return self.bk.fileno()
-
- def close(self):
- self.bk.close()
-
- def read(self, sz = None):
- if sz is None:
- return self.bk.read()
- else:
- return self.bk.read(sz)
+import bs4
+from urllib.parse import urljoin
+from . import lib, htcache
+soup = bs4.BeautifulSoup
+soupify = lambda cont: soup(cont, "html.parser")
class page(lib.page):
def __init__(self, chapter, stack, n, url):
self.manga = chapter.manga
self.n = n
self.id = str(n)
- self.name = u"Page %s" % n
+ self.name = "Page %s" % n
self.url = url
self.ciurl = None
def iurl(self):
if self.ciurl is None:
- page = soup(htcache.fetch(self.url))
- self.ciurl = page.find("div", id="imgholder").find("img", id="img")["src"].encode("us-ascii")
+ page = soupify(htcache.fetch(self.url))
+ self.ciurl = page.find("div", id="imgholder").find("img", id="img")["src"]
return self.ciurl
def open(self):
- return imgstream(self.iurl())
+ return lib.stdimgstream(self.iurl())
def __str__(self):
return self.name
def pages(self):
if self.cpag is None:
- pg = soup(htcache.fetch(self.url))
+ pg = soupify(htcache.fetch(self.url))
pag = []
for opt in pg.find("div", id="selectpage").find("select", id="pageMenu").findAll("option"):
- url = urlparse.urljoin(self.url, opt["value"].encode("us-ascii"))
+ url = urljoin(self.url, opt["value"])
n = int(opt.string)
pag.append(page(self, self.stack + [(self, len(pag))], n, url))
self.cpag = pag
def ch(self):
if self.cch is None:
- page = soup(htcache.fetch(self.url))
+ page = soupify(htcache.fetch(self.url))
cls = page.find("div", id="chapterlist").find("table", id="listing")
i = 0
cch = []
td = tr.find("td")
if td is None: continue
cla = td.find("a")
- url = urlparse.urljoin(self.url, cla["href"].encode("us-ascii"))
- name = cla.string
- cid = name.encode("utf8")
- if isinstance(cla.nextSibling, unicode):
- ncont = unicode(cla.nextSibling)
- if ncont[:3] == u" : ":
- name += u": " + ncont[3:]
+ url = urljoin(self.url, cla["href"])
+ cid = name = cla.string
+ if isinstance(cla.nextSibling, str):
+ ncont = str(cla.nextSibling)
+ if len(ncont) > 3 and ncont[:3] == " : ":
+ name += ": " + ncont[3:]
cch.append(chapter(self, [(self, len(cch))], cid, name, url))
self.cch = cch
return self.cch
def byid(self, id):
url = self.base + id
- page = soup(htcache.fetch(url))
+ page = soupify(htcache.fetch(url))
if page.find("h2", attrs={"class": "aname"}) is None:
raise KeyError(id)
name = page.find("h2", attrs={"class": "aname"}).string
return manga(self, id, name, url)
+
+ def __iter__(self):
+ page = soupify(htcache.fetch(self.base + "alphabetical"))
+ for sec in page.findAll("div", attrs={"class": "series_alpha"}):
+ for li in sec.find("ul", attrs={"class": "series_alpha"}).findAll("li"):
+ url = li.a["href"]
+ name = li.a.string
+ if url[:1] != "/": continue
+ id = url[1:]
+ if '/' in id:
+                # NOTE(review): some hrefs carry a directory component; we keep only the
+                # final path segment as the id. Unclear whether the two URL shapes are
+                # semantically different on the site -- confirm against its URL scheme.
+ id = id[id.rindex('/') + 1:]
+ if id[-5:] != ".html":
+ continue
+ id = id[:-5]
+ yield manga(self, id, name, urljoin(self.base, url))
+
+ def byname(self, prefix):
+ prefix = prefix.lower()
+ for manga in self:
+ if manga.name.lower()[:len(prefix)] == prefix:
+ yield manga
+
+ def search(self, expr):
+ expr = expr.lower()
+ for manga in self:
+ if expr in manga.name.lower():
+ yield manga