From: Fredrik Tolf Date: Wed, 29 Feb 2012 11:42:09 +0000 (+0100) Subject: Added a proper pagestack to pagelists and pages. X-Git-Url: http://git.dolda2000.com/gitweb/?a=commitdiff_plain;h=3683ab38c8cb8b221c2ec20c898fa47a884d3842;p=automanga.git Added a proper pagestack to pagelists and pages. --- diff --git a/manga/lib.py b/manga/lib.py index b5d13c1..0890eaf 100644 --- a/manga/lib.py +++ b/manga/lib.py @@ -15,7 +15,18 @@ class library(object): Not all libraries need implement this.""" raise NotImplementedError("manga.lib.library iterator") -class pagelist(object): +class pagetree(object): + """Base class for objects in the tree of pages and pagelists. + + All pagetree objects should contain an attribute `stack', containing + a list of pairs. The last pair in the list should be the pagetree + object which yielded this pagetree object, along with the index + which yielded it. Every non-last pair should hold the same + information for the object in the pair following it. The only objects with empty + `stack' lists should be `manga' objects.""" + pass + +class pagelist(pagetree): """Class representing a list of either pages, or nested pagelists. Might be, for instance, a volume or a chapter. @@ -43,7 +54,7 @@ class manga(pagelist): and all constraints valid for it.""" pass -class page(object): +class page(pagetree): """Class representing a single page of a manga. Pages make up the leaf nodes of a pagelist tree. 
diff --git a/manga/mangafox.py b/manga/mangafox.py index 0ebca60..cb28944 100644 --- a/manga/mangafox.py +++ b/manga/mangafox.py @@ -18,7 +18,8 @@ class imgstream(lib.imgstream): return self.bk.read(sz) class page(lib.page): - def __init__(self, chapter, n, url): + def __init__(self, chapter, stack, n, url): + self.stack = stack self.chapter = chapter self.volume = self.chapter.volume self.manga = self.volume.manga @@ -36,7 +37,8 @@ class page(lib.page): return imgstream(self.iurl()) class chapter(lib.pagelist): - def __init__(self, volume, name, url): + def __init__(self, volume, stack, name, url): + self.stack = stack self.volume = volume self.manga = volume.manga self.name = name @@ -58,7 +60,7 @@ class chapter(lib.pagelist): m = l.contents[2].strip() if m[:3] != u"of ": raise Exception("parse error: weird page list for %r" % self) - self.cpag = [page(self, n + 1, self.url + ("%i.html" % (n + 1))) for n in xrange(int(m[3:]))] + self.cpag = [page(self, self.stack + [(self, n)], n + 1, self.url + ("%i.html" % (n + 1))) for n in xrange(int(m[3:]))] return self.cpag def __str__(self): @@ -68,7 +70,8 @@ class chapter(lib.pagelist): return "" % (self.manga.name, self.volume.name, self.name) class volume(lib.pagelist): - def __init__(self, manga, name): + def __init__(self, manga, stack, name): + self.stack = stack self.manga = manga self.name = name self.ch = [] @@ -97,6 +100,7 @@ class manga(lib.manga): self.name = name self.url = url self.cvol = None + self.stack = [] def __getitem__(self, i): return self.vols()[i] @@ -109,12 +113,12 @@ class manga(lib.manga): page = soup(htcache.fetch(self.url)) vls = page.find("div", id="chapters").findAll("div", attrs={"class": "slide"}) self.cvol = [] - for i in xrange(len(vls)): - vol = volume(self, vls[i].find("h3", attrs={"class": "volume"}).contents[0].strip()) - cls = nextel(vls[i]) + for i, vn in enumerate(reversed(vls)): + vol = volume(self, [(self, i)], vn.find("h3", attrs={"class": "volume"}).contents[0].strip()) + cls 
= nextel(vn) if cls.name != u"ul" or cls["class"] != u"chlist": raise Exception("parse error: weird volume list for %r" % self) - for ch in cls.findAll("li"): + for o, ch in enumerate(reversed(cls.findAll("li"))): n = ch.div.h3 or ch.div.h4 name = n.a.string for span in ch("span"): @@ -126,8 +130,8 @@ class manga(lib.manga): url = n.a["href"].encode("us-ascii") if url[-7:] != "/1.html": raise Exception("parse error: unexpected chapter URL for %r: %s" % (self, url)) - vol.ch.insert(0, chapter(vol, name, url[:-6])) - self.cvol.insert(0, vol) + vol.ch.append(chapter(vol, vol.stack + [(vol, o)], name, url[:-6])) + self.cvol.append(vol) return self.cvol def __str__(self):