Added more persistent string IDs to the various pagetree nodes.

author Fredrik Tolf <fredrik@dolda2000.com>

Wed, 29 Feb 2012 21:26:17 +0000 (22:26 +0100)

committer Fredrik Tolf <fredrik@dolda2000.com>

Wed, 29 Feb 2012 21:26:17 +0000 (22:26 +0100)
author Fredrik Tolf <fredrik@dolda2000.com>
Wed, 29 Feb 2012 21:26:17 +0000 (22:26 +0100)
committer Fredrik Tolf <fredrik@dolda2000.com>
Wed, 29 Feb 2012 21:26:17 +0000 (22:26 +0100)
diff --git a/manga/lib.py b/manga/lib.py

index d9769933ab5fce72a502575beb227902184d310e..52f75ea9f5e3e60630be362f2519c1d4921e91fd 100644 (file)
--- a/manga/lib.py
+++ b/manga/lib.py
@@ -9,6 +9,13 @@ class library(object):
          All libraries should implement this."""
          raise NotImplementedError()
  
+    def byid(self, id):
+        """Returns a previously known manga by its string ID, or
+        raises KeyError if no such manga could be found.
+
+        All libraries should implement this."""
+        raise KeyError(id)
+
      def __iter__(self):
          """Return an iterator of all known mangas in this library.
  
@@ -18,13 +25,30 @@ class library(object):
  class pagetree(object):
      """Base class for objects in the tree of pages and pagelists.
  
-    All pagetree objects should contain an attribute `stack', contains
-    a list of pairs. The last pair in the list should be the pagetree
-    object which yielded this pagetree object, along with the index
-    which yielded it. Every non-last pair should be the same
+    All pagetree objects should contain an attribute `stack',
+    containing a list of pairs. The last pair in the list should be
+    the pagetree object which yielded this pagetree object, along with
+    the index which yielded it. Every non-last pair should be the same
      information for the pair following it. The only objects with empty
-    `stack' lists should be `manga' objects."""
-    pass
+    `stack' lists should be `manga' objects.
+    
+    All non-root pagetree objects should also contain an attribute
+    `id', which should be a string that can be passed to the `byid'
+    function of its parent node to recover the node. Such string ID
+    should be more persistent than the node's numeric index in the
+    parent."""
+    
+    def idlist(self):
+        """Returns a list of the IDs necessary to resolve this node
+        from the root node."""
+        if len(self.stack) == 0:
+            raise Exception("Cannot get ID list on root node.")
+        return [n.id for n, i in self.stack[1:]] + [self.id]
+
+    def byidlist(self, idlist):
+        if len(idlist) == 0:
+            return self
+        return self.byid(idlist[0]).byidlist(idlist[1:])
  
  class pagelist(pagetree):
      """Class representing a list of either pages, or nested
@@ -49,9 +73,25 @@ class pagelist(pagetree):
          All pagelists need to implement this."""
          raise NotImplementedError()
  
+    def byid(self, id):
+        """Return the direct sub-node of this pagelist which has the
+        given string ID. If none is found, a KeyError is raised.
+
+        This default method iterates the children of this node, but
+        may be overridden by some more efficient implementation.
+        """
+        for ch in self:
+            if ch.id == id:
+                return ch
+        raise KeyError(id)
+
  class manga(pagelist):
      """Class reprenting a single manga. Includes the pagelist class,
-    and all constraints valid for it."""
+    and all constraints valid for it.
+
+    A manga is a root pagetree node, but should also contain an `id'
+    attribute, which can be used to recover the manga from its
+    library's `byid' function."""
      pass
  
  class page(pagetree):
diff --git a/manga/mangafox.py b/manga/mangafox.py

index cb289441932610a34338ebce02a4fae9e065f34f..8c23630c42a43b1419581ec9150b0eb394272c05 100644 (file)
--- a/manga/mangafox.py
+++ b/manga/mangafox.py
@@ -24,6 +24,7 @@ class page(lib.page):
          self.volume = self.chapter.volume
          self.manga = self.volume.manga
          self.n = n
+        self.id = str(n)
          self.url = url
          self.ciurl = None
  
@@ -37,10 +38,11 @@ class page(lib.page):
          return imgstream(self.iurl())
  
  class chapter(lib.pagelist):
-    def __init__(self, volume, stack, name, url):
+    def __init__(self, volume, stack, id, name, url):
          self.stack = stack
          self.volume = volume
          self.manga = volume.manga
+        self.id = id
          self.name = name
          self.url = url
          self.cpag = None
@@ -70,9 +72,10 @@ class chapter(lib.pagelist):
          return "<mangafox.chapter %r.%r.%r>" % (self.manga.name, self.volume.name, self.name)
  
  class volume(lib.pagelist):
-    def __init__(self, manga, stack, name):
+    def __init__(self, manga, stack, id, name):
          self.stack = stack
          self.manga = manga
+        self.id = id
          self.name = name
          self.ch = []
  
@@ -95,8 +98,9 @@ def nextel(el):
              return el
  
  class manga(lib.manga):
-    def __init__(self, lib, name, url):
+    def __init__(self, lib, id, name, url):
          self.lib = lib
+        self.id = id
          self.name = name
          self.url = url
          self.cvol = None
@@ -114,13 +118,16 @@ class manga(lib.manga):
              vls = page.find("div", id="chapters").findAll("div", attrs={"class": "slide"})
              self.cvol = []
              for i, vn in enumerate(reversed(vls)):
-                vol = volume(self, [(self, i)], vn.find("h3", attrs={"class": "volume"}).contents[0].strip())
+                name = vn.find("h3", attrs={"class": "volume"}).contents[0].strip()
+                vid = name.encode("utf8")
+                vol = volume(self, [(self, i)], vid, name)
                  cls = nextel(vn)
                  if cls.name != u"ul" or cls["class"] != u"chlist":
                      raise Exception("parse error: weird volume list for %r" % self)
                  for o, ch in enumerate(reversed(cls.findAll("li"))):
                      n = ch.div.h3 or ch.div.h4
                      name = n.a.string
+                    chid = name.encode("utf8")
                      for span in ch("span"):
                          try:
                              if u" title " in (u" " + span["class"] + u" "):
@@ -130,7 +137,7 @@ class manga(lib.manga):
                      url = n.a["href"].encode("us-ascii")
                      if url[-7:] != "/1.html":
                          raise Exception("parse error: unexpected chapter URL for %r: %s" % (self, url))
-                    vol.ch.append(chapter(vol, vol.stack + [(vol, o)], name, url[:-6]))
+                    vol.ch.append(chapter(vol, vol.stack + [(vol, o)], chid, name, url[:-6]))
                  self.cvol.append(vol)
          return self.cvol
  
@@ -151,11 +158,14 @@ class library(lib.library):
          page = soup(htcache.fetch(self.base + ("directory/%i.htm?az" % pno)))
          ls = page.find("div", id="mangalist").find("ul", attrs={"class": "list"}).findAll("li")
          ret = []
+        ubase = self.base + "manga/"
          for m in ls:
              t = m.find("div", attrs={"class": "manga_text"}).find("a", attrs={"class": "title"})
              name = t.string
              url = t["href"].encode("us-ascii")
-            ret.append(manga(self, name, url))
+            if url[:len(ubase)] != ubase or url.find('/', len(ubase)) != (len(url) - 1):
+                raise Exception("parse error: unexpected manga URL for %r: %s" % (name, url))
+            ret.append(manga(self, url[len(ubase):-1], name, url))
          return ret
  
      def alphapages(self):
@@ -197,5 +207,14 @@ class library(lib.library):
              ls = self.alphapage(pno)
              i = 0
  
+    def byid(self, id):
+        url = self.base + ("manga/%s/" % id)
+        page = soup(htcache.fetch(url))
+        if page.find("div", id="title") is None:
+            # Assume we got the search page
+            raise KeyError(id)
+        name = page.find("div", id="series_info").find("div", attrs={"class": "cover"}).img["alt"]
+        return manga(self, id, name, url)
+
      def __iter__(self):
          raise NotImplementedError("mangafox iterator")
author	Fredrik Tolf <fredrik@dolda2000.com>
	Wed, 29 Feb 2012 21:26:17 +0000 (22:26 +0100)
committer	Fredrik Tolf <fredrik@dolda2000.com>
	Wed, 29 Feb 2012 21:26:17 +0000 (22:26 +0100)
manga/lib.py		patch \| blob \| blame \| history
manga/mangafox.py		patch \| blob \| blame \| history