# Scraper module for http://raw.senmanga.com/: manga, chapter and page
# objects built on the lib.* base classes used below.
import BeautifulSoup, urlparse
import lib, htcache    # needed for the lib.* base classes and htcache.fetch() used below
soup = BeautifulSoup.BeautifulSoup
soupify = lambda cont: soup(cont, convertEntities=soup.HTML_ENTITIES)
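
# One page of a chapter; the actual image URL is resolved lazily from the
# page's viewer URL.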
class page(lib.page):    # base class assumed, by analogy with the other lib.* bases below
    def __init__(self, chapter, stack, n, url):
        # Attribute assignments reconstructed from the constructor arguments and later uses.
        self.stack = stack
        self.chapter = chapter
        self.manga = chapter.manga
        self.n = n
        self.name = u"Page " + unicode(n)
        self.url = url
        self.ciurl = None
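
    # Locate the <img id="picture"> element in the viewer page and cache its absolute URL.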
    def iurl(self):
        if self.ciurl is None:
            page = soupify(htcache.fetch(self.url))
            for tr in page.findAll("tr"):
                img = tr.find("img", id="picture")
                if img is not None:
                    self.ciurl = urlparse.urljoin(self.url, img["src"].encode("us-ascii"))
            if self.ciurl is None:
                raise Exception("parse error: could not find image url for %r" % self)
        return self.ciurl

    def open(self):    # method name assumed; returns a stream for the page image
        return lib.stdimgstream(self.iurl())

    def __repr__(self):
        return "<rawsen.page %r.%r.%r>" % (self.manga.name, self.chapter.name, self.name)
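
# One chapter of a manga; its pages are enumerated on demand from the pager.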
class chapter(lib.pagelist):
    def __init__(self, manga, stack, id, name, url):
        # Attribute assignments reconstructed from their uses below.
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())
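
    # Build the page list from the page-number <select> in the viewer's pager.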
    def pages(self):
        if self.cpag is None:
            if self.url[-2:] != "/1":
                raise Exception("parse error: unexpected first page url for %r" % self)
            base = self.url[:-1]    # (assumed) the chapter URL sans the trailing "1"
            pg = soupify(htcache.fetch(self.url))
            pag = []
            for opt in pg.find("div", attrs={"class": "pager"}).find("select", attrs={"name": "page"}).findAll("option"):
                n = int(opt["value"])    # (assumed) the <option> value carries the page number
                url = urlparse.urljoin(base, str(n))
                pag.append(page(self, self.stack + [(self, len(pag))], n, url))
            self.cpag = pag
        return self.cpag

    def __repr__(self):
        return "<rawsen.chapter %r.%r>" % (self.manga.name, self.name)
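
# One manga title; its chapter list is scraped from the title's front page.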
class manga(lib.manga):
    def __init__(self, lib, id, name, url):
        # Attribute assignments reconstructed from their uses below.
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        self.cch = None

    def __getitem__(self, i):
        return self.ch()[i]    # ch(): the chapter list; method name assumed
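
    # Parse the "Chapter List" post on the manga's front page and build the
    # chapters (the accessor name `ch` is an assumption; it is not shown in the source).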
    def ch(self):
        if self.cch is None:
            page = soupify(htcache.fetch(self.url))
            cls = None
            for div in page.findAll("div", attrs={"class": "post"}):
                if div.h3 is not None and u"Chapter List" in div.h3.string:
                    cls = div
                    break
            if cls is None:
                raise Exception("parse error: no chapter list found for %r" % self)
            cch = []
            for tr in cls.table.findAll("tr"):
                lcol = tr.findAll("td")[1]
                if lcol.a is None: continue
                link = lcol.a
                url = link["href"].encode("us-ascii")
                name = link.string
                cid = name.encode("utf-8")
                cch.append(chapter(self, [(self, len(cch))], cid, name, url))
            self.cch = cch
        return self.cch

    def __repr__(self):
        return "<rawsen.manga %r>" % self.name
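
# The site-level interface: enumerates, looks up and searches mangas on
# raw.senmanga.com.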
class library(lib.library):
    def __init__(self):
        self.base = "http://raw.senmanga.com/"

    # Look up a single manga by its URL id (method name and the not-found
    # handling below are assumed; only the matching logic appears in the source).
    def byid(self, id):
        url = urlparse.urljoin(self.base, id + "/")
        page = soupify(htcache.fetch(url))
        name = None
        for div in page.findAll("div", attrs={"class": "post"}):
            if div.h2 is not None and div.h2.a is not None:
                curl = div.h2.a["href"].encode("us-ascii")
                # Accept only post links of the form ".../<id>/".
                if curl[-1] != '/' or curl.rfind('/', 0, -1) < 0: continue
                if curl[curl.rindex('/', 0, -1) + 1:-1] != id: continue
                name = div.h2.a.string
                break
        if name is None:
            raise KeyError(id)
        return manga(self, id, name, url)
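
    # Walk the site's directory listing under "/Manga/" and yield every manga.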
    def __iter__(self):
        page = soupify(htcache.fetch(self.base + "Manga/"))
        for part in page.find("div", attrs={"class": "post"}).findAll("table"):
            for row in part.findAll("tr"):
                link = row.findAll("td")[1].a
                if link is None: continue
                name = link.string
                url = link["href"].encode("us-ascii")
                # Only relative links of the form "/<id>/" name a manga page.
                if len(url) < 3 or url[:1] != '/' or url[-1:] != '/':
                    continue
                id = url[1:-1]    # (assumed) the manga id is the path component
                yield manga(self, id, name, urlparse.urljoin(self.base, url))
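
    # Case-insensitive prefix match on manga names.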
    def byname(self, prefix):
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        prefix = prefix.lower()
        for manga in self:
            if manga.name.lower()[:len(prefix)] == prefix:
                yield manga
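
    # Case-insensitive substring match on manga names.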
    def search(self, expr):
        if not isinstance(expr, unicode):
            expr = expr.decode("utf8")
        expr = expr.lower()
        for manga in self:
            if expr in manga.name.lower():
                yield manga
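
# Minimal usage sketch (assumes the sibling lib/htcache modules are importable
# and that the site is reachable; the search term is just an example):
#   lst = library()
#   for m in lst.search(u"bleach"):
#       print m.name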