manga/batoto.py

   1 import urllib, re, BeautifulSoup
   2 import lib, htcache
   3 soup = BeautifulSoup.BeautifulSoup
   4
   5 def byclass(el, name, cl):
   6     for ch in el.findAll(name):
   7         if not isinstance(ch, BeautifulSoup.Tag): continue
   8         cll = ch.get("class", "")
   9         if cl in cll.split():
  10             return ch
  11     return None
  12
  13 def nextel(el):
  14     while True:
  15         el = el.nextSibling
  16         if isinstance(el, BeautifulSoup.Tag):
  17             return el
  18
  19 class page(lib.page):
  20     def __init__(self, chapter, stack, n, url):
  21         self.stack = stack
  22         self.chapter = chapter
  23         self.n = n
  24         self.id = str(n)
  25         self.name = u"Page %s" % n
  26         self.url = url
  27         self.ciurl = None
  28
  29     def iurl(self):
  30         if self.ciurl is None:
  31             page = soup(htcache.fetch(self.url))
  32             img = nextel(page.find("div", id="full_image")).img
  33             self.ciurl = img["src"].encode("us-ascii")
  34         return self.ciurl
  35
  36     def open(self):
  37         return lib.stdimgstream(self.iurl())
  38
  39     def __str__(self):
  40         return self.name
  41
  42     def __repr(self):
  43         return "<batoto.page %r.%r.%r>" % (self.chapter.manga.name, self.chapter.name, self.name)
  44
  45 class chapter(lib.pagelist):
  46     def __init__(self, manga, stack, id, name, url):
  47         self.stack = stack
  48         self.manga = manga
  49         self.id = id
  50         self.name = name
  51         self.url = url
  52         self.cpag = None
  53
  54     def __getitem__(self, i):
  55         return self.pages()[i]
  56
  57     def __len__(self):
  58         return len(self.pages())
  59
  60     pnre = re.compile(r"page (\d+)")
  61     def pages(self):
  62         if self.cpag is None:
  63             pg = soup(htcache.fetch(self.url))
  64             cpag = []
  65             for opt in pg.find("select", id="page_select").findAll("option"):
  66                 url = opt["value"].encode("us-ascii")
  67                 n = int(self.pnre.match(opt.string).group(1))
  68                 cpag.append(page(self, self.stack + [(self, len(cpag))], n, url))
  69             self.cpag = cpag
  70         return self.cpag
  71
  72     def __str__(self):
  73         return self.name
  74
  75     def __repr__(self):
  76         return "<batoto.chapter %r.%r>" % (self.manga.name, self.name)
  77
  78 class manga(lib.manga):
  79     def __init__(self, lib, id, name, url):
  80         self.lib = lib
  81         self.id = id
  82         self.name = name
  83         self.url = url
  84         self.cch = None
  85         self.stack = []
  86
  87     def __getitem__(self, i):
  88         return self.ch()[i]
  89
  90     def __len__(self):
  91         return len(self.ch())
  92
  93     cure = re.compile(r"/read/_/(\d+)/[^/]*")
  94     def ch(self):
  95         if self.cch is None:
  96             page = soup(htcache.fetch(self.url))
  97             cls = byclass(page, u"table", u"chapters_list")
  98             if cls.tbody is not None:
  99                 cls = cls.tbody
 100             scl = u"lang_" + self.lib.lang
 101             cch = []
 102             for ch in cls.childGenerator():
 103                 if isinstance(ch, BeautifulSoup.Tag) and ch.name == u"tr":
 104                     cll = ch.get("class", "").split()
 105                     if u"row" in cll and scl in cll:
 106                         url = ch.td.a["href"].encode("us-ascii")
 107                         m = self.cure.search(url)
 108                         if m is None: raise Exception("Got weird chapter URL: %r" % url)
 109                         cid = m.group(1)
 110                         url = self.lib.base + "read/_/" + cid
 111                         name = ch.td.a.text
 112                         cch.append((cid, name, url))
 113             cch.reverse()
 114             rch = []
 115             for n, (cid, name, url) in enumerate(cch):
 116                 rch.append(chapter(self, [(self, n)], cid, name, url))
 117             self.cch = rch
 118         return self.cch
 119
 120     def __str__(self):
 121         return self.name
 122
 123     def __repr__(self):
 124         return "<batoto.manga %r>" % self.name
 125
 126 class library(lib.library):
 127     def __init__(self):
 128         self.base = "http://www.batoto.net/"
 129         self.lang = u"English"
 130
 131     def byid(self, id):
 132         url = self.base + "comic/_/comics/" + id
 133         page = soup(htcache.fetch(url))
 134         title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
 135         if title is None:
 136             raise KeyError(id)
 137         return manga(self, id, title.string.strip(), url)
 138
 139     mure = re.compile(r"/comic/_/comics/([^/]*)$")
 140     def search(self, expr):
 141         resp = urllib.urlopen(self.base + "forums/index.php?app=core&module=search&do=search&fromMainBar=1",
 142                               urllib.urlencode({"search_term": expr, "search_app": "ccs:database:3"}))
 143         try:
 144             page = soup(resp.read())
 145         finally:
 146             resp.close()
 147         none = page.find("p", attrs={"class": "no_messages"})
 148         if none is not None and u"No results" in none.text:
 149             return []
 150         ret = []
 151         for child in page.find("div", id="search_results").ol.childGenerator():
 152             if isinstance(child, BeautifulSoup.Tag) and child.name == u"li":
 153                 info = child.find("div", attrs={"class": "result_info"})
 154                 url = info.h3.a["href"].encode("us-ascii")
 155                 m = self.mure.search(url)
 156                 if m is None: raise Exception("Got weird manga URL: %r" % url)
 157                 id = m.group(1)
 158                 name = info.h3.a.string.strip()
 159                 ret.append(manga(self, id, name, url))
 160         return ret