1 import BeautifulSoup, urlparse
3 soup = BeautifulSoup.BeautifulSoup
# NOTE(review): fragment of a `page` class (the `class page` header and many
# body lines are missing from this extract; stray leading numbers look like
# original-file line numbers baked into the listing — left untouched).
# Python 2 code: uses `unicode` and the `urlparse` module.
6 def __init__(self, chapter, stack, n, url):
# Only two assignments of the constructor are visible; presumably chapter,
# stack, n and url are also stored on self — TODO confirm against full file.
9 self.manga = chapter.manga
12 self.name = u"Page " + unicode(n)
# Fragment of a lazy image-URL resolver (its `def` line is not visible):
# on first call, fetch and parse the page HTML and scan table rows for the
# <img id="picture"> element.
17 if self.ciurl is None:
18 page = soup(htcache.fetch(self.url))
19 for tr in page.findAll("tr"):
20 img = tr.find("img", id="picture")
# Resolve the image src relative to the page URL; encoded to us-ascii,
# presumably so py2 urllib accepts it as a byte-string URL — verify.
22 self.ciurl = urlparse.urljoin(self.url, img["src"].encode("us-ascii"))
# Still None after the scan means the expected <img> was never found.
23 if self.ciurl is None:
24 raise Exception("parse error: could not find image url for %r" % self)
# Fragment of an open/stream method: delegate to the library's standard
# image-stream helper using the resolved image URL.
28 return lib.stdimgstream(self.iurl())
# __repr__ fragment: debug representation including manga/chapter/page names.
34 return "<rawsen.page %r.%r.%r>" % (self.manga.name, self.chapter.name, self.name)
# NOTE(review): fragmentary listing of the chapter class — several body
# lines (including the `pages()` def line and its setup) are missing from
# this extract; comments below describe only what is visible.
36 class chapter(lib.pagelist):
# Constructor body is not visible; presumably stores manga/stack/id/name/url.
37 def __init__(self, manga, stack, id, name, url):
# Index into the lazily-built page list.
45 def __getitem__(self, i):
46 return self.pages()[i]
# __len__ fragment: length of the page list.
49 return len(self.pages())
# Fragment of the page-list builder: the chapter URL is expected to end in
# "/1" (first page); anything else is treated as a parse error.
53 if self.url[-2:] != "/1":
54 raise Exception("parse error: unexpected first page url for %r" % self)
56 pg = soup(htcache.fetch(self.url))
# Walk the page-number <select> inside the pager div to enumerate pages.
58 for opt in pg.find("div", attrs={"class": "pager"}).find("select", attrs={"name": "page"}).findAll("option"):
# `base` and `n` come from lines not visible here — TODO confirm.
60 url = urlparse.urljoin(base, str(n))
61 pag.append(page(self, self.stack + [(self, len(pag))], n, url))
# __repr__ fragment.
69 return "<rawsen.chapter %r.%r>" % (self.manga.name, self.name)
# NOTE(review): fragmentary listing of the manga class — the chapter-list
# builder's def line and several statements (e.g. where `cls`, `cch`, `name`
# and `link` are bound) are missing from this extract.
71 class manga(lib.manga):
# Constructor body not visible; presumably stores lib/id/name/url.
72 def __init__(self, lib, id, name, url):
# __getitem__ body not visible in this extract.
80 def __getitem__(self, i):
# Fragment of the chapter-list builder: parse the manga page and look for
# the "post" div whose <h3> contains "Chapter List".
88 page = soup(htcache.fetch(self.url))
90 for div in page.findAll("div", attrs={"class": "post"}):
91 if div.h3 is not None and u"Chapter List" in div.h3.string:
# Reached when no matching div was found (the success path presumably binds
# `cls` on a line not shown here — TODO confirm).
95 raise Exception("parse error: no chapter list found for %r" % self)
# Walk the chapter table; the second <td> holds the chapter link.
97 for tr in cls.table.findAll("tr"):
98 lcol = tr.findAll("td")[1]
# Skip rows without a link (e.g. header/separator rows).
99 if lcol.a is None: continue
101 url = link["href"].encode("us-ascii")
# Chapter id is the utf-8 encoded display name — `name` bound on a missing line.
103 cid = name.encode("utf-8")
104 cch.append(chapter(self, [(self, len(cch))], cid, name, url))
# __repr__ fragment.
112 return "<rawsen.manga %r>" % self.name
# NOTE(review): fragmentary listing of the library class — the __init__ and
# several method `def` lines are missing, and the `search` generator is cut
# off at the end of this extract.
114 class library(lib.library):
# __init__ fragment: site root all relative URLs are resolved against.
116 self.base = "http://raw.senmanga.com/"
# Fragment of byid(id): fetch the manga page at <base>/<id>/ and verify a
# matching post heading exists before constructing the manga object.
119 url = urlparse.urljoin(self.base, id + "/")
120 page = soup(htcache.fetch(url))
122 for div in page.findAll("div", attrs={"class": "post"}):
123 if div.h2 is not None and div.h2.a is not None:
124 curl = div.h2.a["href"].encode("us-ascii")
# Require a trailing slash and at least one earlier slash in the href...
125 if curl[-1] != '/' or curl.rfind('/', 0, -1) < 0: continue
# ...and require the final path segment to equal the requested id.
126 if curl[curl.rindex('/', 0, -1) + 1:-1] != id: continue
127 name = div.h2.a.string
# Success path (the no-match/error path is on lines not visible here).
130 return manga(self, id, name, url)
# Fragment of the catalogue iterator: walk every table in the Manga index
# page; the second <td> of each row links to a manga.
133 page = soup(htcache.fetch(self.base + "Manga/"))
134 for part in page.find("div", attrs={"class": "post"}).findAll("table"):
135 for row in part.findAll("tr"):
136 link = row.findAll("td")[1].a
139 url = link["href"].encode("us-ascii")
# Only hrefs shaped like "/<id>/" are accepted; `id` and `name` are bound on
# lines missing from this extract — TODO confirm.
141 if len(url) < 3 or url[:1] != '/' or url[-1:] != '/':
144 yield manga(self, id, name, urlparse.urljoin(self.base, url))
# Case-insensitive prefix match over the catalogue (py2: normalize to unicode).
146 def byname(self, prefix):
147 if not isinstance(prefix, unicode):
148 prefix = prefix.decode("utf8")
149 prefix = prefix.lower()
# Loop header is missing from this extract; `manga` here shadows the class.
151 if manga.name.lower()[:len(prefix)] == prefix:
# Case-insensitive substring search over the catalogue; the body is cut off
# at the end of this extract (the yield presumably follows — verify).
154 def search(self, expr):
155 if not isinstance(expr, unicode):
156 expr = expr.decode("utf8")
159 if expr in manga.name.lower():