# Scraper module for http://raw.senmanga.com/: manga, chapter and page
# objects built on the lib.* base classes used below.
import BeautifulSoup, urlparse
import lib, htcache    # needed for the lib.* base classes and htcache.fetch() used below
soup = BeautifulSoup.BeautifulSoup
soupify = lambda cont: soup(cont, convertEntities=soup.HTML_ENTITIES)
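
# One page of a chapter; the actual image URL is resolved lazily from the
# page's viewer URL.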
class page(lib.page):    # base class assumed, by analogy with the other lib.* bases below
    def __init__(self, chapter, stack, n, url):
        # Attribute assignments reconstructed from the constructor arguments and later uses.
        self.stack = stack
        self.chapter = chapter
        self.manga = chapter.manga
        self.n = n
        self.name = u"Page " + unicode(n)
        self.url = url
        self.ciurl = None
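
    # Locate the <img id="picture"> element in the viewer page and cache its absolute URL.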
    def iurl(self):
        if self.ciurl is None:
            page = soupify(htcache.fetch(self.url))
            for tr in page.findAll("tr"):
                img = tr.find("img", id="picture")
                if img is not None:
                    self.ciurl = urlparse.urljoin(self.url, img["src"].encode("us-ascii"))
            if self.ciurl is None:
                raise Exception("parse error: could not find image url for %r" % self)
        return self.ciurl

    def open(self):    # method name assumed; returns a stream for the page image
        return lib.stdimgstream(self.iurl())

    def __repr__(self):
        return "<rawsen.page %r.%r.%r>" % (self.manga.name, self.chapter.name, self.name)
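
# One chapter of a manga; its pages are enumerated on demand from the pager.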
class chapter(lib.pagelist):
    def __init__(self, manga, stack, id, name, url):
        # Attribute assignments reconstructed from their uses below.
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())
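
    # Build the page list from the page-number <select> in the viewer's pager.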
    def pages(self):
        if self.cpag is None:
            if self.url[-2:] != "/1":
                raise Exception("parse error: unexpected first page url for %r" % self)
            base = self.url[:-1]    # (assumed) the chapter URL sans the trailing "1"
            pg = soupify(htcache.fetch(self.url))
            pag = []
            for opt in pg.find("div", attrs={"class": "pager"}).find("select", attrs={"name": "page"}).findAll("option"):
                n = int(opt["value"])    # (assumed) the <option> value carries the page number
                url = urlparse.urljoin(base, str(n))
                pag.append(page(self, self.stack + [(self, len(pag))], n, url))
            self.cpag = pag
        return self.cpag

    def __repr__(self):
        return "<rawsen.chapter %r.%r>" % (self.manga.name, self.name)
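
# One manga title; its chapter list is scraped from the title's front page.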
class manga(lib.manga):
    def __init__(self, lib, id, name, url):
        # Attribute assignments reconstructed from their uses below.
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        self.cch = None

    def __getitem__(self, i):
        return self.ch()[i]    # ch(): the chapter list; method name assumed
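
    # Parse the "Chapter List" post on the manga's front page and build the
    # chapters (the accessor name `ch` is an assumption; it is not shown in the source).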
    def ch(self):
        if self.cch is None:
            page = soupify(htcache.fetch(self.url))
            cls = None
            for div in page.findAll("div", attrs={"class": "post"}):
                if div.h3 is not None and u"Chapter List" in div.h3.string:
                    cls = div
                    break
            if cls is None:
                raise Exception("parse error: no chapter list found for %r" % self)
            cch = []
            for tr in cls.table.findAll("tr"):
                lcol = tr.findAll("td")[1]
                if lcol.a is None: continue
                link = lcol.a
                url = link["href"].encode("us-ascii")
                name = link.string
                cid = name.encode("utf-8")
                cch.append(chapter(self, [(self, len(cch))], cid, name, url))
            self.cch = cch
        return self.cch

    def __repr__(self):
        return "<rawsen.manga %r>" % self.name
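
# The site-level interface: enumerates, looks up and searches mangas on
# raw.senmanga.com.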
class library(lib.library):
    def __init__(self):
        self.base = "http://raw.senmanga.com/"

    # Look up a single manga by its URL id (method name and the not-found
    # handling below are assumed; only the matching logic appears in the source).
    def byid(self, id):
        url = urlparse.urljoin(self.base, id + "/")
        page = soupify(htcache.fetch(url))
        name = None
        for div in page.findAll("div", attrs={"class": "post"}):
            if div.h2 is not None and div.h2.a is not None:
                curl = div.h2.a["href"].encode("us-ascii")
                # Accept only post links of the form ".../<id>/".
                if curl[-1] != '/' or curl.rfind('/', 0, -1) < 0: continue
                if curl[curl.rindex('/', 0, -1) + 1:-1] != id: continue
                name = div.h2.a.string
                break
        if name is None:
            raise KeyError(id)
        return manga(self, id, name, url)
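
    # Walk the site's directory listing under "/Manga/" and yield every manga.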
    def __iter__(self):
        page = soupify(htcache.fetch(self.base + "Manga/"))
        for part in page.find("div", attrs={"class": "post"}).findAll("table"):
            for row in part.findAll("tr"):
                link = row.findAll("td")[1].a
                if link is None: continue
                name = link.string
                url = link["href"].encode("us-ascii")
                # Only relative links of the form "/<id>/" name a manga page.
                if len(url) < 3 or url[:1] != '/' or url[-1:] != '/':
                    continue
                id = url[1:-1]    # (assumed) the manga id is the path component
                yield manga(self, id, name, urlparse.urljoin(self.base, url))
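
    # Case-insensitive prefix match on manga names.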
    def byname(self, prefix):
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        prefix = prefix.lower()
        for manga in self:
            if manga.name.lower()[:len(prefix)] == prefix:
                yield manga
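
    # Case-insensitive substring match on manga names.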
    def search(self, expr):
        if not isinstance(expr, unicode):
            expr = expr.decode("utf8")
        expr = expr.lower()
        for manga in self:
            if expr in manga.name.lower():
                yield manga
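
# Minimal usage sketch (assumes the sibling lib/htcache modules are importable
# and that the site is reachable; the search term is just an example):
#   lst = library()
#   for m in lst.search(u"bleach"):
#       print m.name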