2 from . import lib, htcache
3 from urllib.parse import urljoin
soup = bs4.BeautifulSoup

# PEP 8 (E731): use a named def rather than assigning a lambda, so
# tracebacks and repr() show a useful function name. Behavior unchanged.
def soupify(content):
    """Parse an HTML document (string or bytes) into a BeautifulSoup tree."""
    return soup(content)
def __init__(self, chapter, stack, n, url):
    """A single scanned page; remembers its parent chapter and manga.

    chapter -- the owning chapter object (must expose .manga)
    stack, n, url -- page position/identity; note some assignments fall on
    lines not visible in this chunk.
    """
    self.chapter = chapter
    self.manga = chapter.manga
    # str(), not the Python 2 `unicode` builtin: this module imports
    # urllib.parse, so it runs on Python 3 where unicode() is a NameError.
    self.name = "Page " + str(n)
# Lazily resolve and cache the absolute URL of this page's scan image.
# (The enclosing `def` line is on a line not visible in this chunk.)
if self.ciurl is None:
    page = soupify(htcache.fetch(self.url))
    # Scan table rows for the <img id="picture"> element holding the scan.
    for tr in page.findAll("tr"):
        img = tr.find("img", id="picture")
        # NOTE(review): a guard line between these two statements is not
        # visible in this chunk — presumably `if img is not None:`; confirm
        # against the full file before assuming img["src"] is always safe.
        self.ciurl = urljoin(self.url, img["src"])
    # Parsing found no image: fail loudly rather than return None.
    if self.ciurl is None:
        raise Exception("parse error: could not find image url for %r" % self)
# Open the page image as a stream via the library's standard helper.
return lib.stdimgstream(self.iurl())
# Debug representation: qualified manga.chapter.page names.
return "<rawsen.page %r.%r.%r>" % (self.manga.name, self.chapter.name, self.name)
class chapter(lib.pagelist):
    # One chapter of a manga: behaves as an ordered list of page objects.
    def __init__(self, manga, stack, id, name, url):
    # NOTE(review): __init__'s body lies on lines not visible in this chunk.
def __getitem__(self, i):
    """Return page *i* of this chapter, fetching the page list on demand."""
    pages = self.pages()
    return pages[i]
# Chapter length is the number of pages the pager reports.
return len(self.pages())
# Chapter URLs are expected to end in "/1" (the first page); everything
# else is a parse error. (The enclosing `def` line is not visible here.)
if self.url[-2:] != "/1":
    raise Exception("parse error: unexpected first page url for %r" % self)
pg = soupify(htcache.fetch(self.url))
# Walk the page-number <select> in the pager to enumerate every page.
for opt in pg.find("div", attrs={"class": "pager"}).find("select", attrs={"name": "page"}).findAll("option"):
    # NOTE(review): `base`, `n` and `pag` are bound on lines not visible in
    # this chunk — presumably base derives from self.url and n from opt;
    # confirm against the full file.
    url = urljoin(base, str(n))
    pag.append(page(self, self.stack + [(self, len(pag))], n, url))
# Debug representation: qualified manga.chapter names.
return "<rawsen.chapter %r.%r>" % (self.manga.name, self.name)
class manga(lib.manga):
    # One manga title on the site, exposing its chapter list via indexing.
    def __init__(self, lib, id, name, url):
    # NOTE(review): the bodies of __init__ and __getitem__ fall on lines
    # not visible in this chunk; only their signatures appear here.
    def __getitem__(self, i):
# Build the chapter list by scraping the manga's landing page.
# (The enclosing `def` line is on a line not visible in this chunk.)
page = soupify(htcache.fetch(self.url))
# Locate the "Chapter List" post among the <div class="post"> blocks.
for div in page.findAll("div", attrs={"class": "post"}):
    if div.h3 is not None and "Chapter List" in div.h3.string:
# No chapter-list division found: fail loudly.
raise Exception("parse error: no chapter list found for %r" % self)
# Each table row is one chapter; the second cell holds the chapter link.
for tr in cls.table.findAll("tr"):
    lcol = tr.findAll("td")[1]
    if lcol.a is None: continue
    # NOTE(review): `cls`, `cch`, `cid`, `name` and `url` are bound on
    # lines not visible in this chunk; confirm against the full file.
    cch.append(chapter(self, [(self, len(cch))], cid, name, url))
# Debug representation with the manga's display name.
return "<rawsen.manga %r>" % self.name
class library(lib.library):
    # Scraper front-end for http://raw.senmanga.com/.
    # NOTE(review): the `def` lines for the constructor and the by-id
    # lookup fall on lines not visible in this chunk; the statements below
    # are their bodies.
    self.base = "http://raw.senmanga.com/"
    url = urljoin(self.base, id + "/")
    page = soupify(htcache.fetch(url))
    # Find the post whose heading links back to this manga's own id.
    for div in page.findAll("div", id="post"):
        if div.h1 is not None and div.h1.a is not None:
            curl = div.h1.a["href"]
            # Accept only links ending ".../<component>/" whose trailing
            # path component equals the requested id.
            if curl[-1] != '/' or curl.rfind('/', 0, -1) < 0: continue
            if curl[curl.rindex('/', 0, -1) + 1:-1] != id: continue
            name = div.h1.a.string
    return manga(self, id, name, url)
# Enumerate every manga in the site's directory page.
# (The enclosing `def` line is on a line not visible in this chunk.)
page = soupify(htcache.fetch(self.base + "Manga/"))
# The directory lists manga in tables inside the "post" div; each row's
# second cell holds the link to one manga.
for part in page.find("div", attrs={"class": "post"}).findAll("table"):
    for row in part.findAll("tr"):
        link = row.findAll("td")[1].a
        # NOTE(review): `url`, `id` and `name` are bound on lines not
        # visible in this chunk (presumably from `link`); confirm there.
        # Skip anything not of the relative "/<id>/" form.
        if len(url) < 3 or url[:1] != '/' or url[-1:] != '/':
        yield manga(self, id, name, urljoin(self.base, url))
148 def byname(self, prefix):
149 if not isinstance(prefix, unicode):
150 prefix = prefix.decode("utf8")
151 prefix = prefix.lower()
153 if manga.name.lower()[:len(prefix)] == prefix:
156 def search(self, expr):
157 if not isinstance(expr, unicode):
158 expr = expr.decode("utf8")
161 if expr in manga.name.lower():