# Scraper module for raw.senmanga.com ("rawsen"), built on BeautifulSoup 3
# and the local lib/htcache helpers (Python 2).
import BeautifulSoup, urlparse
import lib, htcache
soup = BeautifulSoup.BeautifulSoup
soupify = lambda cont: soup(cont, convertEntities=soup.HTML_ENTITIES)

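# A single page of a chapter; the image URL is resolved lazily on demand.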
class page(lib.page):
    def __init__(self, chapter, stack, n, url):
        self.stack = stack
        self.chapter = chapter
        self.manga = chapter.manga
        self.n = n
        self.id = str(n)
        self.name = u"Page " + unicode(n)
        self.url = url
        self.ciurl = None

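    # Fetch the reader page and pull out the URL of the page image, caching it.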
    def iurl(self):
        if self.ciurl is None:
            page = soupify(htcache.fetch(self.url))
            for tr in page.findAll("tr"):
                img = tr.find("img", id="picture")
                if img is not None:
                    self.ciurl = urlparse.urljoin(self.url, img["src"].encode("us-ascii"))
                    break  # stop at the first matching row
            if self.ciurl is None:
                raise Exception("parse error: could not find image url for %r" % self)
        return self.ciurl

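    # Return a standard image stream for this page's image.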
    def open(self):
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.page %r.%r.%r>" % (self.manga.name, self.chapter.name, self.name)

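# One chapter of a manga; its pages are enumerated lazily from the reader's pager.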
class chapter(lib.pagelist):
    def __init__(self, manga, stack, id, name, url):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

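    # Derive the URL of every page from the page-selector dropdown on page 1.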
    def pages(self):
        if self.cpag is None:
            if self.url[-2:] != "/1":
                raise Exception("parse error: unexpected first page url for %r" % self)
            base = self.url[:-1]
            pg = soupify(htcache.fetch(self.url))
            pag = []
            pager = pg.find("div", attrs={"class": "pager"})
            for opt in pager.find("select", attrs={"name": "page"}).findAll("option"):
                n = int(opt["value"])
                url = urlparse.urljoin(base, str(n))
                pag.append(page(self, self.stack + [(self, len(pag))], n, url))
            self.cpag = pag
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.chapter %r.%r>" % (self.manga.name, self.name)

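# A single manga title; its chapter list is fetched and cached on first use.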
class manga(lib.manga):
    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        self.cch = None
        self.stack = []

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

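    # Locate the "Chapter List" post and build a chapter object per table row.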
    def ch(self):
        if self.cch is None:
            page = soupify(htcache.fetch(self.url))
            cls = None
            for div in page.findAll("div", attrs={"class": "post"}):
                # h3.string is None for headers with mixed content; guard before `in`.
                if div.h3 is not None and div.h3.string is not None and u"Chapter List" in div.h3.string:
                    cls = div
                    break
            if cls is None:
                raise Exception("parse error: no chapter list found for %r" % self)
            cch = []
            for tr in cls.table.findAll("tr"):
                # Skip header rows and rows without a chapter link.
                cols = tr.findAll("td")
                if len(cols) < 2 or cols[1].a is None:
                    continue
                link = cols[1].a
                url = link["href"].encode("us-ascii")
                name = link["title"]
                cid = name.encode("utf-8")
                cch.append(chapter(self, [(self, len(cch))], cid, name, url))
            self.cch = cch
        return self.cch

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.manga %r>" % self.name

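# The raw.senmanga.com library: enumerates manga and looks them up by id or name.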
class library(lib.library):
    def __init__(self):
        self.base = "http://raw.senmanga.com/"

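    # Look a manga up by its URL slug, verifying the slug against the title link.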
    def byid(self, id):
        url = urlparse.urljoin(self.base, id + "/")
        page = soupify(htcache.fetch(url))
        name = None
        for div in page.findAll("div", attrs={"class": "post"}):
            if div.h2 is not None and div.h2.a is not None:
                curl = div.h2.a["href"].encode("us-ascii")
                # Accept only links of the form .../<id>/ whose slug matches the id.
                if curl[-1:] != '/' or curl.rfind('/', 0, -1) < 0: continue
                if curl[curl.rindex('/', 0, -1) + 1:-1] != id: continue
                name = div.h2.a.string
                break
        if name is None:
            raise KeyError(id)
        return manga(self, id, name, url)

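    # Iterate over every manga in the site's directory listing.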
    def __iter__(self):
        page = soupify(htcache.fetch(self.base + "Manga/"))
        for part in page.find("div", attrs={"class": "post"}).findAll("table"):
            for row in part.findAll("tr"):
                # Skip rows without a second cell or without a link in it.
                cols = row.findAll("td")
                if len(cols) < 2 or cols[1].a is None:
                    continue
                link = cols[1].a
                url = link["href"].encode("us-ascii")
                name = link.string
                # Only relative links of the form /<id>/ name actual manga.
                if len(url) < 3 or url[:1] != '/' or url[-1:] != '/':
                    continue
                id = url[1:-1]
                yield manga(self, id, name, urlparse.urljoin(self.base, url))

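    # Yield every manga whose name starts with the given prefix (case-insensitive).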
    def byname(self, prefix):
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        prefix = prefix.lower()
        for manga in self:
            if manga.name.lower().startswith(prefix):
                yield manga

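    # Yield every manga whose name contains the given substring (case-insensitive).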
    def search(self, expr):
        if not isinstance(expr, unicode):
            expr = expr.decode("utf8")
        expr = expr.lower()
        for manga in self:
            if expr in manga.name.lower():
                yield manga
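
# A minimal usage sketch (hypothetical names; assumes network access and that
# the lib/htcache helpers are importable as above):
#
#     lst = library()
#     for m in lst.search(u"bleach"):   # any search term works here
#         ch = m[0]                     # first chapter of the matching manga
#         stream = ch[0].open()         # image stream for its first page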