import urllib
import BeautifulSoup
import lib, htcache
# Short alias for the BeautifulSoup parser constructor used throughout this module.
soup = BeautifulSoup.BeautifulSoup
| 5 | |
class imgstream(lib.imgstream):
    """A readable stream over a single remote image.

    Opens the URL immediately and validates the response; on any failure
    during setup the underlying connection is closed before the exception
    propagates.
    """

    def __init__(self, url):
        self.bk = urllib.urlopen(url)
        try:
            if self.bk.getcode() != 200:
                raise IOError("Server error: " + str(self.bk.getcode()))
            headers = self.bk.info()
            self.ctype = headers["Content-Type"]
            self.clen = int(headers["Content-Length"])
        except:
            # Bare except + re-raise: release the connection on *any*
            # setup failure, then let the original exception propagate.
            self.bk.close()
            raise

    def fileno(self):
        return self.bk.fileno()

    def close(self):
        self.bk.close()

    def read(self, sz = None):
        # Delegate to the backing stream; read everything when no size given.
        return self.bk.read() if sz is None else self.bk.read(sz)
| 31 | |
class page(lib.page):
    """A single manga page whose image URL is resolved lazily."""

    def __init__(self, chapter, stack, n, url):
        self.chapter = chapter
        self.stack = stack
        self.n = n
        self.url = url
        self.volume = chapter.volume
        self.manga = self.volume.manga
        self.id = str(n)
        self.name = u"Page %s" % n
        self.ciurl = None   # cached image URL, filled in by iurl()

    def iurl(self):
        """Return this page's image URL, fetching and caching it on first use."""
        if self.ciurl is None:
            doc = soup(htcache.fetch(self.url))
            img = doc.find("div", id="viewer").find("img", id="image")
            self.ciurl = img["src"]
        return self.ciurl

    def open(self):
        """Open an imgstream over this page's image."""
        return imgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mangafox.page %r.%r.%r.%r>" % (self.manga.name, self.volume.name, self.chapter.name, self.name)
| 58 | |
class chapter(lib.pagelist):
    """One chapter of a volume; its page list is fetched lazily."""

    def __init__(self, volume, stack, id, name, url):
        self.stack = stack
        self.volume = volume
        self.manga = volume.manga
        self.id = id
        self.name = name
        self.url = url
        self.cpag = None    # cached page list, built on first pages() call

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    def pages(self):
        """Return (and cache) this chapter's list of page objects.

        Parses the "N of M" page selector on the chapter's first page to
        learn the page count.
        """
        if self.cpag is None:
            doc = soup(htcache.fetch(self.url + "1.html"))
            bar = doc.find("form", id="top_bar").find("div", attrs={"class": "l"})
            if len(bar.contents) != 3:
                raise Exception("parse error: weird page list for %r" % self)
            label = bar.contents[2].strip()
            if not label.startswith(u"of "):
                raise Exception("parse error: weird page list for %r" % self)
            pags = []
            for n in xrange(int(label[3:])):
                pags.append(page(self, self.stack + [(self, n)], n + 1, self.url + ("%i.html" % (n + 1))))
            self.cpag = pags
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mangafox.chapter %r.%r.%r>" % (self.manga.name, self.volume.name, self.name)
| 92 | |
class volume(lib.pagelist):
    """A volume of a manga: a simple container over its chapter list."""

    def __init__(self, manga, stack, id, name):
        self.manga = manga
        self.stack = stack
        self.id = id
        self.name = name
        self.ch = []    # chapters; populated externally after construction

    def __getitem__(self, i):
        return self.ch[i]

    def __len__(self):
        return len(self.ch)

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mangafox.volume %r.%r>" % (self.manga.name, self.name)
| 112 | |
def nextel(el):
    """Return the next sibling of *el* that is a real tag, skipping text nodes."""
    sib = el.nextSibling
    while not isinstance(sib, BeautifulSoup.Tag):
        sib = sib.nextSibling
    return sib
| 118 | |
class manga(lib.manga):
    """A manga series; its volume/chapter tree is parsed lazily by vols()."""

    def __init__(self, lib, id, name, url):
        # NOTE(review): the `lib` parameter shadows the imported `lib` module
        # inside this method; presumably it is the owning library object.
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        self.cvol = None    # cached volume list, built on first vols() call
        self.stack = []

    def __getitem__(self, i):
        return self.vols()[i]

    def __len__(self):
        return len(self.vols())

    def vols(self):
        """Return (and cache) this manga's volumes, each populated with chapters.

        Parses the series page. Volumes and chapters appear newest-first in
        the HTML, so both are iterated reversed to get reading order.
        """
        if self.cvol is None:
            page = soup(htcache.fetch(self.url))
            vls = page.find("div", id="chapters").findAll("div", attrs={"class": "slide"})
            cvol = []
            for i, vn in enumerate(reversed(vls)):
                name = vn.find("h3", attrs={"class": "volume"}).contents[0].strip()
                vid = name.encode("utf8")
                vol = volume(self, [(self, i)], vid, name)
                # The chapter list is expected to be the next tag sibling of
                # the volume header.
                cls = nextel(vn)
                if cls.name != u"ul" or cls["class"] != u"chlist":
                    raise Exception("parse error: weird volume list for %r" % self)
                for o, ch in enumerate(reversed(cls.findAll("li"))):
                    # The chapter link lives in an <h3> or an <h4>.
                    n = ch.div.h3 or ch.div.h4
                    name = n.a.string
                    chid = name.encode("utf8")
                    # Append any span carrying the "title" class (the chapter
                    # subtitle) to the display name.
                    for span in ch("span"):
                        try:
                            if u" title " in (u" " + span["class"] + u" "):
                                name += " " + span.string
                        except KeyError:
                            # Span with no class attribute; ignore it.
                            pass
                    url = n.a["href"].encode("us-ascii")
                    if url[-7:] != "/1.html":
                        raise Exception("parse error: unexpected chapter URL for %r: %s" % (self, url))
                    # Drop the "1.html" suffix, keeping the trailing slash.
                    vol.ch.append(chapter(vol, vol.stack + [(vol, o)], chid, name, url[:-6]))
                cvol.append(vol)
            self.cvol = cvol
        return self.cvol

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mangafox.manga %r>" % self.name
| 169 | |
def libalphacmp(a, b):
    """Case-insensitively compare two names; return -1, 0, or 1.

    Same contract as the Python 2 builtin ``cmp(a.upper(), b.upper())``, but
    written as ``(x > y) - (x < y)`` so the helper no longer depends on the
    ``cmp`` builtin (removed in Python 3) while behaving identically here.
    """
    au = a.upper()
    bu = b.upper()
    return (au > bu) - (au < bu)
| 172 | |
class library(lib.library):
    """Entry point for browsing MangaFox's alphabetical manga directory."""

    def __init__(self):
        self.base = "http://mangafox.me/"

    def alphapage(self, pno):
        """Return the list of manga on page *pno* of the A-Z directory."""
        page = soup(htcache.fetch(self.base + ("directory/%i.htm?az" % pno)))
        ls = page.find("div", id="mangalist").find("ul", attrs={"class": "list"}).findAll("li")
        ret = []
        ubase = self.base + "manga/"
        for m in ls:
            t = m.find("div", attrs={"class": "manga_text"}).find("a", attrs={"class": "title"})
            name = t.string
            url = t["href"].encode("us-ascii")
            # Sanity-check that the link has the form <base>manga/<id>/ so the
            # id can be sliced out below.
            if url[:len(ubase)] != ubase or url.find('/', len(ubase)) != (len(url) - 1):
                raise Exception("parse error: unexpected manga URL for %r: %s" % (name, url))
            ret.append(manga(self, url[len(ubase):-1], name, url))
        return ret

    def alphapages(self):
        """Return the total number of pages in the A-Z directory."""
        page = soup(htcache.fetch(self.base + "directory/?az"))
        ls = page.find("div", id="mangalist").find("div", id="nav").find("ul").findAll("li")
        # The second-to-last <li> of the pager holds the last page's number.
        return int(ls[-2].find("a").string)

    def byname(self, prefix):
        """Yield every manga whose name starts with *prefix*, case-insensitively.

        Binary-searches the alphabetically ordered directory pages for a page
        spanning *prefix*, then scans forward — across page boundaries — and
        yields matches until the prefix no longer matches.
        """
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        l = 1
        r = self.alphapages()
        while True:
            if l > r:
                # Search exhausted without finding a spanning page: no matches.
                return
            c = l + ((r + 1 - l) // 2)
            ls = self.alphapage(c)
            if libalphacmp(ls[0].name, prefix) > 0:
                r = c - 1
            elif libalphacmp(ls[-1].name, prefix) < 0:
                l = c + 1
            else:
                pno = c
                break
        # Skip entries on this page that sort strictly before the prefix.
        i = 0
        while i < len(ls):
            m = ls[i]
            if libalphacmp(m.name, prefix) >= 0:
                break
            i += 1
        # Yield matching entries, moving on to the next page when one runs out.
        while True:
            while i < len(ls):
                m = ls[i]
                if not m.name[:len(prefix)].upper() == prefix.upper():
                    return
                yield m
                i += 1
            pno += 1
            ls = self.alphapage(pno)
            i = 0

    def byid(self, id):
        """Return the manga with directory id *id*; raise KeyError if unknown."""
        url = self.base + ("manga/%s/" % id)
        page = soup(htcache.fetch(url))
        if page.find("div", id="title") is None:
            # Assume we got the search page
            raise KeyError(id)
        name = page.find("div", id="series_info").find("div", attrs={"class": "cover"}).img["alt"]
        return manga(self, id, name, url)

    def __iter__(self):
        raise NotImplementedError("mangafox iterator")