Commit | Line | Data |
---|---|---|
f3ad0817 FT |
1 | import urllib |
2 | import BeautifulSoup | |
3 | import lib, htcache | |
4 | soup = BeautifulSoup.BeautifulSoup | |
5 | ||
class imgstream(object):
    """Readable stream over a remote image, usable as a context manager."""

    def __init__(self, url):
        # Open the backing HTTP stream and remember its declared MIME type.
        self.bk = urllib.urlopen(url)
        self.ctype = self.bk.info()["Content-Type"]

    def read(self, sz = None):
        """Read up to sz bytes, or the whole remainder when sz is None."""
        return self.bk.read() if sz is None else self.bk.read(sz)

    def close(self):
        """Close the underlying HTTP stream."""
        self.bk.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()
25 | ||
class page(lib.page):
    """A single manga page; its image URL is scraped lazily and cached."""

    def __init__(self, chapter, n, url):
        self.chapter = chapter
        self.volume = chapter.volume
        self.manga = chapter.volume.manga
        self.n = n
        self.url = url
        # Cached image URL; resolved on first call to iurl().
        self.ciurl = None

    def iurl(self):
        """Return the page's image URL, scraping the viewer page on first use."""
        if self.ciurl is None:
            doc = soup(htcache.fetch(self.url))
            self.ciurl = doc.find("div", id="viewer").find("img", id="image")["src"]
        return self.ciurl

    def open(self):
        """Open an imgstream over this page's image."""
        return imgstream(self.iurl())
43 | ||
class chapter(lib.pagelist):
    """One chapter of a manga; its page list is scraped lazily and cached."""

    def __init__(self, volume, name, url):
        self.volume = volume
        self.manga = volume.manga
        self.name = name
        self.url = url
        # Cached list of page objects; filled in by pages().
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    def pages(self):
        """Scrape and cache the chapter's pages from its first viewer page."""
        if self.cpag is not None:
            return self.cpag
        doc = soup(htcache.fetch(self.url + "1.html"))
        bar = doc.find("form", id="top_bar").find("div", attrs={"class": "l"})
        if len(bar.contents) != 3:
            raise Exception("parse error: weird page list for %r" % self)
        # The third node reads "of N", where N is the page count.
        count = bar.contents[2].strip()
        if count[:3] != u"of ":
            raise Exception("parse error: weird page list for %r" % self)
        self.cpag = [page(self, n + 1, self.url + ("%i.html" % (n + 1)))
                     for n in xrange(int(count[3:]))]
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mangafox.chapter %r.%r.%r>" % (self.manga.name, self.volume.name, self.name)
75 | ||
class volume(lib.pagelist):
    """A named volume: an ordered, indexable collection of chapters."""

    def __init__(self, manga, name):
        self.manga = manga
        self.name = name
        # Chapters in reading order; populated by the owning manga.
        self.ch = []

    def __len__(self):
        return len(self.ch)

    def __getitem__(self, i):
        return self.ch[i]

    def __repr__(self):
        return "<mangafox.volume %r.%r>" % (self.manga.name, self.name)

    def __str__(self):
        return self.name
93 | ||
def nextel(el):
    """Return the next sibling of el that is an actual tag, skipping text nodes."""
    cur = el.nextSibling
    while not isinstance(cur, BeautifulSoup.Tag):
        cur = cur.nextSibling
    return cur
99 | ||
class manga(lib.manga):
    """A single manga title; its volume/chapter tree is scraped lazily."""

    def __init__(self, lib, name, url):
        # NOTE(review): the `lib` parameter shadows the imported `lib`
        # module within this method; kept as-is since it is part of the
        # caller-visible signature.
        self.lib = lib
        self.name = name
        self.url = url
        # Cached volume list; filled in by vols().
        self.cvol = None

    def __getitem__(self, i):
        return self.vols()[i]

    def __len__(self):
        return len(self.vols())

    def vols(self):
        """Scrape and cache the volume list from the manga's main page.

        Volumes and chapters are insert(0)-ed as they are parsed, so the
        returned structure is the reverse of the page order — presumably
        the site lists newest-first and this yields reading order; TODO
        confirm against a live page.
        """
        if self.cvol is None:
            page = soup(htcache.fetch(self.url))
            vls = page.find("div", id="chapters").findAll("div", attrs={"class": "slide"})
            self.cvol = []
            for i in xrange(len(vls)):
                vol = volume(self, vls[i].find("h3", attrs={"class": "volume"}).contents[0].strip())
                # Each volume header is expected to be followed by a
                # <ul class="chlist"> holding that volume's chapters.
                cls = nextel(vls[i])
                if cls.name != u"ul" or cls["class"] != u"chlist":
                    raise Exception("parse error: weird volume list for %r" % self)
                for ch in cls.findAll("li"):
                    # The chapter heading is an <h3> or <h4>; its link text
                    # is the chapter's base name.
                    n = ch.div.h3 or ch.div.h4
                    name = n.a.string
                    # Append the text of any span whose class contains
                    # "title" (extra chapter-name text); spans without a
                    # class attribute raise KeyError and are skipped.
                    for span in ch("span"):
                        try:
                            if u" title " in (u" " + span["class"] + u" "):
                                name += " " + span.string
                        except KeyError:
                            pass
                    url = n.a["href"].encode("us-ascii")
                    if url[-7:] != "/1.html":
                        raise Exception("parse error: unexpected chapter URL for %r: %s" % (self, url))
                    # Drop the trailing "1.html" so chapter can append
                    # per-page "<n>.html" suffixes.
                    vol.ch.insert(0, chapter(vol, name, url[:-6]))
                self.cvol.insert(0, vol)
        return self.cvol

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mangafox.manga %r>" % self.name
144 | ||
def libalphacmp(a, b):
    """Case-insensitively compare two names three-way.

    Returns a negative number if a sorts before b, zero if they are
    equal ignoring case, and a positive number otherwise — the same
    contract as Python 2's cmp(a.upper(), b.upper()).  Written without
    the cmp() builtin (removed in Python 3) for forward compatibility.
    """
    au, bu = a.upper(), b.upper()
    return (au > bu) - (au < bu)
147 | ||
class library(lib.library):
    """Scraper for the mangafox.com alphabetical manga directory."""

    def __init__(self):
        # Root URL; all directory and manga pages are fetched beneath it.
        self.base = "http://www.mangafox.com/"

    def alphapage(self, pno):
        """Fetch one page (1-based pno) of the alphabetically sorted
        directory and return the mangas listed on it, in page order."""
        page = soup(htcache.fetch(self.base + ("directory/%i.htm?az" % pno)))
        ls = page.find("div", id="mangalist").find("ul", attrs={"class": "list"}).findAll("li")
        ret = []
        for m in ls:
            t = m.find("div", attrs={"class": "manga_text"}).find("a", attrs={"class": "title"})
            name = t.string
            url = t["href"].encode("us-ascii")
            ret.append(manga(self, name, url))
        return ret

    def alphapages(self):
        """Return the number of pages in the alphabetical directory,
        read from the second-to-last entry of the pagination bar."""
        page = soup(htcache.fetch(self.base + "directory/?az"))
        ls = page.find("div", id="mangalist").find("div", id="nav").find("ul").findAll("li")
        return int(ls[-2].find("a").string)

    def byname(self, prefix):
        """Yield all mangas whose names start with prefix, case-insensitively.

        Binary-searches the directory pages for one that spans the
        prefix, then scans forward entry by entry (and page by page)
        yielding matches until a non-matching name is reached.
        """
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        l = 1
        r = self.alphapages()
        # Binary search over page numbers for a page whose first and last
        # names bracket the prefix.
        while True:
            if l > r:
                return          # search exhausted: no page can match
            c = l + ((r + 1 - l) // 2)
            ls = self.alphapage(c)
            if libalphacmp(ls[0].name, prefix) > 0:
                r = c - 1
            elif libalphacmp(ls[-1].name, prefix) < 0:
                l = c + 1
            else:
                pno = c
                break
        # Skip entries on this page that sort before the prefix.
        i = 0
        while i < len(ls):
            m = ls[i]
            if libalphacmp(m.name, prefix) >= 0:
                break
            i += 1
        # Yield matching entries, rolling over onto subsequent directory
        # pages until the first name that no longer carries the prefix.
        while True:
            while i < len(ls):
                m = ls[i]
                if not m.name[:len(prefix)].upper() == prefix.upper():
                    return
                yield m
                i += 1
            pno += 1
            ls = self.alphapage(pno)
            i = 0

    def __iter__(self):
        # Iterating the full library is deliberately unsupported here.
        raise NotImplementedError("mangafox iterator")