Commit | Line | Data |
---|---|---|
08e259d7 FT |
import urllib, re, BeautifulSoup
import lib, htcache

# Shorthand: the BeautifulSoup 3 parser constructor, used to parse every
# fetched HTML document in this module.
soup = BeautifulSoup.BeautifulSoup
def byclass(el, name, cl):
    """Return the first `name` tag under `el` whose class attribute
    contains the word `cl`, or None when no such tag exists.

    BeautifulSoup 3 returns the class attribute as a plain
    whitespace-separated string, hence the split()."""
    hits = (tag for tag in el.findAll(name)
            if isinstance(tag, BeautifulSoup.Tag)
            and cl in tag.get("class", "").split())
    return next(hits, None)
12 | ||
def nextel(el):
    """Return the next sibling of `el` that is an actual tag, skipping
    intervening text nodes.

    Assumes such a sibling exists: if the sibling list runs out this
    raises AttributeError (nextSibling on None), same as callers rely on
    today."""
    sib = el.nextSibling
    while not isinstance(sib, BeautifulSoup.Tag):
        sib = sib.nextSibling
    return sib
18 | ||
class page(lib.page):
    """One page of a chapter: a page number plus the URL of the HTML
    reader page that embeds the actual image."""

    def __init__(self, chapter, stack, n, url):
        self.stack = stack
        self.chapter = chapter
        self.n = n
        self.id = str(n)
        self.name = u"Page %s" % n
        self.url = url
        self.ciurl = None  # cached image URL, filled in lazily by iurl()

    def iurl(self):
        """Fetch (once) and return the URL of this page's image.

        Scrapes the first tag following the div#full_image element and
        takes its <img src>."""
        if self.ciurl is None:
            page = soup(htcache.fetch(self.url))
            img = nextel(page.find("div", id="full_image")).img
            self.ciurl = img["src"].encode("us-ascii")
        return self.ciurl

    def open(self):
        """Open a standard image stream over the page's image URL."""
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        # Bug fix: this was misspelled `__repr`, a name Python never
        # invokes, so repr(page) fell back to the default. Renamed to the
        # real special method.
        return "<batoto.page %r.%r.%r>" % (self.chapter.manga.name, self.chapter.name, self.name)
44 | ||
class chapter(lib.pagelist):
    """One chapter of a manga; its page list is scraped lazily from the
    reader's page-selection dropdown and cached."""

    def __init__(self, manga, stack, id, name, url):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        self.cpag = None  # cached list of page objects, built by pages()

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    # Extracts the page number from option strings like "page 12".
    pnre = re.compile(r"page (\d+)")

    def pages(self):
        """Scrape (once) and return this chapter's pages, in reader order."""
        if self.cpag is None:
            doc = soup(htcache.fetch(self.url))
            opts = doc.find("select", id="page_select").findAll("option")
            self.cpag = [page(self, self.stack + [(self, i)],
                              int(self.pnre.match(opt.string).group(1)),
                              opt["value"].encode("us-ascii"))
                         for i, opt in enumerate(opts)]
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.chapter %r.%r>" % (self.manga.name, self.name)
77 | ||
class manga(lib.manga):
    """One comic on Batoto; the chapter list and the alternate-name list
    are each scraped lazily on first use and cached."""

    # NOTE: the `lib` parameter (the owning library object) shadows the
    # imported `lib` module inside this method; kept for interface
    # compatibility with callers using keyword arguments.
    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        self.cch = None       # cached chapter list, built by ch()
        self.stack = []
        self.cnames = None    # cached alternate names, built by altnames()

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    # Extracts the numeric chapter ID from reader URLs like
    # "/read/_/12345/some-title".
    cure = re.compile(r"/read/_/(\d+)/[^/]*")
    def ch(self):
        """Scrape (once) and return the chapter list, oldest first."""
        if self.cch is None:
            page = soup(htcache.fetch(self.url))
            cls = byclass(page, u"table", u"chapters_list")
            # Some pages wrap the rows in a tbody, some do not.
            if cls.tbody is not None:
                cls = cls.tbody
            scl = u"lang_" + self.lib.lang
            cch = []
            for ch in cls.childGenerator():
                if isinstance(ch, BeautifulSoup.Tag) and ch.name == u"tr":
                    cll = ch.get("class", "").split()
                    # Keep only chapter rows in the library's language.
                    if u"row" in cll and scl in cll:
                        url = ch.td.a["href"].encode("us-ascii")
                        m = self.cure.search(url)
                        if m is None: raise Exception("Got weird chapter URL: %r" % url)
                        cid = m.group(1)
                        # Rebuild a canonical reader URL from the ID.
                        url = self.lib.base + "read/_/" + cid
                        name = ch.td.a.text
                        cch.append((cid, name, url))
            # The site lists chapters newest-first; reverse to oldest-first.
            cch.reverse()
            rch = []
            for n, (cid, name, url) in enumerate(cch):
                rch.append(chapter(self, [(self, n)], cid, name, url))
            self.cch = rch
        return self.cch

    def altnames(self):
        """Scrape (once) and return the alternate names listed in the
        "Alt Names:" row of the comic's info table.

        Raises Exception when the row is missing or its layout is not the
        expected td-of-spans shape."""
        if self.cnames is None:
            page = soup(htcache.fetch(self.url))
            cnames = None
            for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
                if tbl.tbody is not None: tbl = tbl.tbody
                for tr in tbl.findAll("tr"):
                    if u"Alt Names:" in tr.td.text:
                        # The names live in the cell next to the label,
                        # one <span> per name.
                        nls = nextel(tr.td)
                        if nls.name != u"td" or nls.span is None:
                            raise Exception("Weird altnames table in " + self.id)
                        cnames = [nm.text.strip() for nm in nls.findAll("span")]
                        break
                if cnames is not None:
                    break
            if cnames is None:
                raise Exception("Could not find altnames for " + self.id)
            self.cnames = cnames
        return self.cnames

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.manga %r>" % self.name
146 | ||
class library(lib.library):
    """Scraper front-end for batoto.net."""

    def __init__(self):
        self.base = "http://www.batoto.net/"
        self.lang = u"English"  # only chapters in this language are listed

    def byid(self, id):
        """Return the manga with the given site ID.

        Raises KeyError when the fetched page has no comic title header
        (i.e. does not look like a comic page)."""
        url = self.base + "comic/_/comics/" + id
        page = soup(htcache.fetch(url))
        title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
        if title is None:
            raise KeyError(id)
        return manga(self, id, title.string.strip(), url)

    def _search(self, pars):
        """Generate every manga in the search results for query
        parameters `pars`, following the site's pagination (parameter
        "p") until no "show more" row is present."""
        p = 1
        while True:
            _pars = dict(pars)
            _pars["p"] = str(p)
            resp = urllib.urlopen(self.base + "search?" + urllib.urlencode(_pars))
            try:
                page = soup(resp.read())
            finally:
                resp.close()
            rls = page.find("div", id="comic_search_results").table
            if rls.tbody is not None:
                rls = rls.tbody
            hasmore = False
            for child in rls.findAll("tr"):
                if child.th is not None: continue  # header row
                # Rollover/detail rows — presumably decoration; skipped.
                if child.get("id", u"")[:11] == u"comic_rowo_": continue
                if child.get("id") == u"show_more_row":
                    hasmore = True
                    continue
                link = child.td.strong.a
                url = link["href"].encode("us-ascii")
                m = self.rure.search(url)
                if m is None: raise Exception("Got weird manga URL: %r" % url)
                id = m.group(1)
                name = link.text.strip()
                yield manga(self, id, name, url)
            p += 1
            if not hasmore:
                break

    # Extracts the manga ID from URLs like "/comic/_/some-id".
    rure = re.compile(r"/comic/_/([^/]*)$")
    def search(self, expr):
        """Search by name, substring match ("name_cond": "c")."""
        if not isinstance(expr, unicode):
            expr = expr.decode("utf8")
        return self._search({"name": expr.encode("utf8"), "name_cond": "c"})

    def byname(self, prefix):
        """Generate mangas whose name — or one of whose alternate names —
        starts with `prefix`, case-insensitively.

        When the site matched on an alternate name, the yielded manga is
        named by that altname so the caller can see why it matched."""
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        for res in self._search({"name": prefix.encode("utf8"), "name_cond": "s"}):
            if res.name[:len(prefix)].lower() == prefix.lower():
                yield res
            else:
                for aname in res.altnames():
                    if aname[:len(prefix)].lower() == prefix.lower():
                        yield manga(self, res.id, aname, res.url)
                        break
                # Removed a dead `else: if False: print ...` debug block
                # here — it could never execute. Results matching neither
                # the name nor any altname are elided, as before.