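# Batoto backend: scrapes www.batoto.net into the manga/chapter/page
# interfaces defined by the sibling "lib" module, fetching documents through
# the local "htcache" module. Python 2 code using BeautifulSoup 3.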
import urllib, re, BeautifulSoup
import lib, htcache
soup = BeautifulSoup.BeautifulSoup
soupify = lambda cont: soup(cont, convertEntities=soup.HTML_ENTITIES)

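# Return the first tag named `name` under el whose class attribute contains
# the class cl, or None if there is no such tag.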
def byclass(el, name, cl):
    for ch in el.findAll(name):
        if not isinstance(ch, BeautifulSoup.Tag): continue
        cll = ch.get("class", "")
        if cl in cll.split():
            return ch
    return None

def nextel(el):
    while True:
        el = el.nextSibling
        if isinstance(el, BeautifulSoup.Tag):
            return el

class page(lib.page):
    def __init__(self, chapter, stack, n, url):
        self.stack = stack
        self.chapter = chapter
        self.n = n
        self.id = str(n)
        self.name = u"Page %s" % n
        self.url = url
        self.ciurl = None

    def iurl(self):
        if self.ciurl is None:
            page = soupify(htcache.fetch(self.url))
            img = nextel(page.find("div", id="full_image")).img
            self.ciurl = img["src"].encode("us-ascii")
        return self.ciurl

    def open(self):
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.page %r.%r.%r>" % (self.chapter.manga.name, self.chapter.name, self.name)

class chapter(lib.pagelist):
    def __init__(self, manga, stack, id, name, url):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    pnre = re.compile(r"page (\d+)")
    def pages(self):
        if self.cpag is None:
            pg = soupify(htcache.fetch(self.url))
            cpag = []
            for opt in pg.find("select", id="page_select").findAll("option"):
                url = opt["value"].encode("us-ascii")
                n = int(self.pnre.match(opt.string).group(1))
                cpag.append(page(self, self.stack + [(self, len(cpag))], n, url))
            self.cpag = cpag
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.chapter %r.%r>" % (self.manga.name, self.name)

class manga(lib.manga):
    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        self.cch = None
        self.stack = []
        self.cnames = None

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    cure = re.compile(r"/read/_/(\d+)/[^/]*")
    def ch(self):
        if self.cch is None:
            page = soupify(htcache.fetch(self.url))
            cls = byclass(page, u"table", u"chapters_list")
            if cls.tbody is not None:
                cls = cls.tbody
            scl = u"lang_" + self.lib.lang
            cch = []
            for ch in cls.childGenerator():
                if isinstance(ch, BeautifulSoup.Tag) and ch.name == u"tr":
                    cll = ch.get("class", "").split()
                    if u"row" in cll and scl in cll:
                        url = ch.td.a["href"].encode("us-ascii")
                        m = self.cure.search(url)
                        if m is None: raise Exception("Got weird chapter URL: %r" % url)
                        cid = m.group(1)
                        url = self.lib.base + "read/_/" + cid
                        name = ch.td.a.text
                        cch.append((cid, name, url))
            cch.reverse()
            rch = []
            for n, (cid, name, url) in enumerate(cch):
                rch.append(chapter(self, [(self, n)], cid, name, url))
            self.cch = rch
        return self.cch

    def altnames(self):
        if self.cnames is None:
            page = soupify(htcache.fetch(self.url))
            cnames = None
            for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
                if tbl.tbody is not None: tbl = tbl.tbody
                for tr in tbl.findAll("tr"):
                    if u"Alt Names:" in tr.td.text:
                        nls = nextel(tr.td)
                        if nls.name != u"td" or nls.span is None:
                            raise Exception("Weird altnames table in " + self.id)
                        cnames = [nm.text.strip() for nm in nls.findAll("span")]
                        break
                if cnames is not None:
                    break
            if cnames is None:
                raise Exception("Could not find altnames for " + self.id)
            self.cnames = cnames
        return self.cnames

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.manga %r>" % self.name

class library(lib.library):
    def __init__(self):
        self.base = "http://www.batoto.net/"
        self.lang = u"English"

    def byid(self, id):
        url = self.base + "comic/_/comics/" + id
        page = soupify(htcache.fetch(url))
        title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
        if title is None:
            raise KeyError(id)
        return manga(self, id, title.string.strip(), url)

    def _search(self, pars):
        p = 1
        while True:
            _pars = dict(pars)
            _pars["p"] = str(p)
            resp = urllib.urlopen(self.base + "search?" + urllib.urlencode(_pars))
            try:
                page = soupify(resp.read())
            finally:
                resp.close()
            rls = page.find("div", id="comic_search_results").table
            if rls.tbody is not None:
                rls = rls.tbody
            hasmore = False
            for child in rls.findAll("tr"):
                if child.th is not None: continue
                if child.get("id", u"")[:11] == u"comic_rowo_": continue
                if child.get("id") == u"show_more_row":
                    hasmore = True
                    continue
                link = child.td.strong.a
                url = link["href"].encode("us-ascii")
                m = self.rure.search(url)
                if m is None: raise Exception("Got weird manga URL: %r" % url)
                id = m.group(1)
                name = link.text.strip()
                yield manga(self, id, name, url)
            p += 1
            if not hasmore:
                break

    rure = re.compile(r"/comic/_/([^/]*)$")
    def search(self, expr):
        if not isinstance(expr, unicode):
            expr = expr.decode("utf8")
        return self._search({"name": expr.encode("utf8"), "name_cond": "c"})

    def byname(self, prefix):
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        for res in self._search({"name": prefix.encode("utf8"), "name_cond": "s"}):
            if res.name[:len(prefix)].lower() == prefix.lower():
                yield res
            else:
                for aname in res.altnames():
                    if aname[:len(prefix)].lower() == prefix.lower():
                        yield manga(self, res.id, aname, res.url)
                        break
                else:
                    # Disabled debugging aid for inspecting results elided
                    # by the prefix filter.
                    if False:
                        print "eliding " + res.name
                        print res.altnames()
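
# A minimal usage sketch, not part of the module proper: it assumes the
# sibling "lib" and "htcache" modules are importable, that batoto.net still
# serves the markup this scraper expects, and a Python 2 interpreter.
if __name__ == "__main__":
    import sys
    btlib = library()
    # Print the first search result's name and its chapter names.
    for res in btlib.search(sys.argv[1] if len(sys.argv) > 1 else u"test"):
        print res.name
        for chap in res:
            print "  " + chap.name
        break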