Commit | Line | Data |
---|---|---|
50f7a215 FT |
1 | import BeautifulSoup, urlparse |
2 | import lib, htcache | |
3 | soup = BeautifulSoup.BeautifulSoup | |
4 | ||
class page(lib.page):
    """A single page of a chapter on raw.senmanga.com."""

    def __init__(self, chapter, stack, n, url):
        self.stack = stack
        self.chapter = chapter
        self.manga = chapter.manga
        self.n = n
        self.id = str(n)
        self.name = u"Page " + unicode(n)
        self.url = url
        # Cached image URL; resolved lazily by iurl().
        self.ciurl = None

    def iurl(self):
        """Return the image URL for this page, fetching and caching it on first use.

        Raises Exception if no <img id="picture"> can be found in the page.
        """
        if self.ciurl is None:
            page = soup(htcache.fetch(self.url))
            for tr in page.findAll("tr"):
                img = tr.find("img", id="picture")
                if img is not None:
                    self.ciurl = urlparse.urljoin(self.url, img["src"].encode("us-ascii"))
                    # HTML ids are unique — stop scanning the remaining rows.
                    break
            if self.ciurl is None:
                raise Exception("parse error: could not find image url for %r" % self)
        return self.ciurl

    def open(self):
        """Open a standard image stream for this page's image."""
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.page %r.%r.%r>" % (self.manga.name, self.chapter.name, self.name)
35 | ||
class chapter(lib.pagelist):
    """One chapter of a manga; builds its page list lazily."""

    def __init__(self, manga, stack, id, name, url):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        # Cached page list; filled in on the first call to pages().
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    def pages(self):
        """Fetch (once) and return the list of page objects for this chapter.

        Raises Exception if the chapter URL does not end in "/1".
        """
        if self.cpag is not None:
            return self.cpag
        # The chapter URL must point at its first page ("<base>1");
        # sibling pages live at "<base>2", "<base>3", ...
        if self.url[-2:] != "/1":
            raise Exception("parse error: unexpected first page url for %r" % self)
        base = self.url[:-1]
        doc = soup(htcache.fetch(self.url))
        pager = doc.find("div", attrs={"class": "pager"}).find("select", attrs={"name": "page"})
        pag = []
        for opt in pager.findAll("option"):
            n = int(opt["value"])
            pag.append(page(self, self.stack + [(self, len(pag))], n, urlparse.urljoin(base, str(n))))
        self.cpag = pag
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.chapter %r.%r>" % (self.manga.name, self.name)
70 | ||
class manga(lib.manga):
    """A manga on raw.senmanga.com; builds its chapter list lazily."""

    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        # Cached chapter list; filled in on the first call to ch().
        self.cch = None
        self.stack = []

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    def ch(self):
        """Fetch (once) and return the list of chapter objects for this manga.

        Raises Exception if no "Chapter List" section can be located.
        """
        if self.cch is None:
            page = soup(htcache.fetch(self.url))
            cls = None
            for div in page.findAll("div", attrs={"class": "post"}):
                # BeautifulSoup's .string is None when the tag has nested
                # markup; test it explicitly so the `in` check cannot raise
                # TypeError on such headings.
                if div.h3 is not None and div.h3.string is not None and u"Chapter List" in div.h3.string:
                    cls = div
                    break
            if cls is None:
                raise Exception("parse error: no chapter list found for %r" % self)
            cch = []
            for tr in cls.table.findAll("tr"):
                lcol = tr.findAll("td")[1]
                if lcol.a is None: continue
                link = lcol.a
                url = link["href"].encode("us-ascii")
                name = link["title"]
                cid = name.encode("utf-8")
                cch.append(chapter(self, [(self, len(cch))], cid, name, url))
            self.cch = cch
        return self.cch

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.manga %r>" % self.name
113 | ||
class library(lib.library):
    """Scraper library front-end for raw.senmanga.com."""

    def __init__(self):
        self.base = "http://raw.senmanga.com/"

    def byid(self, id):
        """Return the manga with the given URL id.

        Raises KeyError if no post on the manga's page links back to "/<id>/".
        """
        url = urlparse.urljoin(self.base, id + "/")
        page = soup(htcache.fetch(url))
        name = None
        for div in page.findAll("div", attrs={"class": "post"}):
            if div.h2 is None or div.h2.a is None:
                continue
            curl = div.h2.a["href"].encode("us-ascii")
            # Accept only links of the form ".../<id>/".
            if curl[-1] != '/' or curl.rfind('/', 0, -1) < 0:
                continue
            if curl[curl.rindex('/', 0, -1) + 1:-1] != id:
                continue
            name = div.h2.a.string
            # Found the matching entry — no need to scan the remaining posts.
            break
        if name is None:
            raise KeyError(id)
        return manga(self, id, name, url)

    def __iter__(self):
        """Iterate over all mangas listed on the site's index page."""
        page = soup(htcache.fetch(self.base + "Manga/"))
        for part in page.find("div", attrs={"class": "post"}).findAll("table"):
            for row in part.findAll("tr"):
                link = row.findAll("td")[1].a
                if link is None:
                    continue
                url = link["href"].encode("us-ascii")
                name = link.string
                # Only relative links of the form "/<id>/" denote mangas.
                if len(url) < 3 or url[:1] != '/' or url[-1:] != '/':
                    continue
                id = url[1:-1]
                yield manga(self, id, name, urlparse.urljoin(self.base, url))

    def byname(self, prefix):
        """Yield all mangas whose name starts with prefix (case-insensitive)."""
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        prefix = prefix.lower()
        for manga in self:
            if manga.name.lower().startswith(prefix):
                yield manga

    def search(self, expr):
        """Yield all mangas whose name contains expr (case-insensitive)."""
        if not isinstance(expr, unicode):
            expr = expr.decode("utf8")
        expr = expr.lower()
        for manga in self:
            if expr in manga.name.lower():
                yield manga