Commit | Line | Data |
---|---|---|
50f7a215 FT |
1 | import BeautifulSoup, urlparse |
2 | import lib, htcache | |
3 | soup = BeautifulSoup.BeautifulSoup | |
4 | ||
class page(lib.page):
    """A single page of a chapter on raw.senmanga.com."""

    def __init__(self, chapter, stack, n, url):
        self.stack = stack
        self.chapter = chapter
        self.manga = chapter.manga
        self.n = n
        self.id = str(n)
        self.name = u"Page " + unicode(n)
        self.url = url
        # Cached image URL; resolved lazily by iurl().
        self.ciurl = None

    def iurl(self):
        """Return the image URL for this page, fetching and caching it on first use.

        Raises Exception if no <img id="picture"> can be found in the page.
        """
        if self.ciurl is None:
            page = soup(htcache.fetch(self.url))
            for tr in page.findAll("tr"):
                img = tr.find("img", id="picture")
                if img is not None:
                    self.ciurl = urlparse.urljoin(self.url, img["src"].encode("us-ascii"))
                    # HTML ids are unique — stop scanning the remaining rows.
                    break
            if self.ciurl is None:
                raise Exception("parse error: could not find image url for %r" % self)
        return self.ciurl

    def open(self):
        """Open a standard image stream for this page's image."""
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.page %r.%r.%r>" % (self.manga.name, self.chapter.name, self.name)
35 | ||
class chapter(lib.pagelist):
    """One chapter of a manga; builds its page list lazily."""

    def __init__(self, manga, stack, id, name, url):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        # Cached page list; filled in on the first call to pages().
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    def pages(self):
        """Fetch (once) and return the list of page objects for this chapter.

        Raises Exception if the chapter URL does not end in "/1".
        """
        if self.cpag is not None:
            return self.cpag
        # The chapter URL must point at its first page ("<base>1");
        # sibling pages live at "<base>2", "<base>3", ...
        if self.url[-2:] != "/1":
            raise Exception("parse error: unexpected first page url for %r" % self)
        base = self.url[:-1]
        doc = soup(htcache.fetch(self.url))
        pager = doc.find("div", attrs={"class": "pager"}).find("select", attrs={"name": "page"})
        pag = []
        for opt in pager.findAll("option"):
            n = int(opt["value"])
            pag.append(page(self, self.stack + [(self, len(pag))], n, urlparse.urljoin(base, str(n))))
        self.cpag = pag
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.chapter %r.%r>" % (self.manga.name, self.name)
70 | ||
class manga(lib.manga):
    """A manga on raw.senmanga.com; builds its chapter list lazily."""

    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        # Cached chapter list; filled in on the first call to ch().
        self.cch = None
        self.stack = []

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    def ch(self):
        """Fetch (once) and return the list of chapter objects for this manga.

        Raises Exception if no "Chapter List" section can be located.
        """
        if self.cch is None:
            page = soup(htcache.fetch(self.url))
            cls = None
            for div in page.findAll("div", attrs={"class": "post"}):
                # BeautifulSoup's .string is None when the tag has nested
                # markup; test it explicitly so the `in` check cannot raise
                # TypeError on such headings.
                if div.h3 is not None and div.h3.string is not None and u"Chapter List" in div.h3.string:
                    cls = div
                    break
            if cls is None:
                raise Exception("parse error: no chapter list found for %r" % self)
            cch = []
            for tr in cls.table.findAll("tr"):
                lcol = tr.findAll("td")[1]
                if lcol.a is None: continue
                link = lcol.a
                url = link["href"].encode("us-ascii")
                name = link["title"]
                cid = name.encode("utf-8")
                cch.append(chapter(self, [(self, len(cch))], cid, name, url))
            self.cch = cch
        return self.cch

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.manga %r>" % self.name
113 | ||
class library(lib.library):
    """Scraper library front-end for raw.senmanga.com."""

    def __init__(self):
        self.base = "http://raw.senmanga.com/"

    def byid(self, id):
        """Return the manga with the given URL id.

        Raises KeyError if no post on the manga's page links back to "/<id>/".
        """
        url = urlparse.urljoin(self.base, id + "/")
        page = soup(htcache.fetch(url))
        name = None
        for div in page.findAll("div", attrs={"class": "post"}):
            if div.h2 is None or div.h2.a is None:
                continue
            curl = div.h2.a["href"].encode("us-ascii")
            # Accept only links of the form ".../<id>/".
            if curl[-1] != '/' or curl.rfind('/', 0, -1) < 0:
                continue
            if curl[curl.rindex('/', 0, -1) + 1:-1] != id:
                continue
            name = div.h2.a.string
            # Found the matching entry — no need to scan the remaining posts.
            break
        if name is None:
            raise KeyError(id)
        return manga(self, id, name, url)

    def __iter__(self):
        """Iterate over all mangas listed on the site's index page."""
        page = soup(htcache.fetch(self.base + "Manga/"))
        for part in page.find("div", attrs={"class": "post"}).findAll("table"):
            for row in part.findAll("tr"):
                link = row.findAll("td")[1].a
                if link is None:
                    continue
                url = link["href"].encode("us-ascii")
                name = link.string
                # Only relative links of the form "/<id>/" denote mangas.
                if len(url) < 3 or url[:1] != '/' or url[-1:] != '/':
                    continue
                id = url[1:-1]
                yield manga(self, id, name, urlparse.urljoin(self.base, url))

    def byname(self, prefix):
        """Yield all mangas whose name starts with prefix (case-insensitive)."""
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        prefix = prefix.lower()
        for manga in self:
            if manga.name.lower().startswith(prefix):
                yield manga

    def search(self, expr):
        """Yield all mangas whose name contains expr (case-insensitive)."""
        if not isinstance(expr, unicode):
            expr = expr.decode("utf8")
        expr = expr.lower()
        for manga in self:
            if expr in manga.name.lower():
                yield manga