# Scraper module for raw.senmanga.com ("rawsen"), built on BeautifulSoup 3
# and the local lib/htcache helpers (Python 2).
import BeautifulSoup, urlparse
import lib, htcache
soup = BeautifulSoup.BeautifulSoup
soupify = lambda cont: soup(cont, convertEntities=soup.HTML_ENTITIES)

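# A single page of a chapter; the image URL is resolved lazily on demand.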
class page(lib.page):
    def __init__(self, chapter, stack, n, url):
        self.stack = stack
        self.chapter = chapter
        self.manga = chapter.manga
        self.n = n
        self.id = str(n)
        self.name = u"Page " + unicode(n)
        self.url = url
        self.ciurl = None

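    # Fetch the reader page and pull out the URL of the page image, caching it.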
    def iurl(self):
        if self.ciurl is None:
            page = soupify(htcache.fetch(self.url))
            for tr in page.findAll("tr"):
                img = tr.find("img", id="picture")
                if img is not None:
                    self.ciurl = urlparse.urljoin(self.url, img["src"].encode("us-ascii"))
                    break  # stop at the first matching row
            if self.ciurl is None:
                raise Exception("parse error: could not find image url for %r" % self)
        return self.ciurl

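    # Return a standard image stream for this page's image.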
    def open(self):
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.page %r.%r.%r>" % (self.manga.name, self.chapter.name, self.name)

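# One chapter of a manga; its pages are enumerated lazily from the reader's pager.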
class chapter(lib.pagelist):
    def __init__(self, manga, stack, id, name, url):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

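    # Derive the URL of every page from the page-selector dropdown on page 1.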
    def pages(self):
        if self.cpag is None:
            if self.url[-2:] != "/1":
                raise Exception("parse error: unexpected first page url for %r" % self)
            base = self.url[:-1]
            pg = soupify(htcache.fetch(self.url))
            pag = []
            pager = pg.find("div", attrs={"class": "pager"})
            for opt in pager.find("select", attrs={"name": "page"}).findAll("option"):
                n = int(opt["value"])
                url = urlparse.urljoin(base, str(n))
                pag.append(page(self, self.stack + [(self, len(pag))], n, url))
            self.cpag = pag
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.chapter %r.%r>" % (self.manga.name, self.name)

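# A single manga title; its chapter list is fetched and cached on first use.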
class manga(lib.manga):
    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        self.cch = None
        self.stack = []

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

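    # Locate the "Chapter List" post and build a chapter object per table row.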
    def ch(self):
        if self.cch is None:
            page = soupify(htcache.fetch(self.url))
            cls = None
            for div in page.findAll("div", attrs={"class": "post"}):
                # h3.string is None for headers with mixed content; guard before `in`.
                if div.h3 is not None and div.h3.string is not None and u"Chapter List" in div.h3.string:
                    cls = div
                    break
            if cls is None:
                raise Exception("parse error: no chapter list found for %r" % self)
            cch = []
            for tr in cls.table.findAll("tr"):
                # Skip header rows and rows without a chapter link.
                cols = tr.findAll("td")
                if len(cols) < 2 or cols[1].a is None:
                    continue
                link = cols[1].a
                url = link["href"].encode("us-ascii")
                name = link["title"]
                cid = name.encode("utf-8")
                cch.append(chapter(self, [(self, len(cch))], cid, name, url))
            self.cch = cch
        return self.cch

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.manga %r>" % self.name

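# The raw.senmanga.com library: enumerates manga and looks them up by id or name.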
class library(lib.library):
    def __init__(self):
        self.base = "http://raw.senmanga.com/"

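    # Look a manga up by its URL slug, verifying the slug against the title link.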
    def byid(self, id):
        url = urlparse.urljoin(self.base, id + "/")
        page = soupify(htcache.fetch(url))
        name = None
        for div in page.findAll("div", attrs={"class": "post"}):
            if div.h2 is not None and div.h2.a is not None:
                curl = div.h2.a["href"].encode("us-ascii")
                # Accept only links of the form .../<id>/ whose slug matches the id.
                if curl[-1:] != '/' or curl.rfind('/', 0, -1) < 0: continue
                if curl[curl.rindex('/', 0, -1) + 1:-1] != id: continue
                name = div.h2.a.string
                break
        if name is None:
            raise KeyError(id)
        return manga(self, id, name, url)

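    # Iterate over every manga in the site's directory listing.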
    def __iter__(self):
        page = soupify(htcache.fetch(self.base + "Manga/"))
        for part in page.find("div", attrs={"class": "post"}).findAll("table"):
            for row in part.findAll("tr"):
                # Skip rows without a second cell or without a link in it.
                cols = row.findAll("td")
                if len(cols) < 2 or cols[1].a is None:
                    continue
                link = cols[1].a
                url = link["href"].encode("us-ascii")
                name = link.string
                # Only relative links of the form /<id>/ name actual manga.
                if len(url) < 3 or url[:1] != '/' or url[-1:] != '/':
                    continue
                id = url[1:-1]
                yield manga(self, id, name, urlparse.urljoin(self.base, url))

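    # Yield every manga whose name starts with the given prefix (case-insensitive).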
    def byname(self, prefix):
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        prefix = prefix.lower()
        for manga in self:
            if manga.name.lower().startswith(prefix):
                yield manga

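    # Yield every manga whose name contains the given substring (case-insensitive).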
    def search(self, expr):
        if not isinstance(expr, unicode):
            expr = expr.decode("utf8")
        expr = expr.lower()
        for manga in self:
            if expr in manga.name.lower():
                yield manga
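
# A minimal usage sketch (hypothetical names; assumes network access and that
# the lib/htcache helpers are importable as above):
#
#     lst = library()
#     for m in lst.search(u"bleach"):   # any search term works here
#         ch = m[0]                     # first chapter of the matching manga
#         stream = ch[0].open()         # image stream for its first page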