Commit | Line | Data |
---|---|---|
08e259d7 FT |
import urllib, re, BeautifulSoup
import lib, htcache
# Shorthand: `soup(markup)` builds a BeautifulSoup 3 parse tree.
soup = BeautifulSoup.BeautifulSoup

def byclass(el, name, cl):
    """Return the first `name` tag under `el` whose class attribute
    contains the word `cl`, or None if no such tag exists."""
    for tag in el.findAll(name):
        if isinstance(tag, BeautifulSoup.Tag) and cl in tag.get("class", "").split():
            return tag
    return None
12 | ||
def nextel(el):
    """Return the next sibling of `el` that is an actual tag, skipping
    over text nodes and other non-tag siblings."""
    cur = el.nextSibling
    while not isinstance(cur, BeautifulSoup.Tag):
        cur = cur.nextSibling
    return cur
18 | ||
class page(lib.page):
    """A single page of a Batoto chapter.

    The image URL is resolved lazily: the first call to iurl() fetches the
    reader page and caches the result in `ciurl`.
    """

    def __init__(self, chapter, stack, n, url):
        self.stack = stack
        self.chapter = chapter
        self.n = n
        self.id = str(n)
        self.name = u"Page %s" % n
        self.url = url
        # Cached image URL, filled in by iurl() on first use.
        self.ciurl = None

    def iurl(self):
        """Return the page's image URL, fetching the reader page once."""
        if self.ciurl is None:
            page = soup(htcache.fetch(self.url))
            # The image lives in the element following <div id="full_image">.
            img = nextel(page.find("div", id="full_image")).img
            self.ciurl = img["src"].encode("us-ascii")
        return self.ciurl

    def open(self):
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        # Fixed: was misspelled "__repr", so repr() never picked it up.
        return "<batoto.page %r.%r.%r>" % (self.chapter.manga.name, self.chapter.name, self.name)
44 | ||
class chapter(lib.pagelist):
    """One chapter of a manga, behaving as a lazy sequence of pages."""

    def __init__(self, manga, stack, id, name, url):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        # Page-list cache, built on demand by pages().
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    # Matches page-selector option labels of the form "page N".
    pnre = re.compile(r"page (\d+)")

    def pages(self):
        """Fetch the chapter's reader page and build its page list (cached)."""
        if self.cpag is None:
            doc = soup(htcache.fetch(self.url))
            sel = doc.find("select", id="page_select")
            plist = []
            for idx, opt in enumerate(sel.findAll("option")):
                purl = opt["value"].encode("us-ascii")
                pn = int(self.pnre.match(opt.string).group(1))
                plist.append(page(self, self.stack + [(self, idx)], pn, purl))
            self.cpag = plist
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.chapter %r.%r>" % (self.manga.name, self.name)
77 | ||
class manga(lib.manga):
    """A manga on Batoto, exposing its chapters as a lazy sequence."""

    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        # Chapter cache, built on demand by ch().
        self.cch = None
        self.stack = []

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    # Extracts the chapter id from reader URLs like /read/_/12345/some-title.
    cure = re.compile(r"/read/_/(\d+)/[^/]*")

    def ch(self):
        """Fetch and cache the chapter list, ordered oldest-first."""
        if self.cch is None:
            doc = soup(htcache.fetch(self.url))
            tab = byclass(doc, u"table", u"chapters_list")
            if tab.tbody is not None:
                tab = tab.tbody
            wanted = u"lang_" + self.lib.lang
            found = []
            for row in tab.childGenerator():
                if not (isinstance(row, BeautifulSoup.Tag) and row.name == u"tr"):
                    continue
                classes = row.get("class", "").split()
                if u"row" not in classes or wanted not in classes:
                    continue
                curl = row.td.a["href"].encode("us-ascii")
                m = self.cure.search(curl)
                if m is None:
                    raise Exception("Got weird chapter URL: %r" % curl)
                cid = m.group(1)
                found.append((cid, row.td.a.text, self.lib.base + "read/_/" + cid))
            # The site lists newest chapters first; present them oldest-first.
            found.reverse()
            self.cch = [chapter(self, [(self, n)], cid, name, url)
                        for n, (cid, name, url) in enumerate(found)]
        return self.cch

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.manga %r>" % self.name
125 | ||
class library(lib.library):
    """The Batoto site backend: id lookup and free-text manga search."""

    def __init__(self):
        self.base = "http://www.batoto.net/"
        self.lang = u"English"

    def byid(self, id):
        """Return the manga with the given comic id; raise KeyError if absent."""
        url = self.base + "comic/_/comics/" + id
        doc = soup(htcache.fetch(url))
        title = doc.find("h1", attrs={"class": "ipsType_pagetitle"})
        if title is None:
            raise KeyError(id)
        return manga(self, id, title.string.strip(), url)

    # Extracts the manga id from comic URLs like /comic/_/comics/some-id.
    mure = re.compile(r"/comic/_/comics/([^/]*)$")

    def search(self, expr):
        """Search the site's forum search engine and return matching manga."""
        query = urllib.urlencode({"search_term": expr, "search_app": "ccs:database:3"})
        resp = urllib.urlopen(self.base + "forums/index.php?app=core&module=search&do=search&fromMainBar=1",
                              query)
        try:
            doc = soup(resp.read())
        finally:
            resp.close()
        found = []
        for child in doc.find("div", id="search_results").ol.childGenerator():
            if not (isinstance(child, BeautifulSoup.Tag) and child.name == u"li"):
                continue
            info = child.find("div", attrs={"class": "result_info"})
            murl = info.h3.a["href"].encode("us-ascii")
            m = self.mure.search(murl)
            if m is None:
                raise Exception("Got weird manga URL: %r" % murl)
            found.append(manga(self, m.group(1), info.h3.a.string.strip(), murl))
        return found