MangaRead parser #17

This commit is contained in:
Koitharu
2020-10-19 20:24:58 +03:00
parent a8c22de601
commit 2135195f27
9 changed files with 222 additions and 24 deletions

View File

@@ -23,6 +23,7 @@ enum class MangaSource(
YAOICHAN("Яой-тян", "ru", YaoiChanRepository::class.java),
MANGATOWN("MangaTown", "en", MangaTownRepository::class.java),
MANGALIB("MangaLib", "ru", MangaLibRepository::class.java),
NUDEMOON("Nude-Moon", "ru", NudeMoonRepository::class.java)
NUDEMOON("Nude-Moon", "ru", NudeMoonRepository::class.java),
MANGAREAD("MangaRead", "en", MangareadRepository::class.java),
// HENTAILIB("HentaiLib", "ru", HentaiLibRepository::class.java)
}

View File

@@ -106,7 +106,7 @@ abstract class ChanRepository(loaderContext: MangaLoaderContext) : RemoteMangaRe
val json = data.substring(pos).substringAfter('[').substringBefore(';')
.substringBeforeLast(']')
return json.split(",").mapNotNull {
it.trim().removeSurrounding('"','\'').takeUnless(String::isBlank)
it.trim().removeSurrounding('"', '\'').takeUnless(String::isBlank)
}.map { url ->
MangaPage(
id = url.longHashCode(),

View File

@@ -32,14 +32,18 @@ abstract class GroupleRepository(loaderContext: MangaLoaderContext) :
mapOf("q" to query.urlEncoded(), "offset" to offset.toString())
)
tag == null -> loaderContext.httpGet(
"https://$domain/list?sortType=${getSortKey(
sortOrder
)}&offset=$offset"
"https://$domain/list?sortType=${
getSortKey(
sortOrder
)
}&offset=$offset"
)
else -> loaderContext.httpGet(
"https://$domain/list/genre/${tag.key}?sortType=${getSortKey(
sortOrder
)}&offset=$offset"
"https://$domain/list/genre/${tag.key}?sortType=${
getSortKey(
sortOrder
)
}&offset=$offset"
)
}.parseHtml()
val root = doc.body().getElementById("mangaBox")

View File

@@ -10,7 +10,8 @@ import org.koitharu.kotatsu.domain.MangaLoaderContext
import org.koitharu.kotatsu.utils.ext.*
import java.util.*
class MangaTownRepository(loaderContext: MangaLoaderContext) : RemoteMangaRepository(loaderContext) {
class MangaTownRepository(loaderContext: MangaLoaderContext) :
RemoteMangaRepository(loaderContext) {
override val source = MangaSource.MANGATOWN
@@ -105,16 +106,17 @@ class MangaTownRepository(loaderContext: MangaLoaderContext) : RemoteMangaReposi
}.orEmpty(),
description = info.getElementById("show")?.ownText(),
chapters = chaptersList?.mapIndexedNotNull { i, li ->
val href = li.selectFirst("a").attr("href").withDomain(domain, ssl)
val name = li.select("span").filter { it.className().isEmpty() }.joinToString(" - ") { it.text() }.trim()
MangaChapter(
id = href.longHashCode(),
url = href,
source = MangaSource.MANGATOWN,
number = i + 1,
name = if (name.isEmpty()) "${manga.title} - ${i + 1}" else name
)
}
val href = li.selectFirst("a").attr("href").withDomain(domain, ssl)
val name = li.select("span").filter { it.className().isEmpty() }
.joinToString(" - ") { it.text() }.trim()
MangaChapter(
id = href.longHashCode(),
url = href,
source = MangaSource.MANGATOWN,
number = i + 1,
name = if (name.isEmpty()) "${manga.title} - ${i + 1}" else name
)
}
)
}
@@ -166,7 +168,8 @@ class MangaTownRepository(loaderContext: MangaLoaderContext) : RemoteMangaReposi
}
override fun onCreatePreferences() = arraySetOf(R.string.key_parser_domain, R.string.key_parser_ssl)
override fun onCreatePreferences() =
arraySetOf(R.string.key_parser_domain, R.string.key_parser_ssl)
private fun String.parseTagKey() = split('/').findLast { TAG_REGEX matches it }

View File

@@ -0,0 +1,165 @@
package org.koitharu.kotatsu.core.parser.site
import androidx.collection.arraySetOf
import org.koitharu.kotatsu.R
import org.koitharu.kotatsu.core.exceptions.ParseException
import org.koitharu.kotatsu.core.model.*
import org.koitharu.kotatsu.core.parser.RemoteMangaRepository
import org.koitharu.kotatsu.domain.MangaLoaderContext
import org.koitharu.kotatsu.utils.ext.*
/**
 * Remote repository that parses the MangaRead site.
 *
 * Listings and chapter lists are requested from the site's
 * `wp-admin/admin-ajax.php` endpoint via POST; manga details and chapter
 * pages are scraped from the regular HTML pages.
 */
class MangareadRepository(
    loaderContext: MangaLoaderContext
) : RemoteMangaRepository(loaderContext) {

    override val source = MangaSource.MANGAREAD

    override val sortOrders = arraySetOf(SortOrder.UPDATED, SortOrder.POPULARITY)

    /**
     * Loads one page of the catalogue.
     *
     * @param offset number of items already loaded; anything that is not a
     * multiple of [PAGE_SIZE] yields an empty list.
     * @param query search text, sent as `vars[s]` (empty when `null`).
     * @param sortOrder [SortOrder.UPDATED] sorts by `_latest_update`; every
     * other value (including `null`) sorts by `_wp_manga_views`.
     * @param tag genre filter, sent as `vars[wp-manga-genre]` (empty when `null`).
     * @throws ParseException if a list item has no link or no title.
     */
    override suspend fun getList(
        offset: Int,
        query: String?,
        sortOrder: SortOrder?,
        tag: MangaTag?
    ): List<Manga> {
        if (offset % PAGE_SIZE != 0) {
            return emptyList()
        }
        val domain = conf.getDomain(DOMAIN)
        // The template uses the same raw (not percent-encoded) keys as the
        // assignments below, so each assignment replaces the template default
        // instead of adding a second, conflicting entry for the same field.
        val payload = createRequestTemplate()
        payload["page"] = (offset / PAGE_SIZE).toString()
        payload["vars[meta_key]"] = when (sortOrder) {
            SortOrder.UPDATED -> "_latest_update"
            else -> "_wp_manga_views"
        }
        payload["vars[wp-manga-genre]"] = tag?.key.orEmpty()
        payload["vars[s]"] = query.orEmpty()
        val doc = loaderContext.httpPost(
            "https://${domain}/wp-admin/admin-ajax.php",
            payload
        ).parseHtml()
        return doc.select("div.row.c-tabs-item__content").map { div ->
            val href = div.selectFirst("a")?.absUrl("href")
                ?: throw ParseException("Manga link not found")
            // selectFirst() returns null when nothing matches, so every
            // optional part of the summary is accessed null-safely.
            val summary = div.selectFirst(".tab-summary")
            Manga(
                id = href.longHashCode(),
                url = href,
                coverUrl = div.selectFirst("img")?.absUrl("src").orEmpty(),
                title = summary?.selectFirst("h3")?.text()
                    ?: throw ParseException("Manga title not found"),
                rating = div.selectFirst("span.total_votes")?.ownText()
                    ?.toFloatOrNull()?.div(5f) ?: -1f,
                // An entry without a genre block simply has no tags.
                tags = summary.selectFirst(".mg_genres")?.select("a")?.mapToSet { a ->
                    createTag(a.attr("href"), a.text())
                } ?: emptySet(),
                author = summary.selectFirst(".mg_author")?.selectFirst("a")?.ownText(),
                state = when (summary.selectFirst(".mg_status")?.selectFirst(".summary-content")
                    ?.ownText()?.trim()) {
                    "OnGoing" -> MangaState.ONGOING
                    "Completed" -> MangaState.FINISHED
                    else -> null
                },
                source = MangaSource.MANGAREAD
            )
        }
    }

    /**
     * Reads the genre list from the sidebar of the `/manga/` page.
     *
     * @throws ParseException if the sidebar genre list or a genre link is missing.
     */
    override suspend fun getTags(): Set<MangaTag> {
        val domain = conf.getDomain(DOMAIN)
        val doc = loaderContext.httpGet("https://$domain/manga/").parseHtml()
        val root = doc.body().getElementById("main-sidebar")
            ?.selectFirst(".genres_wrap")
            ?.selectFirst("ul")
            ?: throw ParseException("Root not found")
        return root.select("li").mapToSet { li ->
            val a = li.selectFirst("a") ?: throw ParseException("Tag link not found")
            createTag(a.attr("href"), a.text())
        }
    }

    /**
     * Loads tags, description and chapters for [manga].
     *
     * The chapter list comes from a second request (`manga_get_chapters`)
     * keyed by the `data-postid` attribute found on the manga page. It is
     * reversed before numbering, so the last `li` of the response becomes
     * chapter 1.
     *
     * @throws ParseException if the expected page structure, the manga id or
     * a chapter link cannot be found.
     */
    override suspend fun getDetails(manga: Manga): Manga {
        val domain = conf.getDomain(DOMAIN)
        val doc = loaderContext.httpGet(manga.url).parseHtml()
        val root = doc.body().selectFirst("div.profile-manga")
            ?.selectFirst("div.summary_content")
            ?.selectFirst("div.post-content")
            ?: throw ParseException("Root not found")
        val root2 = doc.body().selectFirst("div.content-area")
            ?.selectFirst("div.c-page")
            ?: throw ParseException("Root2 not found")
        val mangaId = doc.getElementsByAttribute("data-postid").firstOrNull()
            ?.attr("data-postid")?.toLongOrNull()
            ?: throw ParseException("Cannot obtain manga id")
        val doc2 = loaderContext.httpPost(
            "https://${domain}/wp-admin/admin-ajax.php",
            mapOf(
                "action" to "manga_get_chapters",
                "manga" to mangaId.toString()
            )
        ).parseHtml()
        return manga.copy(
            tags = root.selectFirst("div.genres-content")?.select("a")
                ?.mapNotNullToSet { a ->
                    createTag(a.attr("href"), a.text())
                } ?: manga.tags,
            // The first paragraph is skipped; the rest are joined with the
            // default ", " separator of joinToString.
            description = root2.selectFirst("div.description-summary")
                ?.selectFirst("div.summary__content")
                ?.select("p")?.drop(1)
                ?.joinToString { it.html() },
            chapters = doc2.select("li").asReversed().mapIndexed { i, li ->
                val a = li.selectFirst("a")
                    ?: throw ParseException("Chapter link not found")
                val href = a.absUrl("href")
                MangaChapter(
                    id = href.longHashCode(),
                    name = a.ownText(),
                    number = i + 1,
                    url = href,
                    source = MangaSource.MANGAREAD
                )
            }
        )
    }

    /**
     * Collects the image URL of every `div.page-break` on the chapter page.
     *
     * @throws ParseException if the reading area or a page image is missing.
     */
    override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
        val doc = loaderContext.httpGet(chapter.url).parseHtml()
        val root = doc.body().selectFirst("div.main-col-inner")
            ?.selectFirst("div.reading-content")
            ?: throw ParseException("Root not found")
        return root.select("div.page-break").map { div ->
            val url = div.selectFirst("img")?.absUrl("src")
                ?: throw ParseException("Page image not found")
            MangaPage(
                id = url.longHashCode(),
                url = url,
                source = MangaSource.MANGAREAD
            )
        }
    }

    override fun onCreatePreferences() = arraySetOf(R.string.key_parser_domain)

    /**
     * Builds a tag from a genre link: the key is the last path segment of
     * [href] (ignoring one trailing slash), the title is the link text.
     */
    private fun createTag(href: String, title: String) = MangaTag(
        key = href.removeSuffix("/").substringAfterLast('/'),
        title = title,
        source = MangaSource.MANGAREAD
    )

    private companion object {

        private const val PAGE_SIZE = 12
        private const val DOMAIN = "www.mangaread.org"

        /**
         * Default form fields of the `madara_load_more` request.
         *
         * Keys and values are stored in raw (decoded) form so that callers can
         * override a field by its plain name, e.g. `vars[meta_key]`.
         * NOTE(review): the map-based `httpPost` is not visible here; this
         * assumes it form-encodes keys and values when building the body —
         * confirm.
         */
        private fun createRequestTemplate(): MutableMap<String, String> = hashMapOf(
            "action" to "madara_load_more",
            "page" to "1",
            "template" to "madara-core/content/content-search",
            "vars[s]" to "",
            "vars[orderby]" to "meta_value_num",
            "vars[paged]" to "1",
            "vars[template]" to "search",
            "vars[meta_query][0][relation]" to "AND",
            "vars[meta_query][relation]" to "OR",
            "vars[post_type]" to "wp-manga",
            "vars[post_status]" to "publish",
            "vars[meta_key]" to "_latest_update",
            "vars[order]" to "desc",
            "vars[manga_archives_item_layout]" to "default"
        )
    }
}

View File

@@ -41,6 +41,29 @@ open class MangaLoaderContext : KoinComponent {
return okHttp.newCall(request.build()).await()
}
/**
 * Sends a POST request whose form body is built from a pre-encoded
 * `key=value&key=value` string.
 *
 * Each `&`-separated segment is split at its first `=`; segments without
 * an `=` are ignored. Names and values are passed to the form builder
 * through `addEncoded`.
 *
 * @param url target URL.
 * @param payload form fields as `key=value` pairs joined with `&`.
 * @param block optional extra configuration applied to the request builder
 * after the body and URL have been set.
 * @return the response of the executed call.
 */
suspend fun httpPost(
    url: String,
    payload: String,
    block: (Request.Builder.() -> Unit)? = null
): Response {
    val form = FormBody.Builder()
    for (field in payload.split('&')) {
        // No '=' means there is no key/value pair to add.
        if ('=' !in field) continue
        form.addEncoded(field.substringBefore('='), field.substringAfter('='))
    }
    val builder = Request.Builder()
        .post(form.build())
        .url(url)
    block?.invoke(builder)
    return okHttp.newCall(builder.build()).await()
}
open fun getSettings(source: MangaSource) = SourceConfig(get(), source)
fun insertCookies(domain: String, vararg cookies: String) {

View File

@@ -54,4 +54,6 @@ fun LongArray.toArraySet(): Set<Long> {
}
}
}
}
}
/**
 * Copies the pairs of this list into a new [HashMap] created with an initial
 * capacity equal to the list size.
 *
 * When a key occurs more than once, the value of the last pair wins.
 */
fun <K, V> List<Pair<K, V>>.toMutableMap(): MutableMap<K, V> {
    val result = HashMap<K, V>(size)
    for ((key, value) in this) {
        result[key] = value
    }
    return result
}

View File

@@ -90,7 +90,7 @@ class RemoteRepositoryTest(source: MangaSource) {
factory {
OkHttpClient.Builder()
.cookieJar(TemporaryCookieJar())
.addInterceptor(UserAgentInterceptor)
.addInterceptor(UserAgentInterceptor())
.connectTimeout(20, TimeUnit.SECONDS)
.readTimeout(60, TimeUnit.SECONDS)
.writeTimeout(20, TimeUnit.SECONDS)

View File

@@ -3,8 +3,8 @@ package org.koitharu.kotatsu.utils
import okhttp3.OkHttpClient
import okhttp3.Request
import org.junit.Assert
import org.koin.core.KoinComponent
import org.koin.core.inject
import org.koin.core.component.KoinComponent
import org.koin.core.component.inject
import java.net.HttpURLConnection
object AssertX : KoinComponent {