Migrate AniBel parser to graphql

This commit is contained in:
Koitharu
2021-12-03 20:29:11 +02:00
parent bbb226791b
commit 9b4aa4fd64
4 changed files with 251 additions and 129 deletions

View File

@@ -1,15 +1,21 @@
package org.koitharu.kotatsu.base.domain
import okhttp3.*
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.RequestBody.Companion.toRequestBody
import org.json.JSONObject
import org.koin.core.component.KoinComponent
import org.koin.core.component.get
import org.koitharu.kotatsu.core.exceptions.GraphQLException
import org.koitharu.kotatsu.core.model.MangaSource
import org.koitharu.kotatsu.core.prefs.SourceSettings
import org.koitharu.kotatsu.utils.ext.await
import org.koitharu.kotatsu.utils.ext.parseJson
open class MangaLoaderContext(
private val okHttp: OkHttpClient,
val cookieJar: CookieJar
val cookieJar: CookieJar,
) : KoinComponent {
suspend fun httpGet(url: String, headers: Headers? = null): Response {
@@ -24,7 +30,7 @@ open class MangaLoaderContext(
suspend fun httpPost(
url: String,
form: Map<String, String>
form: Map<String, String>,
): Response {
val body = FormBody.Builder()
form.forEach { (k, v) ->
@@ -38,7 +44,7 @@ open class MangaLoaderContext(
suspend fun httpPost(
url: String,
payload: String
payload: String,
): Response {
val body = FormBody.Builder()
payload.split('&').forEach {
@@ -55,10 +61,24 @@ open class MangaLoaderContext(
return okHttp.newCall(request.build()).await()
}
open fun getSettings(source: MangaSource) = SourceSettings(get(), source)
private companion object {
private const val SCHEME_HTTP = "http"
/**
 * Sends an anonymous GraphQL query to [endpoint] via HTTP POST and returns the parsed response.
 *
 * The [query] text is wrapped in curly braces before being sent, so callers pass only
 * the contents of the selection set.
 *
 * @param endpoint URL the JSON request is posted to.
 * @param query query text without the enclosing braces.
 * @return the whole response as a [JSONObject].
 * @throws GraphQLException if the response contains a non-empty `errors` array.
 */
suspend fun graphQLQuery(endpoint: String, query: String): JSONObject {
    val body = JSONObject()
    // org.json drops the key when the value is a plain null; JSONObject.NULL emits an explicit JSON null.
    body.put("operationName", JSONObject.NULL)
    body.put("variables", JSONObject())
    body.put("query", "{${query}}")
    val mediaType = "application/json; charset=utf-8".toMediaType()
    val requestBody = body.toString().toRequestBody(mediaType)
    val request = Request.Builder()
        .post(requestBody)
        .url(endpoint)
    val json = okHttp.newCall(request.build()).await().parseJson()
    // A present but empty "errors" array is not treated as a failure.
    val errors = json.optJSONArray("errors")
    if (errors != null && errors.length() != 0) {
        throw GraphQLException(errors)
    }
    return json
}
open fun getSettings(source: MangaSource) = SourceSettings(get(), source)
}

View File

@@ -0,0 +1,14 @@
package org.koitharu.kotatsu.core.exceptions
import org.json.JSONArray
import org.koitharu.kotatsu.utils.ext.map
/**
 * Thrown when a GraphQL response carries entries in its `errors` array.
 *
 * @param errors the `errors` array; the `message` string of each entry is read at construction time.
 */
class GraphQLException(private val errors: JSONArray) : RuntimeException() {

    /** The `message` field of every error entry. */
    val messages = errors.map { error -> error.getString("message") }

    /** All error messages joined together, one per line. */
    override val message: String
        get() = messages.joinToString(separator = "\n")
}

View File

@@ -1,16 +1,21 @@
package org.koitharu.kotatsu.core.parser.site
import androidx.collection.ArraySet
import org.json.JSONArray
import org.json.JSONObject
import org.koitharu.kotatsu.base.domain.MangaLoaderContext
import org.koitharu.kotatsu.core.model.*
import org.koitharu.kotatsu.core.parser.RemoteMangaRepository
import org.koitharu.kotatsu.utils.ext.*
import org.koitharu.kotatsu.utils.ext.map
import org.koitharu.kotatsu.utils.ext.mapIndexed
import org.koitharu.kotatsu.utils.ext.stringIterator
import java.util.*
class AnibelRepository(loaderContext: MangaLoaderContext) : RemoteMangaRepository(loaderContext) {
override val source = MangaSource.ANIBEL
override val defaultDomain = "old.anibel.net"
override val defaultDomain = "anibel.net"
override val sortOrders: Set<SortOrder> = EnumSet.of(
SortOrder.NEWEST
@@ -20,76 +25,119 @@ class AnibelRepository(loaderContext: MangaLoaderContext) : RemoteMangaRepositor
offset: Int,
query: String?,
tags: Set<MangaTag>?,
sortOrder: SortOrder?
sortOrder: SortOrder?,
): List<Manga> {
if (!query.isNullOrEmpty()) {
return if (offset == 0) search(query) else emptyList()
return if (offset == 0) {
search(query)
} else {
emptyList()
}
}
val page = (offset / 12f).toIntUp().inc()
val link = when {
tags.isNullOrEmpty() -> "/manga?page=$page".withDomain()
else -> tags.joinToString(
prefix = "/manga?",
postfix = "&page=$page",
separator = "&",
) { tag -> "genre[]=${tag.key}" }.withDomain()
}
val doc = loaderContext.httpGet(link).parseHtml()
val root = doc.body().select("div.manga-block") ?: parseFailed("Cannot find root")
val items = root.select("div.anime-card")
return items.mapNotNull { card ->
val href = card.selectFirst("a")?.attr("href") ?: return@mapNotNull null
val status = card.select("tr")[2].text()
val fullTitle = card.selectFirst("h1.anime-card-title")?.text()
?.substringBeforeLast('[') ?: return@mapNotNull null
val titleParts = fullTitle.splitTwoParts('/')
// Optional genres filter, e.g. genres: ["action","drama"]; empty when no tags are selected.
// The key must be interpolated with ${...}; a bare "it.key" inside the string is sent literally.
val filters = tags?.takeUnless { it.isEmpty() }?.joinToString(
    separator = ",",
    prefix = "genres: [",
    postfix = "]",
) { tag -> "\"${tag.key}\"" }.orEmpty()
val array = apiCall(
"""
getMediaList(offset: $offset, limit: 20, mediaType: manga, filters: {$filters}) {
docs {
mediaId
title {
be
alt
}
rating
poster
genres
slug
mediaType
status
}
}
""".trimIndent()
).getJSONObject("getMediaList").getJSONArray("docs")
return array.map { jo ->
val mediaId = jo.getString("mediaId")
val title = jo.getJSONObject("title")
val href = "${jo.getString("mediaType")}/${jo.getString("slug")}"
Manga(
id = generateUid(href),
title = titleParts?.first?.trim() ?: fullTitle,
coverUrl = card.selectFirst("img")?.attr("data-src")
?.withDomain().orEmpty(),
altTitle = titleParts?.second?.trim(),
id = generateUid(mediaId),
title = title.getString("be"),
coverUrl = jo.getString("poster").removePrefix("/cdn")
.withDomain("cdn") + "?width=200&height=280",
altTitle = title.getString("alt").takeUnless(String::isEmpty),
author = null,
rating = Manga.NO_RATING,
rating = jo.getDouble("rating").toFloat() / 10f,
url = href,
publicUrl = href.withDomain(),
tags = card.select("p.tupe.tag").select("a").mapNotNullToSet tags@{ x ->
MangaTag(
title = x.text(),
key = x.attr("href").ifEmpty {
return@mapNotNull null
}.substringAfterLast("="),
source = source
)
},
state = when (status) {
"выпускаецца" -> MangaState.ONGOING
"завершанае" -> MangaState.FINISHED
publicUrl = "https://${getDomain()}/${href}",
tags = jo.getJSONArray("genres").mapToTags(),
state = when (jo.getString("status")) {
"ongoing" -> MangaState.ONGOING
"finished" -> MangaState.FINISHED
else -> null
},
source = source
source = source,
)
}
}
override suspend fun getDetails(manga: Manga): Manga {
val doc = loaderContext.httpGet(manga.publicUrl).parseHtml()
val root = doc.body().select("div.container") ?: parseFailed("Cannot find root")
val (type, slug) = manga.url.split('/')
val details = apiCall(
"""
media(mediaType: $type, slug: "$slug") {
mediaId
title {
be
alt
}
description {
be
}
status
poster
rating
genres
}
""".trimIndent()
).getJSONObject("media")
val title = details.getJSONObject("title")
val poster = details.getString("poster").removePrefix("/cdn")
.withDomain("cdn")
val chapters = apiCall(
"""
chapters(mediaId: "${details.getString("mediaId")}") {
id
chapter
released
}
""".trimIndent()
).getJSONArray("chapters")
return manga.copy(
description = root.select("div.manga-block.grid-12")[2].select("p").text(),
chapters = root.select("ul.series").flatMap { table ->
table.select("li")
}.map { it.selectFirst("a") }.mapIndexedNotNull { i, a ->
val href = a?.select("a")?.first()?.attr("href")
?.toRelativeUrl(getDomain()) ?: return@mapIndexedNotNull null
title = title.getString("be"),
altTitle = title.getString("alt"),
coverUrl = "$poster?width=200&height=280",
largeCoverUrl = poster,
description = details.getJSONObject("description").getString("be"),
rating = details.getDouble("rating").toFloat() / 10f,
tags = details.getJSONArray("genres").mapToTags(),
state = when (details.getString("status")) {
"ongoing" -> MangaState.ONGOING
"finished" -> MangaState.FINISHED
else -> null
},
chapters = chapters.map { jo ->
val number = jo.getInt("chapter")
MangaChapter(
id = generateUid(href),
name = "Глава " + a.selectFirst("a")?.text().orEmpty(),
number = i + 1,
url = href,
id = generateUid(jo.getString("id")),
name = "Глава $number",
number = number,
url = "${manga.url}/read/$number",
scanlator = null,
uploadDate = jo.getLong("released"),
branch = null,
uploadDate = 0L,
source = source,
)
}
@@ -97,86 +145,115 @@ class AnibelRepository(loaderContext: MangaLoaderContext) : RemoteMangaRepositor
}
override suspend fun getPages(chapter: MangaChapter): List<MangaPage> {
val fullUrl = chapter.url.withDomain()
val doc = loaderContext.httpGet(fullUrl).parseHtml()
val scripts = doc.select("script")
for (script in scripts) {
val data = script.html()
val pos = data.indexOf("dataSource")
if (pos == -1) {
continue
}
val json = data.substring(pos).substringAfter('[').substringBefore(']')
val domain = getDomain()
return json.split(",").mapNotNull {
it.trim()
.removeSurrounding('"', '\'')
.toRelativeUrl(domain)
.takeUnless(String::isBlank)
}.map { url ->
MangaPage(
id = generateUid(url),
url = url,
preview = null,
referer = fullUrl,
source = source,
)
val (_, slug, _, number) = chapter.url.split('/')
val chapterJson = apiCall(
"""
chapter(slug: "$slug", chapter: $number) {
id
images {
large
thumbnail
}
}
""".trimIndent()
).getJSONObject("chapter")
val pages = chapterJson.getJSONArray("images")
val chapterUrl = "https://${getDomain()}/${chapter.url}"
return pages.mapIndexed { i, jo ->
MangaPage(
id = generateUid("${chapter.url}/$i"),
url = jo.getString("large"),
referer = chapterUrl,
preview = jo.getString("thumbnail"),
source = source,
)
}
parseFailed("Pages list not found at ${chapter.url.withDomain()}")
}
override suspend fun getTags(): Set<MangaTag> {
val doc = loaderContext.httpGet("https://${getDomain()}/manga").parseHtml()
val root = doc.body().select("div#tabs-genres").select("ul#list.ul-three-colums")
return root.select("p.menu-tags.tupe").mapToSet { p ->
val a = p.selectFirst("a") ?: parseFailed("a is null")
MangaTag(
title = a.text().toCamelCase(),
key = a.attr("data-name"),
source = source
)
}
val json = apiCall(
"""
getFilters(mediaType: manga) {
genres
}
""".trimIndent()
)
val array = json.getJSONObject("getFilters").getJSONArray("genres")
return array.mapToTags()
}
private suspend fun search(query: String): List<Manga> {
val domain = getDomain()
val doc = loaderContext.httpGet("https://$domain/search?q=$query").parseHtml()
val root = doc.body().select("div.manga-block").select("article.tab-2") ?: parseFailed("Cannot find root")
val items = root.select("div.anime-card")
return items.mapNotNull { card ->
val href = card.select("a").attr("href")
val status = card.select("tr")[2].text()
val fullTitle = card.selectFirst("h1.anime-card-title")?.text()
?.substringBeforeLast('[') ?: return@mapNotNull null
val titleParts = fullTitle.splitTwoParts('/')
// The search text is user input: JSONObject.quote() adds the surrounding quotes and
// escapes quotes, backslashes and control characters so it cannot break out of the string literal.
val quotedQuery = JSONObject.quote(query)
val json = apiCall(
    """
    search(query: $quotedQuery, limit: 40) {
        id
        title {
            be
            en
        }
        poster
        url
        type
    }
    """.trimIndent()
)
val array = json.getJSONArray("search")
return array.map { jo ->
val mediaId = jo.getString("id")
val title = jo.getJSONObject("title")
val href = "${jo.getString("type").lowercase()}/${jo.getString("url")}"
Manga(
id = generateUid(href),
title = titleParts?.first?.trim() ?: fullTitle,
coverUrl = card.selectFirst("img")?.attr("src")
?.withDomain().orEmpty(),
altTitle = titleParts?.second?.trim(),
id = generateUid(mediaId),
title = title.getString("be"),
coverUrl = jo.getString("poster").removePrefix("/cdn")
.withDomain("cdn") + "?width=200&height=280",
altTitle = title.getString("en").takeUnless(String::isEmpty),
author = null,
rating = Manga.NO_RATING,
url = href,
publicUrl = href.withDomain(),
tags = card.select("p.tupe.tag").select("a").mapNotNullToSet tags@{ x ->
MangaTag(
title = x.text(),
key = x.attr("href").ifEmpty {
return@mapNotNull null
}.substringAfterLast("="),
source = source
)
},
state = when (status) {
"выпускаецца" -> MangaState.ONGOING
"завершанае" -> MangaState.FINISHED
else -> null
},
source = source
publicUrl = "https://${getDomain()}/${href}",
tags = emptySet(),
state = null,
source = source,
)
}
}
/**
 * Runs [request] as a GraphQL query against the `api.` subdomain of the current domain
 * and returns the `data` object of the response.
 */
private suspend fun apiCall(request: String): JSONObject {
    val endpoint = "https://api.${getDomain()}/"
    val response = loaderContext.graphQLQuery(endpoint, request)
    return response.getJSONObject("data")
}
/**
 * Converts the strings produced by [stringIterator] for this array into [MangaTag]s.
 *
 * Each string is used unchanged as the tag key; the title is derived from it by
 * replacing dashes with spaces and upper-casing the first character of every part.
 */
private fun JSONArray.mapToTags(): Set<MangaTag> {
    // "slice-of-life" -> "Slice Of Life"; characters other than word starts are kept as they are.
    fun toTitle(slug: String): String = buildString(slug.length) {
        var startOfWord = true
        for (ch in slug) {
            if (ch == '-') {
                append(' ')
                startOfWord = true
            } else if (startOfWord) {
                append(ch.uppercaseChar())
                startOfWord = false
            } else {
                append(ch)
            }
        }
    }

    val tags = ArraySet<MangaTag>(length())
    for (key in stringIterator()) {
        tags.add(MangaTag(title = toTitle(key), key = key, source = source))
    }
    return tags
}
}

View File

@@ -44,6 +44,8 @@ fun JSONObject.getLongOrDefault(name: String, defaultValue: Long): Long = opt(na
/** Creates a [JSONIterator] over this array so it can be used in `for` loops over [JSONObject] items. */
operator fun JSONArray.iterator(): Iterator<JSONObject> {
    return JSONIterator(this)
}
/** Creates a [JSONStringIterator] over this array, which reads its items as strings. */
fun JSONArray.stringIterator(): Iterator<String> {
    return JSONStringIterator(this)
}
private class JSONIterator(private val array: JSONArray) : Iterator<JSONObject> {
private val total = array.length()
@@ -52,7 +54,16 @@ private class JSONIterator(private val array: JSONArray) : Iterator<JSONObject>
// Compare against total (not total - 1) so that the last element of the array is visited too.
override fun hasNext() = index < total
override fun next(): JSONObject = array.getJSONObject(index++)
}
/**
 * Iterator over a [JSONArray] that reads each element with [JSONArray.getString].
 *
 * The array length is captured once, when the iterator is created.
 */
private class JSONStringIterator(private val array: JSONArray) : Iterator<String> {

    private val total = array.length()
    private var index = 0

    // Valid indices are 0 until total; comparing against total - 1 would drop the last element.
    override fun hasNext() = index < total

    override fun next(): String {
        // Follow the Iterator contract instead of surfacing an index error from the array.
        if (!hasNext()) throw NoSuchElementException()
        return array.getString(index++)
    }
}
fun <T> JSONArray.mapToSet(block: (JSONObject) -> T): Set<T> {