From 4977464e690750fba607a7f2b365ac959e260460 Mon Sep 17 00:00:00 2001 From: Koitharu Date: Wed, 8 Sep 2021 07:27:44 +0300 Subject: [PATCH] ExHentai manga source --- .../kotatsu/base/domain/MangaLoaderContext.kt | 29 +- .../kotatsu/core/model/MangaSource.kt | 1 + .../kotatsu/core/parser/ParserModule.kt | 1 + .../core/parser/site/ExHentaiRepository.kt | 265 ++++++++++++++++++ .../core/parser/site/NineMangaRepository.kt | 2 +- .../kotatsu/utils/ext/CookieJarExt.kt | 37 +++ .../koitharu/kotatsu/utils/ext/ParseExt.kt | 8 +- .../koitharu/kotatsu/utils/ext/StringExt.kt | 8 +- 8 files changed, 320 insertions(+), 31 deletions(-) create mode 100644 app/src/main/java/org/koitharu/kotatsu/core/parser/site/ExHentaiRepository.kt create mode 100644 app/src/main/java/org/koitharu/kotatsu/utils/ext/CookieJarExt.kt diff --git a/app/src/main/java/org/koitharu/kotatsu/base/domain/MangaLoaderContext.kt b/app/src/main/java/org/koitharu/kotatsu/base/domain/MangaLoaderContext.kt index 821d89e7b..2c8ec865d 100644 --- a/app/src/main/java/org/koitharu/kotatsu/base/domain/MangaLoaderContext.kt +++ b/app/src/main/java/org/koitharu/kotatsu/base/domain/MangaLoaderContext.kt @@ -9,7 +9,7 @@ import org.koitharu.kotatsu.utils.ext.await open class MangaLoaderContext( private val okHttp: OkHttpClient, - private val cookieJar: CookieJar + val cookieJar: CookieJar ) : KoinComponent { suspend fun httpGet(url: String, headers: Headers? = null): Response { @@ -57,33 +57,6 @@ open class MangaLoaderContext( open fun getSettings(source: MangaSource) = SourceSettings(get(), source) - fun insertCookies(domain: String, vararg cookies: String) { - val url = HttpUrl.Builder() - .scheme(SCHEME_HTTP) - .host(domain) - .build() - cookieJar.saveFromResponse(url, cookies.mapNotNull { - Cookie.parse(url, it) - }) - } - - fun getCookies(domain: String): List { - val url = HttpUrl.Builder() - .scheme(SCHEME_HTTP) - .host(domain) - .build() - return cookieJar.loadForRequest(url) - } - - fun copyCookies(oldDomain: String, newDomain: String) { - val url = HttpUrl.Builder() - .scheme(SCHEME_HTTP) - .host(oldDomain) - val cookies = cookieJar.loadForRequest(url.build()) - url.host(newDomain) - cookieJar.saveFromResponse(url.build(), cookies) - } - private companion object { private const val SCHEME_HTTP = "http" diff --git a/app/src/main/java/org/koitharu/kotatsu/core/model/MangaSource.kt b/app/src/main/java/org/koitharu/kotatsu/core/model/MangaSource.kt index 3f8f07973..f881a59f0 100644 --- a/app/src/main/java/org/koitharu/kotatsu/core/model/MangaSource.kt +++ b/app/src/main/java/org/koitharu/kotatsu/core/model/MangaSource.kt @@ -39,6 +39,7 @@ enum class MangaSource( NINEMANGA_IT("NineManga Italiano", "it", NineMangaRepository.Italiano::class.java), NINEMANGA_BR("NineManga Brasil", "pt", NineMangaRepository.Brazil::class.java), NINEMANGA_FR("NineManga Français", "fr", NineMangaRepository.Francais::class.java), + EXHENTAI("ExHentai", null, ExHentaiRepository::class.java) ; @get:Throws(NoBeanDefFoundException::class) diff --git a/app/src/main/java/org/koitharu/kotatsu/core/parser/ParserModule.kt b/app/src/main/java/org/koitharu/kotatsu/core/parser/ParserModule.kt index baf3156e3..361bdfa61 100644 --- a/app/src/main/java/org/koitharu/kotatsu/core/parser/ParserModule.kt +++ b/app/src/main/java/org/koitharu/kotatsu/core/parser/ParserModule.kt @@ -32,4 +32,5 @@ val parserModule factory(named(MangaSource.NINEMANGA_RU)) { NineMangaRepository.Russian(get()) } factory(named(MangaSource.NINEMANGA_IT)) { NineMangaRepository.Italiano(get()) } factory(named(MangaSource.NINEMANGA_FR)) { NineMangaRepository.Francais(get()) } + factory(named(MangaSource.EXHENTAI)) { ExHentaiRepository(get()) } } \ No newline at end of file diff --git a/app/src/main/java/org/koitharu/kotatsu/core/parser/site/ExHentaiRepository.kt b/app/src/main/java/org/koitharu/kotatsu/core/parser/site/ExHentaiRepository.kt new file mode 100644 index 000000000..a699357e8 --- /dev/null +++ b/app/src/main/java/org/koitharu/kotatsu/core/parser/site/ExHentaiRepository.kt @@ -0,0 +1,265 @@ +package org.koitharu.kotatsu.core.parser.site + +import org.jsoup.nodes.Element +import org.koitharu.kotatsu.base.domain.MangaLoaderContext +import org.koitharu.kotatsu.core.model.* +import org.koitharu.kotatsu.core.parser.MangaRepositoryAuthProvider +import org.koitharu.kotatsu.core.parser.RemoteMangaRepository +import org.koitharu.kotatsu.utils.ext.* +import kotlin.math.pow + +private const val DOMAIN_UNAUTHORIZED = "e-hentai.org" +private const val DOMAIN_AUTHORIZED = "exhentai.org" + +class ExHentaiRepository( + loaderContext: MangaLoaderContext, +) : RemoteMangaRepository(loaderContext), MangaRepositoryAuthProvider { + + override val source = MangaSource.EXHENTAI + + override val defaultDomain: String + get() = if (isAuthorized()) DOMAIN_AUTHORIZED else DOMAIN_UNAUTHORIZED + + override val authUrl: String + get() = "https://${getDomain()}/bounce_login.php" + + private val ratingPattern = Regex("-?[0-9]+px") + private val authCookies = arrayOf("ipb_member_id", "ipb_pass_hash") + private var updateDm = false + + init { + loaderContext.cookieJar.insertCookies(DOMAIN_AUTHORIZED, "nw=1", "sl=dm_2") + loaderContext.cookieJar.insertCookies(DOMAIN_UNAUTHORIZED, "nw=1", "sl=dm_2") + } + + override suspend fun getList( + offset: Int, + query: String?, + sortOrder: SortOrder?, + tag: MangaTag?, + ): List = getList(offset, query, setOfNotNull(tag), sortOrder) + + override suspend fun getList( + offset: Int, + query: String?, + tags: Set?, + sortOrder: SortOrder?, + ): List { + val page = (offset / 25f).toIntUp() + var search = query?.urlEncoded().orEmpty() + val url = buildString { + append("https://") + append(getDomain()) + append("/?page=") + append(page) + if (!tags.isNullOrEmpty()) { + var fCats = 0 + for (tag in tags) { + tag.key.toIntOrNull()?.let { fCats = fCats or it } ?: run { + search += tag.key + " " + } + } + if (fCats != 0) { + append("&f_cats=") + append(1023 - fCats) + } + } + if (search.isNotEmpty()) { + append("&f_search=") + append(search.trim().replace(' ', '+')) + } + // by unknown reason cookie "sl=dm_2" is ignored, so, we should request it again + if (updateDm) { + append("&inline_set=dm_e") + } + } + val body = loaderContext.httpGet(url).parseHtml().body() + val root = body.selectFirst("table.itg") + ?.selectFirst("tbody") + ?: if (updateDm) { + parseFailed("Cannot find root") + } else { + updateDm = true + return getList(offset, query, tags, sortOrder) + } + updateDm = false + return root.children().mapNotNull { tr -> + if (tr.childrenSize() != 2) return@mapNotNull null + val (td1, td2) = tr.children() + val glink = td2.selectFirst("div.glink") ?: parseFailed("glink not found") + val a = glink.parents().select("a").first() ?: parseFailed("link not found") + val href = a.relUrl("href") + val tagsDiv = glink.nextElementSibling() ?: parseFailed("tags div not found") + val mainTag = td2.selectFirst("div.cn")?.let { div -> + MangaTag( + title = div.text(), + key = tagIdByClass(div.classNames()) ?: return@let null, + source = source, + ) + } + Manga( + id = generateUid(href), + title = glink.text().cleanupTitle(), + altTitle = null, + url = href, + publicUrl = a.absUrl("href"), + rating = td2.selectFirst("div.ir")?.parseRating() ?: Manga.NO_RATING, + isNsfw = true, + coverUrl = td1.selectFirst("img")?.absUrl("src").orEmpty(), + tags = setOfNotNull(mainTag), + state = null, + author = tagsDiv.getElementsContainingOwnText("artist:").first() + ?.nextElementSibling()?.text(), + source = source, + ) + } + } + + override suspend fun getDetails(manga: Manga): Manga { + val doc = loaderContext.httpGet(manga.url.withDomain()).parseHtml() + val root = doc.body().selectFirst("div.gm") ?: parseFailed("Cannot find root") + val cover = root.getElementById("gd1")?.children()?.first() + val title = root.getElementById("gd2") + val taglist = root.getElementById("taglist") + val tabs = doc.body().selectFirst("table.ptt")?.selectFirst("tr") + return manga.copy( + title = title?.getElementById("gn")?.text()?.cleanupTitle() ?: manga.title, + altTitle = title?.getElementById("gj")?.text()?.cleanupTitle() ?: manga.altTitle, + publicUrl = doc.baseUri().ifEmpty { manga.publicUrl }, + rating = root.getElementById("rating_label")?.text() + ?.substringAfterLast(' ') + ?.toFloatOrNull() + ?.div(5f) ?: manga.rating, + largeCoverUrl = cover?.css("background")?.cssUrl(), + description = taglist?.select("tr")?.joinToString("
") { tr -> + val (tc, td) = tr.children() + val subtags = td.select("a").joinToString { it.html() } + "${tc.html()} $subtags" + }, + chapters = tabs?.select("a")?.findLast { a -> + a.text().toIntOrNull() != null + }?.let { a -> + val count = a.text().toInt() + val chapters = ArrayList(count) + for (i in 1..count) { + val url = "${manga.url}?p=$i" + chapters += MangaChapter( + id = generateUid(url), + name = "${manga.title} #$i", + number = i, + url = url, + branch = null, + source = source, + ) + } + chapters + }, + ) + } + + override suspend fun getPages(chapter: MangaChapter): List { + val doc = loaderContext.httpGet(chapter.url.withDomain()).parseHtml() + val root = doc.body().getElementById("gdt") ?: parseFailed("Root not found") + return root.select("a").mapNotNull { a -> + val url = a.relUrl("href") + MangaPage( + id = generateUid(url), + url = url, + referer = a.absUrl("href"), + preview = null, + source = source, + ) + } + } + + override suspend fun getPageUrl(page: MangaPage): String { + val doc = loaderContext.httpGet(page.url.withDomain()).parseHtml() + return doc.body().getElementById("img")?.absUrl("src") + ?: parseFailed("Image not found") + } + + override suspend fun getTags(): Set { + val doc = loaderContext.httpGet("https://${getDomain()}").parseHtml() + val root = doc.body().getElementById("searchbox")?.selectFirst("table") + ?: parseFailed("Root not found") + return root.select("div.cs").mapNotNullToSet { div -> + val id = div.id().substringAfterLast('_').toIntOrNull() + ?: return@mapNotNullToSet null + MangaTag( + title = div.text(), + key = id.toString(), + source = source + ) + } + } + + override fun isAuthorized(): Boolean { + val authorized = isAuthorized(DOMAIN_UNAUTHORIZED) + if (authorized) { + if (!isAuthorized(DOMAIN_AUTHORIZED)) { + loaderContext.cookieJar.copyCookies( + DOMAIN_UNAUTHORIZED, + DOMAIN_AUTHORIZED, + authCookies, + ) + loaderContext.cookieJar.insertCookies(DOMAIN_AUTHORIZED, "yay=louder") + } + return true + } + return false + } + + private fun isAuthorized(domain: String): Boolean { + val cookies = loaderContext.cookieJar.getCookies(domain).mapToSet { x -> x.name } + return authCookies.all { it in cookies } + } + + private fun Element.parseRating(): Float { + return runCatching { + val style = requireNotNull(attr("style")) + val (v1, v2) = ratingPattern.find(style)!!.destructured + var p1 = v1.dropLast(2).toInt() + val p2 = v2.dropLast(2).toInt() + if (p2 != -1) { + p1 += 8 + } + (80 - p1) / 80f + }.getOrDefault(Manga.NO_RATING) + } + + private fun String.cleanupTitle(): String { + val result = StringBuilder(length) + var skip = false + for (c in this) { + when { + c == '[' -> skip = true + c == ']' -> skip = false + c.isWhitespace() && result.isEmpty() -> continue + !skip -> result.append(c) + } + } + while (result.lastOrNull()?.isWhitespace() == true) { + result.deleteCharAt(result.lastIndex) + } + return result.toString() + } + + private fun String.cssUrl(): String? { + val fromIndex = indexOf("url(") + if (fromIndex == -1) { + return null + } + val toIndex = indexOf(')', startIndex = fromIndex) + return if (toIndex == -1) { + null + } else { + substring(fromIndex + 4, toIndex).trim() + } + } + + private fun tagIdByClass(classNames: Collection): String? { + val className = classNames.find { x -> x.startsWith("ct") } ?: return null + val num = className.drop(2).toIntOrNull(16) ?: return null + return 2.0.pow(num).toInt().toString() + } +} \ No newline at end of file diff --git a/app/src/main/java/org/koitharu/kotatsu/core/parser/site/NineMangaRepository.kt b/app/src/main/java/org/koitharu/kotatsu/core/parser/site/NineMangaRepository.kt index 7fea87b3c..bbb236521 100644 --- a/app/src/main/java/org/koitharu/kotatsu/core/parser/site/NineMangaRepository.kt +++ b/app/src/main/java/org/koitharu/kotatsu/core/parser/site/NineMangaRepository.kt @@ -16,7 +16,7 @@ abstract class NineMangaRepository( ) : RemoteMangaRepository(loaderContext) { init { - loaderContext.insertCookies(getDomain(), "ninemanga_template_desk=yes") + loaderContext.cookieJar.insertCookies(getDomain(), "ninemanga_template_desk=yes") } override val sortOrders: Set = EnumSet.of( diff --git a/app/src/main/java/org/koitharu/kotatsu/utils/ext/CookieJarExt.kt b/app/src/main/java/org/koitharu/kotatsu/utils/ext/CookieJarExt.kt new file mode 100644 index 000000000..d188cadbe --- /dev/null +++ b/app/src/main/java/org/koitharu/kotatsu/utils/ext/CookieJarExt.kt @@ -0,0 +1,37 @@ +package org.koitharu.kotatsu.utils.ext + +import okhttp3.Cookie +import okhttp3.CookieJar +import okhttp3.HttpUrl + +private const val SCHEME_HTTPS = "https" + +fun CookieJar.insertCookies(domain: String, vararg cookies: String) { + val url = HttpUrl.Builder() + .scheme(SCHEME_HTTPS) + .host(domain) + .build() + saveFromResponse(url, cookies.mapNotNull { + Cookie.parse(url, it) + }) +} + +fun CookieJar.getCookies(domain: String): List { + val url = HttpUrl.Builder() + .scheme(SCHEME_HTTPS) + .host(domain) + .build() + return loadForRequest(url) +} + +fun CookieJar.copyCookies(oldDomain: String, newDomain: String, names: Array? = null) { + val url = HttpUrl.Builder() + .scheme(SCHEME_HTTPS) + .host(oldDomain) + var cookies = loadForRequest(url.build()) + if (names != null) { + cookies = cookies.filter { c -> c.name in names } + } + url.host(newDomain) + saveFromResponse(url.build(), cookies) +} \ No newline at end of file diff --git a/app/src/main/java/org/koitharu/kotatsu/utils/ext/ParseExt.kt b/app/src/main/java/org/koitharu/kotatsu/utils/ext/ParseExt.kt index c6e46b05e..7968b5ba2 100644 --- a/app/src/main/java/org/koitharu/kotatsu/utils/ext/ParseExt.kt +++ b/app/src/main/java/org/koitharu/kotatsu/utils/ext/ParseExt.kt @@ -91,4 +91,10 @@ fun Element.relUrl(attributeKey: String): String { return attr.removePrefix(baseUrl.dropLast(1)) } -private val REGEX_URL_BASE = Regex("^[^/]{2,6}://[^/]+/", RegexOption.IGNORE_CASE) \ No newline at end of file +private val REGEX_URL_BASE = Regex("^[^/]{2,6}://[^/]+/", RegexOption.IGNORE_CASE) + +fun Element.css(property: String): String? { + val regex = Regex("${Regex.escape(property)}\\s*:\\s*[^;]+") + val css = attr("style").find(regex) ?: return null + return css.substringAfter(':').removeSuffix(';').trim() +} \ No newline at end of file diff --git a/app/src/main/java/org/koitharu/kotatsu/utils/ext/StringExt.kt b/app/src/main/java/org/koitharu/kotatsu/utils/ext/StringExt.kt index fbf8326ae..52f774e6f 100644 --- a/app/src/main/java/org/koitharu/kotatsu/utils/ext/StringExt.kt +++ b/app/src/main/java/org/koitharu/kotatsu/utils/ext/StringExt.kt @@ -6,7 +6,6 @@ import java.math.BigInteger import java.net.URLEncoder import java.security.MessageDigest import java.util.* -import kotlin.contracts.contract import kotlin.math.min fun String.longHashCode(): Long { @@ -158,6 +157,13 @@ fun String.substringBetweenLast(from: String, to: String, fallbackValue: String fun String.find(regex: Regex) = regex.find(this)?.value +fun String.removeSuffix(suffix: Char): String { + if (lastOrNull() == suffix) { + return substring(0, length - 1) + } + return this +} + fun String.levenshteinDistance(other: String): Int { if (this == other) { return 0