From 2947cd3038a51abc95212ab6e2ca23778b919224 Mon Sep 17 00:00:00 2001 From: Koitharu Date: Wed, 9 Mar 2022 18:26:19 +0200 Subject: [PATCH] Add NudeMoon manga parser --- .../kotatsu/core/model/MangaSource.kt | 3 +- .../kotatsu/core/parser/ParserModule.kt | 2 +- .../core/parser/site/NudeMoonRepository.kt | 186 +++++++++++++----- 3 files changed, 134 insertions(+), 57 deletions(-) diff --git a/app/src/main/java/org/koitharu/kotatsu/core/model/MangaSource.kt b/app/src/main/java/org/koitharu/kotatsu/core/model/MangaSource.kt index af4e67704..2733a422a 100644 --- a/app/src/main/java/org/koitharu/kotatsu/core/model/MangaSource.kt +++ b/app/src/main/java/org/koitharu/kotatsu/core/model/MangaSource.kt @@ -19,8 +19,7 @@ enum class MangaSource( YAOICHAN("Яой-тян", "ru"), MANGATOWN("MangaTown", "en"), MANGALIB("MangaLib", "ru"), - - // NUDEMOON("Nude-Moon", "ru", NudeMoonRepository::class.java), + NUDEMOON("Nude-Moon", "ru"), MANGAREAD("MangaRead", "en"), REMANGA("Remanga", "ru"), HENTAILIB("HentaiLib", "ru"), diff --git a/app/src/main/java/org/koitharu/kotatsu/core/parser/ParserModule.kt b/app/src/main/java/org/koitharu/kotatsu/core/parser/ParserModule.kt index 0431c2fdb..ef16f37a9 100644 --- a/app/src/main/java/org/koitharu/kotatsu/core/parser/ParserModule.kt +++ b/app/src/main/java/org/koitharu/kotatsu/core/parser/ParserModule.kt @@ -17,7 +17,7 @@ val parserModule factory(named(MangaSource.YAOICHAN)) { YaoiChanRepository(get()) } factory(named(MangaSource.MANGATOWN)) { MangaTownRepository(get()) } factory(named(MangaSource.MANGALIB)) { MangaLibRepository(get()) } - // factory(named(MangaSource.NUDEMOON)) { NudeMoonRepository(get()) } + factory(named(MangaSource.NUDEMOON)) { NudeMoonRepository(get()) } factory(named(MangaSource.MANGAREAD)) { MangareadRepository(get()) } factory(named(MangaSource.REMANGA)) { RemangaRepository(get()) } factory(named(MangaSource.HENTAILIB)) { HentaiLibRepository(get()) } diff --git a/app/src/main/java/org/koitharu/kotatsu/core/parser/site/NudeMoonRepository.kt b/app/src/main/java/org/koitharu/kotatsu/core/parser/site/NudeMoonRepository.kt index febfabac1..e1da76ba5 100644 --- a/app/src/main/java/org/koitharu/kotatsu/core/parser/site/NudeMoonRepository.kt +++ b/app/src/main/java/org/koitharu/kotatsu/core/parser/site/NudeMoonRepository.kt @@ -1,9 +1,24 @@ package org.koitharu.kotatsu.core.parser.site -/* -class NudeMoonRepository(loaderContext: MangaLoaderContext) : RemoteMangaRepository(loaderContext) { +import android.util.SparseArray +import org.koitharu.kotatsu.base.domain.MangaLoaderContext +import org.koitharu.kotatsu.core.exceptions.AuthRequiredException +import org.koitharu.kotatsu.core.exceptions.ParseException +import org.koitharu.kotatsu.core.model.* +import org.koitharu.kotatsu.core.parser.MangaRepositoryAuthProvider +import org.koitharu.kotatsu.core.parser.RemoteMangaRepository +import org.koitharu.kotatsu.utils.ext.* +import java.text.SimpleDateFormat +import java.util.* +import java.util.regex.Pattern + +class NudeMoonRepository(loaderContext: MangaLoaderContext) : RemoteMangaRepository(loaderContext), + MangaRepositoryAuthProvider { override val source = MangaSource.NUDEMOON + override val defaultDomain = "nude-moon.net" + override val authUrl: String + get() = "https://${getDomain()}/index.php" override val sortOrders: Set = EnumSet.of( SortOrder.NEWEST, @@ -11,125 +26,188 @@ class NudeMoonRepository(loaderContext: MangaLoaderContext) : RemoteMangaReposit SortOrder.RATING ) + private val pageUrlPatter = Pattern.compile(".*\\?page=[0-9]+$") + init { - loaderContext.insertCookies( - conf.getDomain(DEFAULT_DOMAIN), + loaderContext.cookieJar.insertCookies( + getDomain(), "NMfYa=1;", "nm_mobile=0;" ) } - override suspend fun getList( + override suspend fun getList2( offset: Int, query: String?, - sortOrder: SortOrder?, - tag: MangaTag? + tags: Set?, + sortOrder: SortOrder? ): List { - val domain = conf.getDomain(DEFAULT_DOMAIN) + val domain = getDomain() val url = when { - !query.isNullOrEmpty() -> "https://$domain//search?stext=${query.urlEncoded()}&rowstart=$offset" - tag != null -> "https://$domain/tags/${tag.key}&rowstart=$offset" + !query.isNullOrEmpty() -> "https://$domain/search?stext=${query.urlEncoded()}&rowstart=$offset" + !tags.isNullOrEmpty() -> tags.joinToString( + separator = "_", + prefix = "https://$domain/tags/", + postfix = "&rowstart=$offset", + transform = { it.key.urlEncoded() } + ) else -> "https://$domain/all_manga?${getSortKey(sortOrder)}&rowstart=$offset" } val doc = loaderContext.httpGet(url).parseHtml() val root = doc.body().run { - selectFirst("td.shoutbox") ?: selectFirst("td.main-bg") - } ?: throw ParseException("Cannot find root") + selectFirst("td.main-bg") ?: selectFirst("td.main-body") + } ?: parseFailed("Cannot find root") return root.select("table.news_pic2").mapNotNull { row -> val a = row.selectFirst("td.bg_style1")?.selectFirst("a") ?: return@mapNotNull null - val href = a.absUrl("href") + val href = a.relUrl("href") val title = a.selectFirst("h2")?.text().orEmpty() - val info = row.selectFirst("div.tbl2") ?: return@mapNotNull null + val info = row.selectFirst("td[width=100%]") ?: return@mapNotNull null Manga( - id = href.longHashCode(), + id = generateUid(href), url = href, title = title.substringAfter(" / "), altTitle = title.substringBefore(" / ", "") .takeUnless { it.isBlank() }, - author = info.getElementsContainingOwnText("Автор:")?.firstOrNull() + author = info.getElementsContainingOwnText("Автор:").firstOrNull() ?.nextElementSibling()?.ownText(), - coverUrl = row.selectFirst("img.news_pic2")?.absUrl("src") + coverUrl = row.selectFirst("img.news_pic2")?.absUrl("data-src") .orEmpty(), tags = row.selectFirst("span.tag-links")?.select("a") ?.mapToSet { MangaTag( - title = it.text(), - key = it.attr("href").substringAfterLast('/').urlEncoded(), + title = it.text().toTitleCase(), + key = it.attr("href").substringAfterLast('/'), source = source ) }.orEmpty(), - source = source + source = source, + publicUrl = a.absUrl("href"), + rating = Manga.NO_RATING, + isNsfw = true, + description = row.selectFirst("div.description")?.html(), + state = null, ) } } override suspend fun getDetails(manga: Manga): Manga { - val doc = loaderContext.httpGet(manga.url).parseHtml() - val root = doc.body().selectFirst("table.shoutbox") - ?: throw ParseException("Cannot find root") + val body = loaderContext.httpGet(manga.url.withDomain()).parseHtml().body() + val root = body.selectFirst("table.shoutbox") + ?: parseFailed("Cannot find root") val info = root.select("div.tbl2") + val lastInfo = info.last() return manga.copy( - description = info.select("div.blockquote").lastOrNull()?.html(), + largeCoverUrl = body.selectFirst("img.news_pic2")?.absUrl("src"), + description = info.select("div.blockquote").lastOrNull()?.html() ?: manga.description, tags = info.select("span.tag-links").firstOrNull()?.select("a")?.mapToSet { MangaTag( - title = it.text(), - key = it.attr("href").substringAfterLast('/').urlEncoded(), - source = source + title = it.text().toTitleCase(), + key = it.attr("href").substringAfterLast('/'), + source = source, ) - } ?: manga.tags, + }?.plus(manga.tags) ?: manga.tags, + author = lastInfo?.getElementsByAttributeValueContaining("href", "mangaka/")?.text() + ?: manga.author, chapters = listOf( MangaChapter( id = manga.id, url = manga.url, source = source, number = 1, - name = manga.title + name = manga.title, + scanlator = lastInfo?.getElementsByAttributeValueContaining("href", "perevod/")?.text(), + uploadDate = lastInfo?.getElementsContainingOwnText("Дата:") + ?.firstOrNull() + ?.html() + ?.parseDate() ?: 0L, + branch = null, ) ) ) } override suspend fun getPages(chapter: MangaChapter): List { - conf.getDomain(DEFAULT_DOMAIN) - val doc = loaderContext.httpGet(chapter.url).parseHtml() + val fullUrl = chapter.url.withDomain() + val doc = loaderContext.httpGet(fullUrl).parseHtml() val root = doc.body().selectFirst("td.main-body") - ?: throw ParseException("Cannot find root") - return root.getElementsByAttributeValueMatching("href", pageUrlPatter).mapNotNull { a -> - val url = a.absUrl("href") + ?: parseFailed("Cannot find root") + val readlink = root.selectFirst("table.shoutbox")?.selectFirst("a")?.absUrl("href") + ?: parseFailed("Cannot obtain read link") + val fullPages = getFullPages(readlink) + return root.getElementsByAttributeValueMatching("href", pageUrlPatter).mapIndexedNotNull { i, a -> + val url = a.relUrl("href") MangaPage( - id = url.longHashCode(), - url = url, - referer = chapter.url, + id = generateUid(url), + url = fullPages[i] ?: return@mapIndexedNotNull null, + referer = fullUrl, preview = a.selectFirst("img")?.absUrl("src"), - source = source + source = source, ) } } - override suspend fun getPageUrl(page: MangaPage): String { - val doc = loaderContext.httpGet(page.url).parseHtml() - return doc.body().getElementById("gallery").attr("src").inContextOf(doc) - } - override suspend fun getTags(): Set { - val domain = conf.getDomain(DEFAULT_DOMAIN) + val domain = getDomain() val doc = loaderContext.httpGet("https://$domain/all_manga").parseHtml() val root = doc.body().getElementsContainingOwnText("Поиск манги по тегам") .firstOrNull()?.parents()?.find { it.tag().normalName() == "tbody" } ?.selectFirst("td.textbox")?.selectFirst("td.small") - ?: throw ParseException("Tags root not found") + ?: parseFailed("Tags root not found") return root.select("a").mapToSet { MangaTag( - title = it.text(), + title = it.text().toTitleCase(), key = it.attr("href").substringAfterLast('/') - .removeSuffix("+").urlEncoded(), - source = source + .removeSuffix("+"), + source = source, ) } } - override fun onCreatePreferences() = arraySetOf(SourceSettings.KEY_DOMAIN) + override fun isAuthorized(): Boolean { + return loaderContext.cookieJar.getCookies(getDomain()).any { + it.name == "fusion_user" + } + } + + suspend fun getUsername(): String { + val body = loaderContext.httpGet("https://${getDomain()}/").parseHtml() + .body() + return body + .getElementsContainingOwnText("Профиль") + .firstOrNull() + ?.attr("href") + ?.substringAfterLast('/') + ?: run { + throw if (body.selectFirst("form[name=\"loginform\"]") != null) { + AuthRequiredException(authUrl) + } else { + ParseException("Cannot find username") + } + } + } + + private suspend fun getFullPages(url: String): SparseArray { + val scripts = loaderContext.httpGet(url).parseHtml().select("script") + val regex = "images\\[(\\d+)].src = '([^']+)'".toRegex() + for (script in scripts) { + val src = script.html() + if (src.isEmpty()) { + continue + } + val matches = regex.findAll(src).toList() + if (matches.isEmpty()) { + continue + } + val result = SparseArray(matches.size) + matches.forEach { match -> + val (index, link) = match.destructured + result.append(index.toInt(), link) + } + return result + } + parseFailed("Cannot find pages list") + } private fun getSortKey(sortOrder: SortOrder?) = when (sortOrder ?: sortOrders.minByOrNull { it.ordinal }) { @@ -139,9 +217,9 @@ class NudeMoonRepository(loaderContext: MangaLoaderContext) : RemoteMangaReposit else -> "like" } - private companion object { - - private const val DEFAULT_DOMAIN = "nude-moon.me" - private val pageUrlPatter = Pattern.compile(".*\\?page=[0-9]+$") + private fun String.parseDate(): Long { + val dateString = substringBetweenFirst("Дата:", "<")?.trim() ?: return 0 + val dateFormat = SimpleDateFormat("d MMMM yyyy", Locale("ru")) + return dateFormat.tryParse(dateString) } -}*/ +}