Fix all parsers issues

This commit is contained in:
Koitharu
2020-03-18 19:12:27 +02:00
parent 0d041e9a0a
commit cae27dda05
5 changed files with 66 additions and 42 deletions

View File

@@ -93,7 +93,7 @@ abstract class ChanRepository : RemoteMangaRepository() {
val json = data.substring(pos).substringAfter('[').substringBefore(';')
.substringBeforeLast(']')
return json.split(",").mapNotNull {
it.trim().removeSurrounding('"').takeUnless(String::isBlank)
it.trim().removeSurrounding('"','\'').takeUnless(String::isBlank)
}.map { url ->
MangaPage(
id = url.longHashCode(),

View File

@@ -4,6 +4,7 @@ import org.koitharu.kotatsu.core.exceptions.ParseException
import org.koitharu.kotatsu.core.model.Manga
import org.koitharu.kotatsu.core.model.MangaChapter
import org.koitharu.kotatsu.core.model.MangaSource
import org.koitharu.kotatsu.core.model.MangaTag
import org.koitharu.kotatsu.utils.ext.longHashCode
import org.koitharu.kotatsu.utils.ext.parseHtml
import org.koitharu.kotatsu.utils.ext.withDomain
@@ -18,22 +19,27 @@ class HenChanRepository : ChanRepository() {
val doc = loaderContext.httpGet(manga.url).parseHtml()
val root =
doc.body().getElementById("dle-content") ?: throw ParseException("Cannot find root")
val readLink = manga.url.replace("manga", "online")
return manga.copy(
description = root.getElementById("description")?.html()?.substringBeforeLast("<div"),
largeCoverUrl = root.getElementById("cover")?.attr("src")?.withDomain(domain),
chapters = root.getElementById("right").select("table.table_cha").flatMap { table ->
table.select("div.manga2")
}.mapNotNull { it.selectFirst("a") }.reversed().mapIndexedNotNull { i, a ->
val href = a.attr("href")
?.withDomain(domain) ?: return@mapIndexedNotNull null
MangaChapter(
id = href.longHashCode(),
name = a.text().trim(),
number = i + 1,
url = href,
tags = root.selectFirst("div.sidetags")?.select("li.sidetag")?.map {
val a = it.children().last()
MangaTag(
title = a.text(),
key = a.attr("href").substringAfterLast('/'),
source = source
)
}
}?.toSet() ?: manga.tags,
chapters = listOf(
MangaChapter(
id = readLink.longHashCode(),
url = readLink,
source = source,
number = 1,
name = manga.title
)
)
)
}
}

View File

@@ -1,9 +1,39 @@
package org.koitharu.kotatsu.core.parser.site
import org.koitharu.kotatsu.core.exceptions.ParseException
import org.koitharu.kotatsu.core.model.Manga
import org.koitharu.kotatsu.core.model.MangaChapter
import org.koitharu.kotatsu.core.model.MangaSource
import org.koitharu.kotatsu.utils.ext.longHashCode
import org.koitharu.kotatsu.utils.ext.parseHtml
import org.koitharu.kotatsu.utils.ext.withDomain
class YaoiChanRepository : ChanRepository() {
override val source = MangaSource.YAOICHAN
override val defaultDomain = "yaoi-chan.me"
override suspend fun getDetails(manga: Manga): Manga {
val domain = conf.getDomain(defaultDomain)
val doc = loaderContext.httpGet(manga.url).parseHtml()
val root =
doc.body().getElementById("dle-content") ?: throw ParseException("Cannot find root")
return manga.copy(
description = root.getElementById("description")?.html()?.substringBeforeLast("<div"),
largeCoverUrl = root.getElementById("cover")?.attr("src")?.withDomain(domain),
chapters = root.select("table.table_cha").flatMap { table ->
table.select("div.manga")
}.mapNotNull { it.selectFirst("a") }.reversed().mapIndexedNotNull { i, a ->
val href = a.attr("href")
?.withDomain(domain) ?: return@mapIndexedNotNull null
MangaChapter(
id = href.longHashCode(),
name = a.text().trim(),
number = i + 1,
url = href,
source = source
)
}
)
}
}

View File

@@ -1,12 +0,0 @@
package org.koitharu.kotatsu.parsers
interface MangaParserTest {
fun testMangaList()
fun testMangaDetails()
fun testMangaPages()
fun testTags()
}

View File

@@ -6,23 +6,23 @@ import java.net.URL
object AssertX {
fun assertContentType(url: String, type: String, subtype: String? = null) {
Assert.assertFalse("URL is empty", url.isEmpty())
val cn = URL(url).openConnection() as HttpURLConnection
cn.requestMethod = "HEAD"
cn.connect()
when (val code = cn.responseCode) {
HttpURLConnection.HTTP_MOVED_PERM,
HttpURLConnection.HTTP_MOVED_TEMP -> assertContentType(cn.getHeaderField("Location"), type, subtype)
HttpURLConnection.HTTP_OK -> {
val ct = cn.contentType.substringBeforeLast(';').split("/")
Assert.assertEquals(type, ct.first())
if (subtype != null) {
Assert.assertEquals(subtype, ct.last())
}
}
else -> Assert.fail("Invalid response code $code")
}
}
fun assertContentType(url: String, type: String, subtype: String? = null) {
Assert.assertFalse("URL is empty", url.isEmpty())
val cn = URL(url).openConnection() as HttpURLConnection
cn.requestMethod = "HEAD"
cn.connect()
when (val code = cn.responseCode) {
HttpURLConnection.HTTP_MOVED_PERM,
HttpURLConnection.HTTP_MOVED_TEMP -> assertContentType(cn.getHeaderField("Location"), type, subtype)
HttpURLConnection.HTTP_OK -> {
val ct = cn.contentType.substringBeforeLast(';').split("/")
Assert.assertEquals(type, ct.first())
if (subtype != null) {
Assert.assertEquals(subtype, ct.last())
}
}
else -> Assert.fail("Invalid response code $code")
}
}
}