mirror of
https://github.com/gotson/komga.git
synced 2026-05-05 23:37:43 +00:00
feat(api): make pre-paginated epub containing only images compatible with divina profile
This commit is contained in:
parent
5b75345be7
commit
c2a4d1713d
10 changed files with 96 additions and 14 deletions
|
|
@ -0,0 +1,2 @@
|
|||
alter table MEDIA
|
||||
add column EPUB_DIVINA_COMPATIBLE boolean NOT NULL DEFAULT 0;
|
||||
|
|
@ -11,6 +11,7 @@ data class Media(
|
|||
val comment: String? = null,
|
||||
val extension: MediaExtension? = null,
|
||||
val bookId: String = "",
|
||||
val epubDivinaCompatible: Boolean = false,
|
||||
override val createdDate: LocalDateTime = LocalDateTime.now(),
|
||||
override val lastModifiedDate: LocalDateTime = createdDate,
|
||||
) : Auditable {
|
||||
|
|
|
|||
|
|
@ -72,7 +72,7 @@ class BookAnalyzer(
|
|||
when (mediaType.profile) {
|
||||
MediaProfile.DIVINA -> analyzeDivina(book, mediaType, analyzeDimensions)
|
||||
MediaProfile.PDF -> analyzePdf(book, analyzeDimensions)
|
||||
MediaProfile.EPUB -> analyzeEpub(book)
|
||||
MediaProfile.EPUB -> analyzeEpub(book, analyzeDimensions)
|
||||
}.copy(mediaType = mediaType.type)
|
||||
} catch (ade: AccessDeniedException) {
|
||||
logger.error(ade) { "Error while analyzing book: $book" }
|
||||
|
|
@ -123,12 +123,14 @@ class BookAnalyzer(
|
|||
return Media(status = Media.Status.READY, pages = pages, pageCount = pages.size, files = files, comment = entriesErrorSummary)
|
||||
}
|
||||
|
||||
private fun analyzeEpub(book: Book): Media {
|
||||
val manifest = epubExtractor.getManifest(book.path)
|
||||
private fun analyzeEpub(book: Book, analyzeDimensions: Boolean): Media {
|
||||
val manifest = epubExtractor.getManifest(book.path, analyzeDimensions)
|
||||
return Media(
|
||||
status = Media.Status.READY,
|
||||
pages = manifest.divinaPages,
|
||||
files = manifest.resources,
|
||||
pageCount = manifest.pageCount,
|
||||
epubDivinaCompatible = manifest.divinaPages.isNotEmpty(),
|
||||
extension = MediaExtensionEpub(
|
||||
toc = manifest.toc,
|
||||
landmarks = manifest.landmarks,
|
||||
|
|
@ -203,7 +205,9 @@ class BookAnalyzer(
|
|||
return when (book.media.profile) {
|
||||
MediaProfile.DIVINA -> divinaExtractors.getValue(book.media.mediaType!!).getEntryStream(book.book.path, book.media.pages[number - 1].fileName)
|
||||
MediaProfile.PDF -> pdfExtractor.getPageContentAsImage(book.book.path, number).bytes
|
||||
MediaProfile.EPUB -> throw MediaUnsupportedException("Epub profile does not support getting page content")
|
||||
MediaProfile.EPUB ->
|
||||
if (book.media.epubDivinaCompatible) epubExtractor.getEntryStream(book.book.path, book.media.pages[number - 1].fileName)
|
||||
else throw MediaUnsupportedException("Epub profile does not support getting page content")
|
||||
null -> throw MediaNotReadyException()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,6 +43,9 @@ class KomgaProperties {
|
|||
@Positive
|
||||
var pageHashing: Int = 3
|
||||
|
||||
@Positive
|
||||
var epubDivinaLetterCountThreshold: Int = 15
|
||||
|
||||
@Deprecated("Moved to server settings since 1.5.0")
|
||||
var rememberMe = RememberMe()
|
||||
|
||||
|
|
|
|||
|
|
@ -447,6 +447,7 @@ class BookDtoDao(
|
|||
mediaType = mediaType ?: "",
|
||||
pagesCount = pageCount.toInt(),
|
||||
comment = comment ?: "",
|
||||
epubDivinaCompatible = epubDivinaCompatible,
|
||||
)
|
||||
|
||||
private fun BookMetadataRecord.toDto(authors: List<AuthorDto>, tags: Set<String>, links: List<WebLinkDto>) =
|
||||
|
|
|
|||
|
|
@ -49,6 +49,7 @@ class MediaDao(
|
|||
m.COMMENT,
|
||||
m.PAGE_COUNT,
|
||||
m.EXTENSION_CLASS,
|
||||
m.EPUB_DIVINA_COMPATIBLE,
|
||||
*p.fields(),
|
||||
)
|
||||
|
||||
|
|
@ -133,9 +134,10 @@ class MediaDao(
|
|||
m.MEDIA_TYPE,
|
||||
m.COMMENT,
|
||||
m.PAGE_COUNT,
|
||||
m.EPUB_DIVINA_COMPATIBLE,
|
||||
m.EXTENSION_CLASS,
|
||||
m.EXTENSION_VALUE_BLOB,
|
||||
).values(null as String?, null, null, null, null, null, null),
|
||||
).values(null as String?, null, null, null, null, null, null, null),
|
||||
).also { step ->
|
||||
chunk.forEach { media ->
|
||||
step.bind(
|
||||
|
|
@ -144,6 +146,7 @@ class MediaDao(
|
|||
media.mediaType,
|
||||
media.comment,
|
||||
media.pageCount,
|
||||
media.epubDivinaCompatible,
|
||||
media.extension?.let { if (it is ProxyExtension) null else it::class.qualifiedName },
|
||||
media.extension?.let { if (it is ProxyExtension) null else mapper.serializeJsonGz(it) },
|
||||
)
|
||||
|
|
@ -227,6 +230,7 @@ class MediaDao(
|
|||
.set(m.MEDIA_TYPE, media.mediaType)
|
||||
.set(m.COMMENT, media.comment)
|
||||
.set(m.PAGE_COUNT, media.pageCount)
|
||||
.set(m.EPUB_DIVINA_COMPATIBLE, media.epubDivinaCompatible)
|
||||
.apply {
|
||||
if (media.extension != null && media.extension !is ProxyExtension) {
|
||||
set(m.EXTENSION_CLASS, media.extension::class.qualifiedName)
|
||||
|
|
@ -277,6 +281,7 @@ class MediaDao(
|
|||
extension = ProxyExtension.of(extensionClass),
|
||||
comment = comment,
|
||||
bookId = bookId,
|
||||
epubDivinaCompatible = epubDivinaCompatible,
|
||||
createdDate = createdDate.toCurrentTimeZone(),
|
||||
lastModifiedDate = lastModifiedDate.toCurrentTimeZone(),
|
||||
)
|
||||
|
|
|
|||
|
|
@ -1,18 +1,32 @@
|
|||
package org.gotson.komga.infrastructure.mediacontainer.epub
|
||||
|
||||
import mu.KotlinLogging
|
||||
import org.apache.commons.compress.archivers.ArchiveEntry
|
||||
import org.apache.commons.compress.archivers.zip.ZipFile
|
||||
import org.gotson.komga.domain.model.BookPage
|
||||
import org.gotson.komga.domain.model.EpubTocEntry
|
||||
import org.gotson.komga.domain.model.MediaFile
|
||||
import org.gotson.komga.domain.model.R2Locator
|
||||
import org.gotson.komga.domain.model.TypedBytes
|
||||
import org.gotson.komga.infrastructure.image.ImageAnalyzer
|
||||
import org.gotson.komga.infrastructure.mediacontainer.ContentDetector
|
||||
import org.jsoup.Jsoup
|
||||
import org.springframework.beans.factory.annotation.Value
|
||||
import org.springframework.stereotype.Service
|
||||
import java.nio.file.Path
|
||||
import kotlin.io.path.Path
|
||||
import kotlin.io.path.invariantSeparatorsPathString
|
||||
import kotlin.math.ceil
|
||||
import kotlin.math.roundToInt
|
||||
|
||||
private val logger = KotlinLogging.logger {}
|
||||
|
||||
@Service
|
||||
class EpubExtractor {
|
||||
class EpubExtractor(
|
||||
private val contentDetector: ContentDetector,
|
||||
private val imageAnalyzer: ImageAnalyzer,
|
||||
@Value("#{@komgaProperties.epubDivinaLetterCountThreshold}") private val letterCountThreshold: Int,
|
||||
) {
|
||||
|
||||
/**
|
||||
* Retrieves a specific entry by name from the zip archive
|
||||
|
|
@ -44,18 +58,20 @@ class EpubExtractor {
|
|||
} else null
|
||||
}
|
||||
|
||||
fun getManifest(path: Path): EpubManifest =
|
||||
fun getManifest(path: Path, analyzeDimensions: Boolean): EpubManifest =
|
||||
path.epub { epub ->
|
||||
val resources = getResources(epub)
|
||||
val isFixedLayout = isFixedLayout(epub)
|
||||
val pageCount = computePageCount(epub)
|
||||
EpubManifest(
|
||||
resources = resources,
|
||||
toc = getToc(epub),
|
||||
landmarks = getLandmarks(epub),
|
||||
pageList = getPageList(epub),
|
||||
pageCount = computePageCount(epub),
|
||||
pageCount = pageCount,
|
||||
isFixedLayout = isFixedLayout,
|
||||
positions = computePositions(resources, isFixedLayout),
|
||||
divinaPages = getDivinaPages(epub, isFixedLayout, pageCount, analyzeDimensions),
|
||||
)
|
||||
}
|
||||
|
||||
|
|
@ -84,6 +100,52 @@ class EpubExtractor {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Builds the list of image pages used to expose a fixed-layout epub under the Divina profile.
 *
 * Returns an empty list (meaning "not divina compatible") when any of the following holds:
 * - [isFixedLayout] is false,
 * - the body text of any spine page is longer than `letterCountThreshold` characters,
 * - the number of resolved spine pages, the total number of images referenced by those pages,
 *   or the number of resulting [BookPage] differs from [pageCount],
 * - any exception is thrown while reading the archive (it is logged at warn level).
 *
 * @param epub the opened epub package (OPF document, manifest and zip archive)
 * @param isFixedLayout whether the epub is fixed layout
 * @param pageCount the expected number of pages, compared against the pages and images found
 * @param analyzeDimensions when true, each image is read to compute its dimension; otherwise the dimension is null
 * @return one [BookPage] per image, or an empty list if the book is not divina compatible
 */
private fun getDivinaPages(epub: EpubPackage, isFixedLayout: Boolean, pageCount: Int, analyzeDimensions: Boolean): List<BookPage> {
|
||||
if (!isFixedLayout) return emptyList()
|
||||

||||
try {
|
||||
// for each spine item, in spine order, collect the paths of the images referenced by its page document
val pagesWithImages = epub.opfDoc.select("spine > itemref")
|
||||
.map { it.attr("idref") }
|
||||
.mapNotNull { idref -> epub.manifest[idref]?.href?.let { normalizeHref(epub.opfDir, it) } }
|
||||
.map { pagePath ->
|
||||
val doc = epub.zip.getInputStream(epub.zip.getEntry(pagePath)).use { Jsoup.parse(it, null, "") }
|
||||

||||
// if a page has text over the threshold then the book is not divina compatible
|
||||
if (doc.body().text().length > letterCountThreshold) return emptyList()
|
||||

||||
val img = doc.getElementsByTag("img")
|
||||
.map { it.attr("src") } // get the src, which can be a relative path
|
||||

||||
val svg = doc.select("svg > image[xlink:href]")
|
||||
.map { it.attr("xlink:href") } // get the source, which can be a relative path
|
||||

||||
(img + svg).map { (Path(pagePath).parent ?: Path("")).resolve(it).normalize().invariantSeparatorsPathString } // resolve it against the page folder
|
||||
}
|
||||

||||
// both the number of resolved spine pages and the total number of images must match the expected page count
if (pagesWithImages.size != pageCount) return emptyList()
|
||||
val imagesPath = pagesWithImages.flatten()
|
||||
if (imagesPath.size != pageCount) return emptyList()
|
||||

||||
// images without a manifest entry, or whose manifest media type is not an image, are dropped here
val divinaPages = imagesPath.mapNotNull { imagePath ->
|
||||
val mediaType = epub.manifest.values.firstOrNull { normalizeHref(epub.opfDir, it.href) == imagePath }?.mediaType ?: return@mapNotNull null
|
||||
val zipEntry = epub.zip.getEntry(imagePath)
|
||||
if (!contentDetector.isImage(mediaType)) return@mapNotNull null
|
||||

||||
val dimension =
|
||||
if (analyzeDimensions) epub.zip.getInputStream(zipEntry).use { imageAnalyzer.getDimension(it) }
|
||||
else null
|
||||
val fileSize = if (zipEntry.size == ArchiveEntry.SIZE_UNKNOWN) null else zipEntry.size
|
||||
BookPage(fileName = imagePath, mediaType = mediaType, dimension = dimension, fileSize = fileSize)
|
||||
}
|
||||

||||
// a dropped image above makes the count differ, in which case the book is not divina compatible
if (divinaPages.size != pageCount) return emptyList()
|
||||
return divinaPages
|
||||
} catch (e: Exception) {
|
||||
logger.warn(e) { "Error while getting divina pages" }
|
||||
return emptyList()
|
||||
}
|
||||
}
|
||||
|
||||
private fun computePageCount(epub: EpubPackage): Int {
|
||||
val spine = epub.opfDoc.select("spine > itemref")
|
||||
.map { it.attr("idref") }
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.gotson.komga.infrastructure.mediacontainer.epub
|
||||
|
||||
import org.gotson.komga.domain.model.BookPage
|
||||
import org.gotson.komga.domain.model.EpubTocEntry
|
||||
import org.gotson.komga.domain.model.MediaFile
|
||||
import org.gotson.komga.domain.model.R2Locator
|
||||
|
|
@ -12,4 +13,5 @@ data class EpubManifest(
|
|||
val pageCount: Int,
|
||||
val isFixedLayout: Boolean,
|
||||
val positions: List<R2Locator>,
|
||||
val divinaPages: List<BookPage>,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -227,14 +227,15 @@ class WebPubGenerator(
|
|||
|
||||
private fun BookDto.toWPLinkDtos(uriBuilder: UriComponentsBuilder): List<WPLinkDto> {
|
||||
val komgaMediaType = KomgaMediaType.fromMediaType(media.mediaType)
|
||||
return listOfNotNull(
|
||||
return buildList {
|
||||
// most appropriate manifest
|
||||
WPLinkDto(rel = OpdsLinkRel.SELF, href = uriBuilder.cloneBuilder().path("books/$id/manifest").toUriString(), type = mediaProfileToWebPub(komgaMediaType?.profile)),
|
||||
// PDF is also available under the Divina profile
|
||||
if (komgaMediaType?.profile == MediaProfile.PDF) WPLinkDto(href = uriBuilder.cloneBuilder().path("books/$id/manifest/divina").toUriString(), type = MEDIATYPE_DIVINA_JSON_VALUE) else null,
|
||||
add(WPLinkDto(rel = OpdsLinkRel.SELF, href = uriBuilder.cloneBuilder().path("books/$id/manifest").toUriString(), type = mediaProfileToWebPub(komgaMediaType?.profile)))
|
||||
// PDFs, and EPUBs that are Divina compatible, are also available under the Divina profile
|
||||
if (komgaMediaType?.profile == MediaProfile.PDF || (komgaMediaType?.profile == MediaProfile.EPUB && media.epubDivinaCompatible))
|
||||
add(WPLinkDto(href = uriBuilder.cloneBuilder().path("books/$id/manifest/divina").toUriString(), type = MEDIATYPE_DIVINA_JSON_VALUE))
|
||||
// main acquisition link
|
||||
WPLinkDto(rel = OpdsLinkRel.ACQUISITION, type = komgaMediaType?.exportType ?: media.mediaType, href = uriBuilder.cloneBuilder().path("books/$id/file").toUriString()),
|
||||
)
|
||||
add(WPLinkDto(rel = OpdsLinkRel.ACQUISITION, type = komgaMediaType?.exportType ?: media.mediaType, href = uriBuilder.cloneBuilder().path("books/$id/file").toUriString()))
|
||||
}
|
||||
}
|
||||
|
||||
private fun mediaProfileToWebPub(profile: MediaProfile?): String = when (profile) {
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ data class MediaDto(
|
|||
val mediaType: String,
|
||||
val pagesCount: Int,
|
||||
val comment: String,
|
||||
val epubDivinaCompatible: Boolean,
|
||||
) {
|
||||
val mediaProfile: String by lazy { MediaType.fromMediaType(mediaType)?.profile?.name ?: "" }
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue