mangadex_at_home/src/main/kotlin/mdnet/base/server/ImageServer.kt

410 lines
16 KiB
Kotlin
Raw Normal View History

2020-06-22 17:02:36 +00:00
/*
MangaDex@Home
Copyright (c) 2020, MangaDex Network
This file is part of MangaDex@Home.
MangaDex@Home is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
MangaDex@Home is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with MangaDex@Home. If not, see <http://www.gnu.org/licenses/>.
*/
2020-06-13 22:36:26 +00:00
/* ktlint-disable no-wildcard-imports */
2020-06-13 23:19:04 +00:00
package mdnet.base.server
2020-06-13 22:36:26 +00:00
2020-07-04 19:00:59 +00:00
import com.fasterxml.jackson.core.JsonProcessingException
2020-07-02 21:24:12 +00:00
import com.fasterxml.jackson.databind.DeserializationFeature
import com.fasterxml.jackson.databind.ObjectMapper
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule
import com.fasterxml.jackson.module.kotlin.jacksonObjectMapper
import com.fasterxml.jackson.module.kotlin.readValue
2020-07-02 16:06:32 +00:00
import java.io.BufferedInputStream
import java.io.BufferedOutputStream
import java.io.File
import java.io.InputStream
import java.time.Clock
2020-07-02 21:24:12 +00:00
import java.time.OffsetDateTime
import java.util.*
2020-07-02 16:06:32 +00:00
import java.util.concurrent.Executors
import java.util.concurrent.atomic.AtomicBoolean
2020-07-02 16:06:32 +00:00
import java.util.concurrent.atomic.AtomicReference
import javax.crypto.Cipher
import javax.crypto.CipherInputStream
import javax.crypto.CipherOutputStream
2020-06-13 22:36:26 +00:00
import mdnet.base.Constants
import mdnet.base.data.ImageData
import mdnet.base.data.ImageDatum
2020-07-02 16:06:32 +00:00
import mdnet.base.data.Statistics
2020-07-02 21:24:12 +00:00
import mdnet.base.data.Token
2020-07-04 19:39:11 +00:00
import mdnet.base.info
import mdnet.base.netty.Netty
import mdnet.base.settings.RemoteSettings
2020-07-02 21:24:12 +00:00
import mdnet.base.settings.ServerSettings
2020-07-04 19:39:11 +00:00
import mdnet.base.trace
import mdnet.base.warn
2020-06-13 22:36:26 +00:00
import mdnet.cache.CachingInputStream
import mdnet.cache.DiskLruCache
2020-08-22 03:23:18 +00:00
import mdnet.security.TweetNaclFast
import org.apache.http.client.config.CookieSpecs
import org.apache.http.client.config.RequestConfig
import org.apache.http.impl.client.HttpClients
2020-08-22 16:08:09 +00:00
import org.http4k.client.Apache4Client
2020-06-13 22:36:26 +00:00
import org.http4k.core.*
import org.http4k.filter.CachingFilters
import org.http4k.filter.ServerFilters
2020-06-13 22:36:26 +00:00
import org.http4k.lens.Path
import org.http4k.routing.bind
import org.http4k.routing.routes
import org.http4k.server.Http4kServer
import org.http4k.server.asServer
2020-08-11 19:12:01 +00:00
import org.jetbrains.exposed.exceptions.ExposedSQLException
2020-06-15 22:25:31 +00:00
import org.jetbrains.exposed.sql.Database
import org.jetbrains.exposed.sql.SchemaUtils
import org.jetbrains.exposed.sql.transactions.transaction
2020-06-13 22:36:26 +00:00
import org.slf4j.LoggerFactory
// File-level SLF4J logger, shared by ImageServer and the top-level filter functions in this file.
private val LOGGER = LoggerFactory.getLogger(ImageServer::class.java)
2020-07-04 19:39:11 +00:00
class ImageServer(
private val cache: DiskLruCache,
private val database: Database,
private val statistics: AtomicReference<Statistics>,
private val remoteSettings: RemoteSettings,
2020-07-04 19:39:11 +00:00
private val client: HttpHandler
) {
2020-06-15 22:25:31 +00:00
init {
    // Make sure the ImageData schema exists before any request touches the database.
    // Database access in this class is always wrapped in synchronized(database).
    synchronized(database) {
        transaction(database) { SchemaUtils.create(ImageData) }
    }
}
2020-06-13 22:36:26 +00:00
// Thread pool passed to CachingInputStream in handleCacheMiss.
private val executor = Executors.newCachedThreadPool()
2020-07-02 21:24:12 +00:00
/**
 * Builds the [HttpHandler] serving one image route.
 *
 * Per request the handler:
 *  1. rejects referrers outside [ALLOWED_REFERER_DOMAINS] with 403;
 *  2. validates the `{token}` path segment when [tokenized] is set or the remote settings force tokens;
 *  3. bumps the served-request counter;
 *  4. serves from the disk cache when both the cache file and its database row are usable,
 *     otherwise removes a broken cache file (if any) and fetches the image from upstream.
 *
 * @param dataSaver `true` for `/data-saver/...` routes, `false` for `/data/...` routes
 * @param tokenized whether the route carries a `{token}` path segment that must be validated
 * @return the handler, wrapped in the filter returned by `baseHandler()`
 */
fun handler(dataSaver: Boolean, tokenized: Boolean = false): HttpHandler {
    val box = TweetNaclFast.SecretBox(remoteSettings.tokenKey)

    return baseHandler().then { request ->
        val chapterHash = Path.of("chapterHash")(request)
        val fileName = Path.of("fileName")(request)

        val routePrefix = if (dataSaver) "/data-saver" else "/data"
        val sanitizedUri = "$routePrefix/$chapterHash/$fileName"

        if (!request.referrerMatches(ALLOWED_REFERER_DOMAINS)) {
            LOGGER.info { "Request for $sanitizedUri rejected due to non-allowed referrer ${request.header("Referer")}" }
            return@then Response(Status.FORBIDDEN)
        }

        if (tokenized || remoteSettings.forceTokens) {
            // NOTE(review): when tokens are forced on a route without a {token} segment the Path lens
            // presumably fails and is handled by the CatchLensFailure filter installed in getServer — confirm.
            val rejection = rejectionForToken(box, request, chapterHash, sanitizedUri)
            if (rejection != null) {
                return@then rejection
            }
        }

        statistics.getAndUpdate {
            it.copy(requestsServed = it.requestsServed + 1)
        }

        // The MD5 of the (data-saver-prefixed) image name doubles as cipher key material and, hex-encoded, as image id.
        val rc4Bytes = if (dataSaver) {
            md5Bytes("saver$chapterHash.$fileName")
        } else {
            md5Bytes("$chapterHash.$fileName")
        }
        val imageId = printHexString(rc4Bytes)

        val snapshot = cache.getUnsafe(imageId.toCacheId())
        val imageDatum = try {
            synchronized(database) {
                transaction(database) {
                    ImageDatum.findById(imageId)
                }
            }
        } catch (e: Exception) {
            // Do not leak the already opened cache snapshot when the lookup fails.
            snapshot?.close()
            throw e
        }

        if (snapshot != null && imageDatum != null && imageDatum.contentType.isImageMimetype()) {
            request.handleCacheHit(sanitizedUri, getRc4(rc4Bytes), snapshot, imageDatum)
        } else {
            if (snapshot != null) {
                // A cache file without a usable database row cannot be served; drop it and refetch.
                snapshot.close()
                LOGGER.warn { "Removing broken cache file for $sanitizedUri" }
                cache.removeUnsafe(imageId.toCacheId())
                cache.flush()
            }

            request.handleCacheMiss(sanitizedUri, getRc4(rc4Bytes), imageId, imageDatum)
        }
    }
}

/**
 * Validates the `{token}` path segment of [request] against [chapterHash].
 *
 * The token is URL-safe base64; its first 24 bytes are passed to [box] as the nonce and the
 * remainder as the ciphertext. The opened plaintext is parsed as a JSON [Token].
 *
 * @return `null` when the token is valid, otherwise the response to send back: 403 for a
 *         malformed, undecryptable, unparseable or inapplicable token, 410 for an expired one
 */
private fun rejectionForToken(
    box: TweetNaclFast.SecretBox,
    request: Request,
    chapterHash: String,
    sanitizedUri: String
): Response? {
    val nonceLength = 24
    val rawToken = Path.of("token")(request)

    val tokenArr = try {
        Base64.getUrlDecoder().decode(rawToken)
    } catch (e: IllegalArgumentException) {
        // Malformed base64 is a bad token from the client, not a server error.
        null
    }
    if (tokenArr == null || tokenArr.size < nonceLength) {
        LOGGER.info { "Request for $sanitizedUri rejected for invalid token" }
        return Response(Status.FORBIDDEN)
    }

    val plaintext = box.open(
        tokenArr.sliceArray(nonceLength until tokenArr.size),
        tokenArr.sliceArray(0 until nonceLength)
    )
    if (plaintext == null) {
        LOGGER.info { "Request for $sanitizedUri rejected for invalid token" }
        return Response(Status.FORBIDDEN)
    }

    val token = try {
        JACKSON.readValue<Token>(plaintext)
    } catch (e: JsonProcessingException) {
        LOGGER.info(e) { "Request for $sanitizedUri rejected for invalid token" }
        return Response(Status.FORBIDDEN)
    }

    if (OffsetDateTime.now().isAfter(token.expires)) {
        LOGGER.info { "Request for $sanitizedUri rejected for expired token" }
        return Response(Status.GONE)
    }

    if (token.hash != chapterHash) {
        LOGGER.info { "Request for $sanitizedUri rejected for inapplicable token" }
        return Response(Status.FORBIDDEN)
    }

    return null
}
2020-07-05 16:56:15 +00:00
/**
 * Filters referrers based on passed (sub)domains. Ignores the `scheme` (protocol), user info, port,
 * path, query and fragment of the URL and compares host names case-insensitively.
 *
 * A host matches an allowed domain only when it is exactly that domain or a subdomain of it,
 * so a look-alike such as `evilmangadex.org` does not pass as `mangadex.org`.
 *
 * @param allowedDomains domains whose hosts (and subdomains) are accepted
 * @param permitBlank result to return when the `Referer` header is absent or empty
 * @return whether the request's referrer host is acceptable
 */
private fun Request.referrerMatches(allowedDomains: List<String>, permitBlank: Boolean = true): Boolean {
    // The header name really is spelled "Referer": a historical misspelling of "referrer" we are stuck with.
    val referer = this.header("Referer") ?: return permitBlank
    if (referer == "") return permitBlank

    // Computed once instead of once per allowed domain.
    val host = referer.substringAfter("//") // Ignore scheme
        .substringBefore("/") // Ignore path
        .substringBefore("?") // Ignore query of a path-less URL
        .substringBefore("#") // Ignore fragment of a path-less URL
        .substringAfterLast("@") // Ignore user info
        .substringBefore(":") // Ignore port

    return allowedDomains.any { domain ->
        // Require a label boundary: the domain itself or "<anything>.<domain>".
        host.equals(domain, ignoreCase = true) || host.endsWith(".$domain", ignoreCase = true)
    }
}
2020-06-15 22:25:31 +00:00
/**
 * Answers a request whose image is available in the disk cache.
 *
 * A request carrying `If-Modified-Since` gets a bodiless 304 and the snapshot is closed right away;
 * any other request gets the cached file streamed through [cipher].
 *
 * @param sanitizedUri request path used in log messages
 * @param cipher cipher wrapped around the cached file's input stream
 * @param snapshot open cache snapshot of the image
 * @param imageDatum database row holding the image's content type and last-modified value
 */
private fun Request.handleCacheHit(sanitizedUri: String, cipher: Cipher, snapshot: DiskLruCache.Snapshot, imageDatum: ImageDatum): Response {
    val isConditional = this.header("If-Modified-Since") != null

    if (!isConditional) {
        statistics.getAndUpdate { stats ->
            stats.copy(cacheHits = stats.cacheHits + 1)
        }
        LOGGER.info { "Request for $sanitizedUri hit cache" }

        val cipherStream = CipherInputStream(BufferedInputStream(snapshot.getInputStream(0)), cipher)
        return respondWithImage(
            cipherStream,
            snapshot.getLength(0).toString(),
            imageDatum.contentType,
            imageDatum.lastModified,
            true
        )
    }

    // our files never change, so it's safe to use the browser cache
    statistics.getAndUpdate { stats ->
        stats.copy(browserCached = stats.browserCached + 1)
    }
    LOGGER.info { "Request for $sanitizedUri cached by browser" }

    val lastModified = imageDatum.lastModified
    snapshot.close()

    return Response(Status.NOT_MODIFIED)
        .header("Last-Modified", lastModified)
}
2020-07-04 15:27:43 +00:00
/**
 * Answers a request whose image is not (usably) cached by fetching it from the upstream image server.
 *
 * The upstream body is always streamed to the client. When the cache entry is editable and upstream
 * supplied both a parseable `Content-Length` and a `Last-Modified`, the stream is additionally written
 * to the cache through [cipher], and a database row is created if [imageDatum] is `null`.
 *
 * @param sanitizedUri path appended to the upstream image server URL; also used in log messages
 * @param cipher cipher wrapped around the cache file's output stream
 * @param imageId cache/database identifier of the image
 * @param imageDatum existing database row for the image, or `null` if none was found
 * @return a response with the upstream status when upstream did not answer 200, a 500 when upstream
 *         sent a missing or non-image `Content-Type`, otherwise a 200 streaming the image
 */
private fun Request.handleCacheMiss(sanitizedUri: String, cipher: Cipher, imageId: String, imageDatum: ImageDatum?): Response {
    LOGGER.info { "Request for $sanitizedUri missed cache" }
    statistics.getAndUpdate {
        it.copy(cacheMisses = it.cacheMisses + 1)
    }

    val mdResponse = client(Request(Method.GET, "${remoteSettings.imageServer}$sanitizedUri"))

    if (mdResponse.status != Status.OK) {
        LOGGER.trace { "Upstream query for $sanitizedUri errored with status ${mdResponse.status}" }
        mdResponse.close()
        return Response(mdResponse.status)
    }

    val contentType = mdResponse.header("Content-Type")
    val contentLength = mdResponse.header("Content-Length")
    val lastModified = mdResponse.header("Last-Modified")

    // A missing Content-Type is handled like a wrong one instead of failing with a NullPointerException.
    if (contentType == null || !contentType.isImageMimetype()) {
        LOGGER.trace { "Upstream query for $sanitizedUri returned bad mimetype $contentType" }
        mdResponse.close()
        return Response(Status.INTERNAL_SERVER_ERROR)
    }

    LOGGER.trace { "Upstream query for $sanitizedUri succeeded" }

    // Parse the length once up front; an unparseable header is neither trusted for caching nor forwarded.
    val expectedLength = contentLength?.toLongOrNull()
    val servedLength = contentLength.takeIf { expectedLength != null }

    val editor = cache.editUnsafe(imageId.toCacheId())

    // A null editor means that this file is being written to
    // concurrently so we skip the cache process
    if (editor == null || expectedLength == null || lastModified == null) {
        editor?.abort()
        LOGGER.trace { "Request for $sanitizedUri is being served" }
        return respondWithImage(mdResponse.body.stream, servedLength, contentType, lastModified, false)
    }

    LOGGER.trace { "Request for $sanitizedUri is being cached and served" }

    if (imageDatum == null) {
        try {
            synchronized(database) {
                transaction(database) {
                    ImageDatum.new(imageId) {
                        this.contentType = contentType
                        this.lastModified = lastModified
                    }
                }
            }
        } catch (ignored: ExposedSQLException) {
            // some other code got to the database first, fall back to just serving.
            // The return is essential: the editor has been aborted and must not be written to below.
            editor.abort()
            LOGGER.trace { "Request for $sanitizedUri is being served" }
            return respondWithImage(mdResponse.body.stream, servedLength, contentType, lastModified, false)
        }
    }

    val tee = CachingInputStream(
        mdResponse.body.stream,
        executor, CipherOutputStream(BufferedOutputStream(editor.newOutputStream(0)), cipher)
    ) {
        try {
            // Only keep the cache file when it has exactly the length upstream announced.
            if (editor.getLength(0) == expectedLength) {
                LOGGER.info { "Cache download for $sanitizedUri committed" }
                editor.commit()
                cache.flush()
            } else {
                LOGGER.warn { "Cache download for $sanitizedUri aborted" }
                editor.abort()
            }
        } catch (e: Exception) {
            LOGGER.warn(e) { "Cache go/no go for $sanitizedUri failed" }
        }
    }
    return respondWithImage(tee, servedLength, contentType, lastModified, false)
}
2020-06-15 22:25:31 +00:00
/**
 * Maps an image id to its relative cache path: the first eight characters, split into four
 * two-character segments, followed by the full id, all joined with [File.separator].
 * E.g. `0123456789abcdef` becomes `01/23/45/67/0123456789abcdef` on a `/`-separated platform.
 *
 * The receiver must be at least eight characters long.
 */
private fun String.toCacheId(): String =
    this.substring(0, 8).chunked(2).plus(this).joinToString(File.separator)
2020-06-13 22:36:26 +00:00
/**
 * Builds the 200 response that streams an image to the client.
 *
 * @param input stream providing the image bytes
 * @param length value for `Content-Length`; when `null` the body is sent with `Transfer-Encoding: chunked`
 * @param type value for `Content-Type`
 * @param lastModified value for `Last-Modified`; the header is omitted when `null`
 * @param cached whether `X-Cache` is reported as `HIT` (`true`) or `MISS` (`false`)
 */
private fun respondWithImage(input: InputStream, length: String?, type: String, lastModified: String?, cached: Boolean): Response {
    val typed = Response(Status.OK)
        .header("Content-Type", type)
        .header("X-Content-Type-Options", "nosniff")

    val withBody = if (length == null) {
        typed.body(input).header("Transfer-Encoding", "chunked")
    } else {
        typed.body(input, length.toLong()).header("Content-Length", length)
    }

    val withLastModified = if (lastModified == null) {
        withBody
    } else {
        withBody.header("Last-Modified", lastModified)
    }

    val cacheState = if (cached) "HIT" else "MISS"
    return withLastModified.header("X-Cache", cacheState)
}
2020-06-21 19:49:10 +00:00
companion object {
    // JSON mapper used for tokens: unknown properties are tolerated and the JavaTimeModule is registered.
    private val JACKSON: ObjectMapper = jacksonObjectMapper().apply {
        configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false)
        registerModule(JavaTimeModule())
    }

    private val ALLOWED_REFERER_DOMAINS = listOf("mangadex.org", "mangadex.network") // TODO: Factor out hardcoded domains?

    /**
     * Filter applied to every image handler: the max-age caching filter followed by a filter
     * that adds the CORS and timing-allow-origin headers to each response.
     */
    private fun baseHandler(): Filter {
        val maxAge = CachingFilters.Response.MaxAge(Clock.systemUTC(), Constants.MAX_AGE_CACHE)

        val originHeaders = Filter { next: HttpHandler ->
            { request: Request ->
                next(request)
                    .header("access-control-allow-origin", "https://mangadex.org")
                    .header("access-control-allow-headers", "*")
                    .header("access-control-allow-methods", "GET")
                    .header("timing-allow-origin", "https://mangadex.org")
            }
        }

        return maxAge.then(originHeaders)
    }
}
2020-06-13 22:36:26 +00:00
}
/**
 * Whether this MIME type string denotes an image, i.e. starts with `image/` in any letter case.
 *
 * The prefix is compared with `ignoreCase` instead of lower-casing the receiver first, so the
 * result does not depend on the JVM default locale (a Turkish locale lower-cases `I` to a dotless `ı`).
 */
private fun String.isImageMimetype() = this.startsWith("image/", ignoreCase = true)
2020-06-13 22:36:26 +00:00
2020-08-11 19:12:01 +00:00
/**
 * Assembles the HTTP server: an Apache-backed upstream client, the [ImageServer] using it,
 * the filter chain and the four image routes (plain and tokenized, full and data-saver).
 *
 * `remoteSettings.tls` must be non-null; it is dereferenced with `!!` when the Netty server is created.
 */
fun getServer(cache: DiskLruCache, database: Database, remoteSettings: RemoteSettings, serverSettings: ServerSettings, statistics: AtomicReference<Statistics>, isHandled: AtomicBoolean): Http4kServer {
    // Cookies are ignored; connect, socket and connection-request timeouts are all set to 3000.
    val requestConfig = RequestConfig.custom()
        .setCookieSpec(CookieSpecs.IGNORE_COOKIES)
        .setConnectTimeout(3000)
        .setSocketTimeout(3000)
        .setConnectionRequestTimeout(3000)
        .build()

    val apacheClient = HttpClients.custom()
        .disableConnectionState()
        .setDefaultRequestConfig(requestConfig)
        .setMaxConnTotal(3000)
        .setMaxConnPerRoute(3000)
        .build()

    val upstream = Apache4Client(responseBodyMode = BodyMode.Stream, client = apacheClient)
    val imageServer = ImageServer(cache, database, statistics, remoteSettings, upstream)

    val filters = timeRequest()
        .then(catchAllHideDetails())
        .then(ServerFilters.CatchLensFailure)
        .then(setHandled(isHandled))
        .then(addCommonHeaders())

    val app = filters.then(
        routes(
            "/data/{chapterHash}/{fileName}" bind Method.GET to imageServer.handler(dataSaver = false),
            "/data-saver/{chapterHash}/{fileName}" bind Method.GET to imageServer.handler(dataSaver = true),
            "/{token}/data/{chapterHash}/{fileName}" bind Method.GET to imageServer.handler(
                dataSaver = false,
                tokenized = true
            ),
            "/{token}/data-saver/{chapterHash}/{fileName}" bind Method.GET to imageServer.handler(
                dataSaver = true,
                tokenized = true
            )
        )
    )

    return app.asServer(Netty(remoteSettings.tls!!, serverSettings, statistics))
}
/** Filter that sets [isHandled] to `true` for every request before passing it on unchanged. */
fun setHandled(isHandled: AtomicBoolean): Filter =
    Filter { next: HttpHandler ->
        { request: Request ->
            isHandled.set(true)
            next(request)
        }
    }
/**
 * Filter that logs the arrival and completion of each request and reports the time spent in the
 * downstream handler (in milliseconds) in the `X-Time-Taken` response header.
 *
 * For logging, anything before `/data` in the path is replaced by the literal `/{token}`.
 */
fun timeRequest(): Filter = Filter { next: HttpHandler ->
    { request: Request ->
        val rawPath = request.uri.path
        val cleanedUri = if (rawPath.startsWith("/data")) rawPath else rawPath.replaceBefore("/data", "/{token}")

        LOGGER.info { "Request for $cleanedUri received from ${request.source?.address}" }

        val startedAt = System.currentTimeMillis()
        val response = next(request)
        val latency = System.currentTimeMillis() - startedAt

        LOGGER.info { "Request for $cleanedUri completed (TTFB) in ${latency}ms" }

        response.header("X-Time-Taken", latency.toString())
    }
}