Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add media type detection when linking a file #5191

Merged
merged 1 commit into from
Oct 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.contexts.{files => fi
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model._
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.routes.{DelegateFilesRoutes, FilesRoutes, LinkFilesRoutes}
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.schemas.{files => filesSchemaId}
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.{FileAttributesUpdateStream, Files}
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.{FileAttributesUpdateStream, Files, FormDataExtractor, MediaTypeDetector}
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.StoragesConfig.{ShowFileLocation, StorageTypeConfig}
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.contexts.{storages => storageCtxId, storagesMetadata => storageMetaCtxId}
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.model._
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.FileOperations
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.{FileOperations, LinkFileAction}
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.disk.DiskFileOperations
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.remote.RemoteDiskFileOperations
import ch.epfl.bluebrain.nexus.delta.plugins.storage.storages.operations.remote.client.RemoteDiskStorageClient
Expand Down Expand Up @@ -191,6 +191,15 @@ class StoragePluginModule(priority: Int) extends ModuleDef {
FileOperations.apply(disk, remoteDisk, s3)
}

make[MediaTypeDetector].from { (cfg: StoragePluginConfig) =>
new MediaTypeDetector(cfg.files.mediaTypeDetector)
}

make[LinkFileAction].from {
(fetchStorage: FetchStorage, mediaTypeDetector: MediaTypeDetector, s3FileOps: S3FileOperations) =>
LinkFileAction(fetchStorage, mediaTypeDetector, s3FileOps)
}

make[Files].from {
(
cfg: StoragePluginConfig,
Expand All @@ -200,19 +209,20 @@ class StoragePluginModule(priority: Int) extends ModuleDef {
clock: Clock[IO],
uuidF: UUIDF,
as: ActorSystem[Nothing],
fileOps: FileOperations
fileOps: FileOperations,
mediaTypeDetector: MediaTypeDetector,
linkFileAction: LinkFileAction
) =>
Files(
fetchContext,
fetchStorage,
FormDataExtractor(mediaTypeDetector)(as.classicSystem),
xas,
cfg.files,
cfg.files.eventLog,
fileOps,
linkFileAction,
clock
)(
uuidF,
as
)
)(uuidF)
}

make[FileAttributesUpdateStream].fromEffect {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
package ch.epfl.bluebrain.nexus.delta.plugins.storage.files

import akka.actor.typed.ActorSystem
import akka.actor.{ActorSystem => ClassicActorSystem}
import akka.http.scaladsl.model.ContentTypes.`application/octet-stream`
import akka.http.scaladsl.model.Uri
import cats.effect.{Clock, IO}
Expand Down Expand Up @@ -32,6 +30,7 @@ import ch.epfl.bluebrain.nexus.delta.sdk.projects.FetchContext
import ch.epfl.bluebrain.nexus.delta.sdk.projects.model.{ApiMappings, ProjectContext}
import ch.epfl.bluebrain.nexus.delta.sourcing.ScopedEntityDefinition.Tagger
import ch.epfl.bluebrain.nexus.delta.sourcing._
import ch.epfl.bluebrain.nexus.delta.sourcing.config.EventLogConfig
import ch.epfl.bluebrain.nexus.delta.sourcing.model.Identity.Subject
import ch.epfl.bluebrain.nexus.delta.sourcing.model.Tag.UserTag
import ch.epfl.bluebrain.nexus.delta.sourcing.model.{EntityType, ProjectRef, ResourceRef, SuccessElemStream}
Expand All @@ -47,7 +46,8 @@ final class Files(
log: FilesLog,
fetchContext: FetchContext,
fetchStorage: FetchStorage,
fileOperations: FileOperations
fileOperations: FileOperations,
linkFile: LinkFileAction
)(implicit uuidF: UUIDF) {

implicit private val kamonComponent: KamonMetricComponent = KamonMetricComponent(entityType.value)
Expand Down Expand Up @@ -280,17 +280,11 @@ final class Files(
tag: Option[UserTag]
)(implicit caller: Caller): IO[FileResource] = {
for {
projectContext <- fetchContext.onCreate(project)
iri <- id.fold(generateId(projectContext)) { FileId.iriExpander(_, projectContext) }
storageIri <- storageId.traverse(expandStorageIri(_, projectContext))
(storageRef, storage) <- fetchStorage.onWrite(storageIri, project)
s3Metadata <- fileOperations.link(storage, linkRequest.path)
filename <- IO.fromOption(linkRequest.path.lastSegment)(InvalidFilePath)
attr = FileAttributes.from(
FileDescription(filename, linkRequest.mediaType.orElse(s3Metadata.contentType), linkRequest.metadata),
s3Metadata.metadata
)
res <- eval(CreateFile(iri, project, storageRef, storage.tpe, attr, caller.subject, tag))
projectContext <- fetchContext.onCreate(project)
iri <- id.fold(generateId(projectContext)) { FileId.iriExpander(_, projectContext) }
storageIri <- storageId.traverse(expandStorageIri(_, projectContext))
storageWrite <- linkFile(storageIri, project, linkRequest)
res <- eval(CreateFile(iri, project, storageWrite, caller.subject, tag))
} yield res
}.span("linkFile")

Expand All @@ -302,28 +296,12 @@ final class Files(
tag: Option[UserTag]
)(implicit caller: Caller): IO[FileResource] = {
for {
(iri, pc) <- id.expandIri(fetchContext.onModify)
storageIri <- storageId.traverse(expandStorageIri(_, pc))
_ <- test(UpdateFile(iri, id.project, testStorageRef, testStorageType, testAttributes, rev, caller.subject, tag))
(storageRef, storage) <- fetchStorage.onWrite(storageIri, id.project)
s3Metadata <- fileOperations.link(storage, linkRequest.path)
filename <- IO.fromOption(linkRequest.path.lastSegment)(InvalidFilePath)
attr = FileAttributes.from(
FileDescription(filename, linkRequest.mediaType.orElse(s3Metadata.contentType), linkRequest.metadata),
s3Metadata.metadata
)
res <- eval(
UpdateFile(
iri,
id.project,
storageRef,
storage.tpe,
attr,
rev,
caller.subject,
tag
)
)
(iri, pc) <- id.expandIri(fetchContext.onModify)
project = id.project
storageIri <- storageId.traverse(expandStorageIri(_, pc))
_ <- test(UpdateFile(iri, project, testStorageRef, testStorageType, testAttributes, rev, caller.subject, tag))
storageWrite <- linkFile(storageIri, project, linkRequest)
res <- eval(UpdateFile(iri, project, storageWrite, rev, caller.subject, tag))
} yield res
}.span("updateLinkedFile")

Expand Down Expand Up @@ -353,7 +331,7 @@ final class Files(
_ <- test(UpdateFile(iri, id.project, testStorageRef, testStorageType, testAttributes, rev, caller.subject, tag))
(storageRef, storage) <- fetchStorage.onWrite(storageIri, id.project)
metadata <- legacyLinkFile(storage, path, description.filename, iri)
attributes = FileAttributes.from(description, metadata)
attributes = FileAttributes.from(description.filename, description.mediaType, description.metadata, metadata)
res <- eval(UpdateFile(iri, id.project, storageRef, storage.tpe, attributes, rev, caller.subject, tag))
} yield res
}.span("updateLink")
Expand Down Expand Up @@ -493,7 +471,8 @@ final class Files(
_ <- test(CreateFile(iri, project, testStorageRef, testStorageType, testAttributes, caller.subject, tag))
(storageRef, storage) <- fetchStorage.onWrite(storageIri, project)
storageMetadata <- legacyLinkFile(storage, path, description.filename, iri)
fileAttributes = FileAttributes.from(description, storageMetadata)
fileAttributes =
FileAttributes.from(description.filename, description.mediaType, description.metadata, storageMetadata)
res <- eval(CreateFile(iri, project, storageRef, storage.tpe, fileAttributes, caller.subject, tag))
} yield res

Expand All @@ -514,9 +493,8 @@ final class Files(
private def saveFileToStorage(iri: Iri, storage: Storage, uploadRequest: FileUploadRequest): IO[FileAttributes] = {
for {
info <- formDataExtractor(uploadRequest.entity, storage.storageValue.maxFileSize)
description = FileDescription.from(info, uploadRequest.metadata)
storageMetadata <- fileOperations.save(storage, info, uploadRequest.contentLength)
} yield FileAttributes.from(description, storageMetadata)
} yield FileAttributes.from(info.filename, info.contentType, uploadRequest.metadata, storageMetadata)
}.adaptError { case e: SaveFileRejection => SaveRejection(iri, storage.id, e) }

private def generateId(pc: ProjectContext): IO[Iri] =
Expand Down Expand Up @@ -776,21 +754,21 @@ object Files {
def apply(
fetchContext: FetchContext,
fetchStorage: FetchStorage,
formDataExtractor: FormDataExtractor,
xas: Transactors,
config: FilesConfig,
eventLogConfig: EventLogConfig,
fileOps: FileOperations,
linkFile: LinkFileAction,
clock: Clock[IO]
)(implicit
uuidF: UUIDF,
as: ActorSystem[Nothing]
): Files = {
implicit val classicAs: ClassicActorSystem = as.classicSystem
uuidF: UUIDF
): Files =
new Files(
FormDataExtractor(config.mediaTypeDetector),
ScopedEventLog(definition(clock), config.eventLog, xas),
formDataExtractor,
ScopedEventLog(definition(clock), eventLogConfig, xas),
fetchContext,
fetchStorage,
fileOps
fileOps,
linkFile
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,9 @@ import akka.stream.scaladsl.{Keep, Sink}
import cats.effect.IO
import cats.syntax.all._
import ch.epfl.bluebrain.nexus.delta.kernel.error.NotARejection
import ch.epfl.bluebrain.nexus.delta.kernel.http.MediaTypeDetectorConfig
import ch.epfl.bluebrain.nexus.delta.kernel.utils.FileUtils
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model.FileRejection.{FileTooLarge, InvalidMultipartFieldName, WrappedAkkaRejection}

import scala.concurrent.{ExecutionContext, Future}
import scala.util.Try

trait FormDataExtractor {

Expand All @@ -34,9 +31,9 @@ trait FormDataExtractor {
def apply(entity: HttpEntity, maxFileSize: Long): IO[UploadedFileInformation]
}

case class UploadedFileInformation(
/**
  * Information about a file extracted from a multipart form-data upload.
  *
  * @param filename
  *   the client-supplied file name; the extractor substitutes a default when the part carries none
  * @param contentType
  *   the resolved content type, if one could be determined (client-supplied value takes precedence
  *   over extension-based detection); [[None]] when nothing could be resolved
  * @param contents
  *   the raw body-part entity holding the uploaded bytes
  */
final case class UploadedFileInformation(
    filename: String,
    contentType: Option[ContentType],
    contents: BodyPartEntity
)

Expand All @@ -61,9 +58,7 @@ object FormDataExtractor {
createStrict = (_, parts) => Multipart.FormData.Strict(parts)
)

def apply(
mediaTypeDetector: MediaTypeDetectorConfig
)(implicit as: ActorSystem): FormDataExtractor =
def apply(mediaTypeDetector: MediaTypeDetector)(implicit as: ActorSystem): FormDataExtractor =
new FormDataExtractor {
implicit val ec: ExecutionContext = as.getDispatcher

Expand Down Expand Up @@ -115,40 +110,19 @@ object FormDataExtractor {

private def extractFile(part: FormData.BodyPart): Future[Option[UploadedFileInformation]] = part match {
case part if part.name == FileFieldName =>
val filename = part.filename.filterNot(_.isEmpty).getOrElse(defaultFilename)
val contentType = detectContentType(filename, part.entity.contentType)
val filename = part.filename.filterNot(_.isEmpty).getOrElse(defaultFilename)
val contentTypeFromRequest = part.entity.contentType
val suppliedContentType = Option.when(contentTypeFromRequest != defaultContentType)(contentTypeFromRequest)

Future(
UploadedFileInformation(
filename,
contentType,
mediaTypeDetector(filename, suppliedContentType, Some(contentTypeFromRequest)),
part.entity
).some
)
case part =>
part.entity.discardBytes().future.as(None)
}

private def detectContentType(filename: String, contentTypeFromRequest: ContentType) = {
val bodyDefinedContentType = Option.when(contentTypeFromRequest != defaultContentType)(contentTypeFromRequest)

val extensionOpt = FileUtils.extension(filename)

def detectFromConfig = for {
extension <- extensionOpt
customMediaType <- mediaTypeDetector.find(extension)
} yield contentType(customMediaType)

def detectAkkaFromExtension = extensionOpt.flatMap { e =>
Try(MediaTypes.forExtension(e)).map(contentType).toOption
}

bodyDefinedContentType
.orElse(detectFromConfig)
.orElse(detectAkkaFromExtension)
.getOrElse(contentTypeFromRequest)
}

private def contentType(mediaType: MediaType) = ContentType(mediaType, () => HttpCharsets.`UTF-8`)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package ch.epfl.bluebrain.nexus.delta.plugins.storage.files

import akka.http.scaladsl.model.{ContentType, HttpCharsets, MediaType, MediaTypes}
import ch.epfl.bluebrain.nexus.delta.kernel.http.MediaTypeDetectorConfig
import ch.epfl.bluebrain.nexus.delta.kernel.utils.FileUtils

import scala.util.Try

/**
  * Resolves the content type of an incoming file, falling back to extension-based detection when
  * the client did not provide one.
  *
  * Resolution order: the client-provided value, then the configured extension mapping, then Akka's
  * built-in extension registry, and finally the given fallback.
  *
  * @param config
  *   maps file extensions to custom media types
  */
final class MediaTypeDetector(config: MediaTypeDetectorConfig) {

  def apply(filename: String, provided: Option[ContentType], fallback: Option[ContentType]): Option[ContentType] = {
    val maybeExtension = FileUtils.extension(filename)

    // Custom mapping from configuration takes precedence over Akka's registry.
    def fromConfig: Option[ContentType] =
      maybeExtension.flatMap(config.find).map(asContentType)

    // Akka's lookup may throw for unknown extensions, hence the Try guard.
    def fromAkkaRegistry: Option[ContentType] =
      maybeExtension.flatMap { ext =>
        Try(MediaTypes.forExtension(ext)).toOption.map(asContentType)
      }

    provided.orElse(fromConfig).orElse(fromAkkaRegistry).orElse(fallback)
  }

  private def asContentType(mediaType: MediaType): ContentType =
    ContentType(mediaType, () => HttpCharsets.`UTF-8`)

}
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ import akka.http.scaladsl.model.{ContentType, Uri}
import ch.epfl.bluebrain.nexus.delta.plugins.storage.files.model.FileAttributes.FileAttributesOrigin
import ch.epfl.bluebrain.nexus.delta.sdk.implicits._
import ch.epfl.bluebrain.nexus.delta.sourcing.model.Label
import io.circe.{Decoder, Encoder}
import io.circe.generic.extras.Configuration
import io.circe.generic.extras.semiauto.deriveConfiguredEncoder
import io.circe.{Decoder, Encoder}

import java.util.UUID

Expand Down Expand Up @@ -44,31 +44,23 @@ final case class FileAttributes(
bytes: Long,
digest: Digest,
origin: FileAttributesOrigin
) extends LimitedFileAttributes

trait LimitedFileAttributes {
def location: Uri
def path: Path
def filename: String
def mediaType: Option[ContentType]
def keywords: Map[Label, String]
def description: Option[String]
def name: Option[String]
def bytes: Long
def digest: Digest
def origin: FileAttributesOrigin
}
)

object FileAttributes {

def from(description: FileDescription, storageMetadata: FileStorageMetadata): FileAttributes = {
val customMetadata = description.metadata.getOrElse(FileCustomMetadata.empty)
def from(
filename: String,
contentType: Option[ContentType],
metadata: Option[FileCustomMetadata],
storageMetadata: FileStorageMetadata
): FileAttributes = {
val customMetadata = metadata.getOrElse(FileCustomMetadata.empty)
FileAttributes(
storageMetadata.uuid,
storageMetadata.location,
storageMetadata.path,
description.filename,
description.mediaType,
filename,
contentType,
customMetadata.keywords.getOrElse(Map.empty),
customMetadata.description,
customMetadata.name,
Expand Down
Loading