Skip to content

Commit

Permalink
Magic numbers support
Browse files Browse the repository at this point in the history
  • Loading branch information
morisil committed Nov 5, 2024
1 parent 2109744 commit 61decfd
Show file tree
Hide file tree
Showing 12 changed files with 145 additions and 33 deletions.
20 changes: 20 additions & 0 deletions src/commonMain/kotlin/content/Content.kt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import kotlinx.serialization.ExperimentalSerializationApi
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.JsonClassDiscriminator
import kotlin.io.encoding.Base64
import kotlin.io.encoding.ExperimentalEncodingApi

@Serializable
@JsonClassDiscriminator("type")
Expand Down Expand Up @@ -33,3 +35,21 @@ interface ContentBuilder {
}

}

/**
 * Shared contract for content builders that carry raw binary data.
 *
 * Implementations only need to store [bytes]; format detection and
 * Base64 encoding are provided here.
 */
interface DataBuilder {

    /** Raw binary content; must be assigned before [magicNumber] or [toBase64] is called. */
    var bytes: ByteArray?

    /**
     * Detects the format of [bytes] via [findMagicNumber].
     *
     * @return the detected [MagicNumber].
     * @throws IllegalArgumentException if [bytes] is `null` or no supported format is detected.
     */
    fun magicNumber(): MagicNumber {
        val bytes = requireNotNull(bytes) {
            "bytes must be provided"
        }
        return requireNotNull(bytes.findMagicNumber()) {
            "provided bytes do not contain any supported format"
        }
    }

    /**
     * Encodes [bytes] as a Base64 string.
     *
     * @throws IllegalArgumentException if [bytes] is `null`.
     */
    @OptIn(ExperimentalEncodingApi::class)
    fun toBase64(): String = Base64.encode(
        // Same precondition and message as magicNumber(), instead of a bare `!!` NPE.
        requireNotNull(bytes) {
            "bytes must be provided"
        }
    )

}
27 changes: 24 additions & 3 deletions src/commonMain/kotlin/content/Document.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package com.xemantic.anthropic.content
import com.xemantic.anthropic.cache.CacheControl
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
import kotlin.io.encoding.ExperimentalEncodingApi

@Serializable
@SerialName("document")
Expand Down Expand Up @@ -32,10 +33,30 @@ data class Document(

}

/**
 * Mutable builder for a document.
 *
 * Only the raw [bytes] are supplied; the `data` and `mediaType`
 * properties are gone, and Base64 encoding plus format detection
 * come from [DataBuilder].
 */
class Builder : DataBuilder {
    override var bytes: ByteArray? = null
    var cacheControl: CacheControl? = null
}

}

/**
 * Maps this magic number to the matching [Document.MediaType].
 *
 * @return [Document.MediaType.APPLICATION_PDF] for [MagicNumber.PDF],
 *   or `null` for every other magic number.
 */
fun MagicNumber.toDocumentMediaType(): Document.MediaType? =
    if (this == MagicNumber.PDF) Document.MediaType.APPLICATION_PDF else null

/**
 * Builds a [Document] from the bytes configured in [block].
 *
 * The media type is derived from the magic number of the builder's bytes.
 *
 * @param block configuration applied to a fresh [Document.Builder].
 * @throws IllegalArgumentException if no bytes were provided, or the bytes
 *   are not in a format that maps to a [Document.MediaType].
 */
@OptIn(ExperimentalEncodingApi::class)
fun Document(block: Document.Builder.() -> Unit): Document {
    val builder = Document.Builder().apply(block)
    val detected = builder.magicNumber()
    val mediaType = requireNotNull(detected.toDocumentMediaType()) {
        "provided bytes do not contain supported Document format"
    }
    // NOTE(review): builder.cacheControl is never read here, so a value set
    // inside `block` is dropped - confirm whether it should be forwarded.
    val source = Document.Source(
        mediaType = mediaType,
        data = builder.toBase64()
    )
    return Document(source = source)
}
32 changes: 15 additions & 17 deletions src/commonMain/kotlin/content/Image.kt
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ package com.xemantic.anthropic.content
import com.xemantic.anthropic.cache.CacheControl
import kotlinx.serialization.SerialName
import kotlinx.serialization.Serializable
import kotlin.io.encoding.Base64
import kotlin.io.encoding.ExperimentalEncodingApi

@Serializable
@SerialName("image")
Expand Down Expand Up @@ -40,32 +38,32 @@ data class Image(

}

/**
 * Mutable builder for an image.
 *
 * Only the raw [bytes] are supplied; the `data` and `mediaType`
 * properties are gone, and Base64 encoding plus format detection
 * come from [DataBuilder].
 */
class Builder : DataBuilder {
    override var bytes: ByteArray? = null
    var cacheControl: CacheControl? = null
}

}

// TODO move image magic here from Claudine to further simplify the API
/**
 * Maps this magic number to the matching [Image.MediaType].
 *
 * @return the image media type for JPEG, PNG, GIF and WEBP,
 *   or `null` for any other magic number.
 */
fun MagicNumber.toImageMediaType(): Image.MediaType? {
    return when (this) {
        MagicNumber.JPEG -> Image.MediaType.IMAGE_JPEG
        MagicNumber.PNG -> Image.MediaType.IMAGE_PNG
        MagicNumber.GIF -> Image.MediaType.IMAGE_GIF
        MagicNumber.WEBP -> Image.MediaType.IMAGE_WEBP
        // Anything else is not an image format.
        else -> null
    }
}

// TODO write it functional way
/**
 * Builds an [Image] from the bytes configured in [block].
 *
 * The media type is derived from the magic number of the builder's bytes
 * rather than being set explicitly by the caller.
 *
 * @param block configuration applied to a fresh [Image.Builder].
 * @throws IllegalArgumentException if no bytes were provided, or the bytes
 *   are not in a format that maps to an [Image.MediaType].
 */
fun Image(block: Image.Builder.() -> Unit): Image {
    val builder = Image.Builder()
    block(builder)
    val magicNumber = builder.magicNumber()
    val mediaType = requireNotNull(magicNumber.toImageMediaType()) {
        "provided bytes do not contain any supported Image format"
    }
    // NOTE(review): builder.cacheControl is never read here, so a value set
    // inside `block` is dropped - confirm whether it should be forwarded.
    return Image(
        source = Image.Source(
            mediaType = mediaType,
            data = builder.toBase64()
        )
    )
}
44 changes: 44 additions & 0 deletions src/commonMain/kotlin/content/MagicNumbers.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package com.xemantic.anthropic.content

// Tag that opens the RIFF container wrapping WebP image data.
private val RIFF_TAG: ByteArray = "RIFF".encodeToByteArray()

/**
 * File formats recognisable by their leading "magic number" bytes.
 *
 * Each entry declares its signature bytes and, optionally, a custom [test]
 * for formats whose signature does not sit at offset 0. The default test is
 * a plain prefix match.
 */
@OptIn(ExperimentalUnsignedTypes::class)
enum class MagicNumber(
    vararg magic: UByte,
    private val test: (
        data: ByteArray,
        magic: ByteArray
    ) -> Boolean = { data, magic ->
        data.startsWith(magic)
    }
) {

    PDF(*"%PDF-".toUByteArray()),
    JPEG(0xFFu, 0xD8u, 0xFFu),
    PNG(0x89u, 0x50u, 0x4Eu, 0x47u, 0x0Du, 0x0Au, 0x1Au, 0x0Au),
    GIF(*"GIF8".toUByteArray()),

    // WebP layout: "RIFF" (0..3), chunk size (4..7), "WEBP" (8..11).
    // Both tags are checked so other data that merely has "WEBP" at
    // offset 8 is not misdetected.
    WEBP(*"WEBP".toUByteArray(), test = { data, magic ->
        data.size >= 12 &&
            data.startsWith(RIFF_TAG) &&
            magic.indices.all { data[8 + it] == magic[it] }
    });

    // Converted once per entry instead of on every find() call.
    private val magic: ByteArray = magic.toByteArray()

    companion object {

        /**
         * Returns the first entry, in declaration order, whose test accepts
         * [data], or `null` when none matches.
         */
        fun find(data: ByteArray): MagicNumber? =
            entries.find { it.test(data, it.magic) }
    }

}

/**
 * Looks up the [MagicNumber] matching the content of this byte array.
 *
 * @return the result of [MagicNumber.find] for this array, which is `null`
 *   when no known format matches.
 */
fun ByteArray.findMagicNumber(): MagicNumber? {
    return MagicNumber.find(data = this)
}

/**
 * Converts each character of this string to an unsigned byte taken from
 * its character code.
 */
@OptIn(ExperimentalUnsignedTypes::class)
private fun String.toUByteArray(): UByteArray =
    UByteArray(length) { index -> this[index].code.toUByte() }

/**
 * Checks whether this array begins with the bytes of [prefix].
 *
 * @param prefix the bytes expected at the start of this array.
 * @return `true` if this array is at least as long as [prefix] and its
 *   leading bytes equal [prefix]; an empty prefix always matches.
 */
fun ByteArray.startsWith(
    prefix: ByteArray
): Boolean {
    if (size < prefix.size) return false
    for (index in prefix.indices) {
        if (this[index] != prefix[index]) return false
    }
    return true
}
18 changes: 8 additions & 10 deletions src/jvmMain/kotlin/content/JvmDocument.kt
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
package com.xemantic.anthropic.content

import java.io.File
import kotlin.io.encoding.Base64
import kotlin.io.encoding.ExperimentalEncodingApi

/**
 * Creates a [Document] from the file at [path].
 *
 * The file is loaded through the builder, so it goes through the same
 * `Document { ... }` factory as any other builder-created document.
 */
fun Document(path: String): Document = Document {
    path(path)
}

/** Loads the builder's bytes from the file located at [path]. */
fun Document.Builder.path(path: String) {
    file(File(path))
}

// TODO in the future this can be moved to jvmAndPosixMain

/**
 * Creates a [Document] from the given file.
 *
 * Kept so existing callers of the `File` overload continue to compile.
 * It delegates to the builder-based factory instead of hard-coding
 * `APPLICATION_PDF` and Base64-encoding the bytes itself.
 */
fun Document(path: File): Document = Document {
    file(path)
}

/** Reads the whole content of [file] into the builder's bytes. */
fun Document.Builder.file(file: File) {
    bytes = file.readBytes()
}
6 changes: 5 additions & 1 deletion src/jvmMain/kotlin/content/JvmImage.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@ package com.xemantic.anthropic.content

import java.io.File

/**
 * Creates an [Image] from the file at [path], loading it through
 * the image builder.
 */
fun Image(path: String): Image {
    return Image { path(path) }
}

/** Loads the builder's bytes from the file located at [path]. */
fun Image.Builder.path(path: String) {
    file(File(path))
}

/**
 * Reads the whole content of [file] into the builder's bytes.
 *
 * The file is read once; the former `data` property is no longer assigned.
 */
fun Image.Builder.file(file: File) {
    bytes = file.readBytes()
}
3 changes: 1 addition & 2 deletions src/jvmMain/kotlin/tool/computer/JvmComputer.kt
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ import javax.imageio.ImageIO
/** JVM implementation of [ComputerService]. */
object JvmComputerService : ComputerService {

    /**
     * Captures a screenshot and wraps it in an [Image].
     *
     * Only the raw bytes are supplied to the builder; the media type is no
     * longer set here.
     * NOTE(review): the previous code declared IMAGE_JPEG explicitly, so
     * takeScreenshot() presumably returns JPEG data - confirm that the
     * `Image { ... }` factory detects it.
     */
    override fun screenshot() = Image {
        bytes = takeScreenshot()
    }

}
Expand Down
28 changes: 28 additions & 0 deletions src/jvmTest/kotlin/content/MagicNumberTest.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
package com.xemantic.anthropic.content

import io.kotest.matchers.shouldBe
import org.junit.jupiter.api.Test
import java.io.File

/** Verifies magic number detection against the minimal fixture files in `test-data`. */
class MagicNumberTest {

    @Test
    fun shouldDetectImageMediaType() {
        // Fixture path paired with the magic number it must be detected as.
        val expectations = listOf(
            "test-data/minimal.pdf" to MagicNumber.PDF,
            "test-data/minimal.jpg" to MagicNumber.JPEG,
            "test-data/minimal.png" to MagicNumber.PNG,
            "test-data/minimal.gif" to MagicNumber.GIF,
            "test-data/minimal.webp" to MagicNumber.WEBP
        )
        for ((fixturePath, expected) in expectations) {
            val detected = File(fixturePath).readBytes().findMagicNumber()
            detected shouldBe expected
        }
    }

}
Binary file added test-data/minimal.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added test-data/minimal.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added test-data/minimal.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added test-data/minimal.webp
Binary file not shown.

0 comments on commit 61decfd

Please sign in to comment.