From f3bd8e80101adcba8b9a9a7a7295c6c58d5ba07f Mon Sep 17 00:00:00 2001 From: Mike Perrone Date: Wed, 27 Mar 2024 18:11:49 -0700 Subject: [PATCH 1/2] version 3 spec refers to itself correctly --- specs/type/0.3.0.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specs/type/0.3.0.json b/specs/type/0.3.0.json index 92fe6b0..127d71b 100644 --- a/specs/type/0.3.0.json +++ b/specs/type/0.3.0.json @@ -1,6 +1,6 @@ { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$id": "https://recap.build/specs/type/0.2.0.json", + "$id": "https://recap.build/specs/type/0.3.0.json", "$ref": "#/$defs/RecapType", "$defs": { "RecapType": { From 6b8b420d8c32738f6accdde2434f6306dc4cc8bf Mon Sep 17 00:00:00 2001 From: Mike Perrone Date: Wed, 27 Mar 2024 18:17:52 -0700 Subject: [PATCH 2/2] add new minor version 0.4.0 including the unknown type --- specs/type/0.4.0.json | 681 +++++++++++++++++++++++++++++++++++++ specs/type/0.4.0.md | 760 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1441 insertions(+) create mode 100644 specs/type/0.4.0.json create mode 100644 specs/type/0.4.0.md diff --git a/specs/type/0.4.0.json b/specs/type/0.4.0.json new file mode 100644 index 0000000..1931179 --- /dev/null +++ b/specs/type/0.4.0.json @@ -0,0 +1,681 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://recap.build/specs/type/0.4.0.json", + "$ref": "#/$defs/RecapType", + "$defs": { + "RecapType": { + "oneOf": [ + { + "$ref": "#/$defs/RecapNull" + }, + { + "$ref": "#/$defs/RecapBool" + }, + { + "$ref": "#/$defs/RecapInt" + }, + { + "$ref": "#/$defs/RecapFloat" + }, + { + "$ref": "#/$defs/RecapString" + }, + { + "$ref": "#/$defs/RecapBytes" + }, + { + "$ref": "#/$defs/RecapList" + }, + { + "$ref": "#/$defs/RecapMap" + }, + { + "$ref": "#/$defs/RecapStruct" + }, + { + "$ref": "#/$defs/RecapEnum" + }, + { + "$ref": "#/$defs/RecapUnion" + }, + { + "$ref": "#/$defs/RecapAliasReference" + }, + { + "$ref": "#/$defs/RecapUnknown" + } + ], + "allOf": [ + { + "$ref": "#/$defs/RecapLogicalTypes" + } + ] + }, + "RecapNull": { + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["null"] + }, + "alias": { + "type": "string", + "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*(?:\\.[a-zA-Z_][a-zA-Z0-9_]*)+$" + }, + "doc": { + "type": "string" + }, + "logical": { + "type": "string" + } + } + }, + "RecapBool": { + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["bool"] + }, + "alias": { + "type": "string", + "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*(?:\\.[a-zA-Z_][a-zA-Z0-9_]*)+$" + }, + "doc": { + "type": "string" + }, + "logical": { + "type": "string" + } + } + }, + "RecapInt": { + "type": "object", + "required": ["type", "bits"], + "properties": { + "type": { + "type": "string", + "enum": ["int"] + }, + "bits": { + "type": "integer", + "minimum": 1, + "maximum": 2147483647 + }, + "signed": { + "type": "boolean" + }, + "alias": { + "type": "string", + "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*(?:\\.[a-zA-Z_][a-zA-Z0-9_]*)+$" + }, + "doc": { + "type": "string" + }, + "logical": { + "type": "string" + } + } + }, + "RecapFloat": { + "type": "object", + "required": ["type", "bits"], + "properties": { + "type": { + "type": "string", + "enum": ["float"] + }, + "bits": { + "type": "integer", + "minimum": 1, + "maximum": 2147483647 + }, + "alias": { + "type": "string", + "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*(?:\\.[a-zA-Z_][a-zA-Z0-9_]*)+$" + }, + "doc": { + "type": "string" + }, + "logical": { + "type": "string" + } + } + }, + "RecapString": { + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["string"] + }, + "bytes": { + "type": "integer", + "minimum": 1, + "maximum": 9223372036854775807 + }, + "variable": { + "type": "boolean", + "default": true + }, + "alias": { + "type": "string", + "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*(?:\\.[a-zA-Z_][a-zA-Z0-9_]*)+$" + }, + "doc": { + "type": "string" + }, + "logical": { + "type": "string" + } + }, + "if": { + "properties": { + "variable": { + "const": false + } + }, + "required": ["variable"] + }, + "then": { + "required": ["bytes"] + } + }, + "RecapBytes": { + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["bytes"] + }, + "bytes": { + "type": "integer", + "minimum": 1, + "maximum": 9223372036854775807 + }, + "variable": { + "type": "boolean" + }, + "alias": { + "type": "string", + "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*(?:\\.[a-zA-Z_][a-zA-Z0-9_]*)+$" + }, + "doc": { + "type": "string" + }, + "logical": { + "type": "string" + } + }, + "if": { + "properties": { + "variable": { + "const": false + } + }, + "required": ["variable"] + }, + "then": { + "required": ["bytes"] + } + }, + "RecapList": { + "type": "object", + "required": ["type", "values"], + "properties": { + "type": { + "type": "string", + "enum": ["list"] + }, + "values": { + "$ref": "#/$defs/RecapType" + }, + "length": { + "type": "integer", + "minimum": 1, + "maximum": 9223372036854775807 + }, + "variable": { + "type": "boolean", + "default": true + }, + "alias": { + "type": "string", + "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*(?:\\.[a-zA-Z_][a-zA-Z0-9_]*)+$" + }, + "doc": { + "type": "string" + }, + "logical": { + "type": "string" + } + }, + "if": { + "properties": { + "variable": { + "const": false + } + }, + "required": ["variable"] + }, + "then": { + "required": ["length"] + } + }, + "RecapMap": { + "type": "object", + "required": ["type", "keys", "values"], + "properties": { + "type": { + "type": "string", + "enum": ["map"] + }, + "keys": { + "$ref": "#/$defs/RecapType" + }, + "values": { + "$ref": "#/$defs/RecapType" + }, + "alias": { + "type": "string", + "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*(?:\\.[a-zA-Z_][a-zA-Z0-9_]*)+$" + }, + "doc": { + "type": "string" + }, + "logical": { + "type": "string" + } + } + }, + "RecapStruct": { + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["struct"] + }, + "name": { + "type": "string" + }, + "fields": { + "type": "array", + "items": { + "allOf": [ + { + "$ref": "#/$defs/RecapType" + }, + { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "default": { + "type": "string" + } + } + } + ] + } + }, + "alias": { + "type": "string", + "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*(?:\\.[a-zA-Z_][a-zA-Z0-9_]*)+$" + }, + "doc": { + "type": "string" + }, + "logical": { + "type": "string" + } + } + }, + "RecapEnum": { + "type": "object", + "required": ["type", "symbols"], + "properties": { + "type": { + "type": "string", + "enum": ["enum"] + }, + "symbols": { + "type": "array", + "items": { + "type": "string" + } + }, + "alias": { + "type": "string", + "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*(?:\\.[a-zA-Z_][a-zA-Z0-9_]*)+$" + }, + "doc": { + "type": "string" + }, + "logical": { + "type": "string" + } + } + }, + "RecapUnion": { + "type": "object", + "required": ["type", "types"], + "properties": { + "type": { + "type": "string", + "enum": ["union"] + }, + "types": { + "type": "array", + "items": { + "$ref": "#/$defs/RecapType" + } + }, + "alias": { + "type": "string", + "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*(?:\\.[a-zA-Z_][a-zA-Z0-9_]*)+$" + }, + "doc": { + "type": "string" + }, + "logical": { + "type": "string" + } + } + }, + "RecapAliasReference": { + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "not": { + "enum": [ + "null", + "bool", + "int", + "float", + "string", + "bytes", + "list", + "map", + "struct", + "enum", + "union" + ] + } + }, + "doc": { + "type": "string" + }, + "logical": { + "type": "string" + } + }, + "not": { + "required": [ + "alias" + ] + } + }, + "RecapUnknown": { + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "enum": ["unknown"] + }, + "alias": { + "type": "string", + "pattern": "^[a-zA-Z_][a-zA-Z0-9_]*(?:\\.[a-zA-Z_][a-zA-Z0-9_]*)+$" + }, + "doc": { + "type": "string" + }, + "logical": { + "type": "string" + } + } + }, + "RecapLogicalTypes": { + "allOf": [ + { + "if": { + "properties": { + "type": { + "const": "int" + }, + "logical": { + "const": "build.recap.Date" + } + }, + "required": ["type", "logical"] + }, + "then": { + "properties": { + "unit": { + "type": "string", + "enum": [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", + "picosecond" + ] + } + }, + "required": ["unit"] + } + }, + { + "if": { + "properties": { + "type": { + "const": "bytes" + }, + "logical": { + "const": "build.recap.Decimal" + } + }, + "required": ["type", "logical"] + }, + "then": { + "properties": { + "precision": { + "type": "integer", + "minimum": 1, + "maximum": 2147483647 + }, + "scale": { + "type": "integer", + "minimum": 0, + "maximum": 2147483647 + } + }, + "required": ["precision", "scale"] + } + }, + { + "if": { + "properties": { + "type": { + "const": "int" + }, + "logical": { + "const": "build.recap.Duration" + } + }, + "required": ["type", "logical"] + }, + "then": { + "properties": { + "unit": { + "type": "string", + "enum": [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", + "picosecond" + ] + } + }, + "required": ["unit"] + } + }, + { + "if": { + "properties": { + "type": { + "const": "bytes" + }, + "logical": { + "const": "build.recap.Interval" + }, + "bytes": { + "const": 16 + }, + "variable": { + "const": false + } + }, + "required": ["type", "logical", "bytes", "variable"] + }, + "then": { + "properties": { + "unit": { + "type": "string", + "enum": [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", + "picosecond" + ] + } + }, + "required": ["unit"] + } + }, + { + "if": { + "properties": { + "type": { + "const": "int" + }, + "logical": { + "const": "build.recap.Time" + } + }, + "required": ["type", "logical"] + }, + "then": { + "properties": { + "unit": { + "type": "string", + "enum": [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", + "picosecond" + ] + } + }, + "required": ["unit"] + } + }, + { + "if": { + "properties": { + "type": { + "const": "int" + }, + "logical": { + "const": "build.recap.Timestamp" + } + }, + "required": ["type", "logical"] + }, + "$comment": "Not validating timezone field because it's a huge list.", + "then": { + "properties": { + "unit": { + "type": "string", + "enum": [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "millisecond", + "microsecond", + "nanosecond", + "picosecond" + ] + }, + "timezone": { + "type": "string" + } + }, + "required": ["unit"] + } + }, + { + "if": { + "properties": { + "type": { + "const": "string" + }, + "logical": { + "const": "build.recap.UUID" + } + }, + "required": ["type", "logical"] + }, + "then": { + "properties": { + "bytes": { + "type": "integer", + "minimum": 36, + "maximum": 9223372036854775807, + "default": 65536 + }, + "variable": { + "const": false + } + }, + "required": ["variable"] + } + } + ] + } + } +} diff --git a/specs/type/0.4.0.md b/specs/type/0.4.0.md new file mode 100644 index 0000000..edc7200 --- /dev/null +++ b/specs/type/0.4.0.md @@ -0,0 +1,760 @@ +--- +layout: default +title: "Type Spec 0.3.0" +parent: Type Spec +--- + +# Recap Type Spec +{: .no_toc } + +* Version 0.3.0 + +1. TOC +{:toc} + +{% comment %} This is a little helper variable {% endcomment %} +{% assign trimmed_url_length = page.url.size | minus: 1 %} +{% assign json_spec_url = page.url | slice: 0, trimmed_url_length | append: ".json" %} + +## Introduction + +This document defines Recap's types. It is intended to be the authoritative specification. Recap type implementations must adhere to this document. + +This spec uses [YAML](https://yaml.org) to provide examples, but Recap's types are agnostic to the serialized format. Recap types may be defined in YAML, TOML, JSON, XML, or any other compatible language. + +Recap's type spec is also available as a [JSON schema]({{ json_spec_url }}), which you can use to validate your type definitions. + +### What is Recap's Type Spec? +{: .no_toc } + +Recap's type spec describes a data model that can model relation database schemas and RPC IDLs with minimal type coercion. It's similar to [Apache Arrow](https://arrow.apache.org/)'s [Schema.fbs](https://github.com/apache/arrow/blob/main/format/Schema.fbs) or [Apache Kafka](https://kafka.apache.org)'s [Schema.java](https://github.com/apache/kafka/blob/trunk/connect/api/src/main/java/org/apache/kafka/connect/data/Schema.java). + +### Why Does Recap Type Spec Exist? +{: .no_toc } + +Data passes through web services, databases, message brokers, and object stores. Each system describes its data differently. Developers have historically written conversion logic and tooling for each system. Repeatedly building custom logic and tooling is inefficient and error-prone. Recap's type system describes schemas in a standard data model so a single set of converters and tools can be built to deal with data as it moves between systems. + +## Types + +Recap supports the following types: + +* `null` +* `bool` +* `int` +* `float` +* `string` +* `bytes` +* `list` +* `map` +* `struct` +* `enum` +* `union` +* `unknown` + +This section defines each type. + +*NOTE: Attribute types in the spec define what type a Recap implementation should use when storing the attribute. Attribute types are not the same as Recap types unless noted.* + +### `null` + +A null value. + +### `bool` + +A boolean value. + +### `int` + +An integer value with a fixed bit length. + +#### Attributes +{: .no_toc } + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| bits | Number of bits. | 32-bit signed integer | YES | | +| signed | False means the integer is unsigned. | Boolean | NO | true | + +#### Examples +{: .no_toc } + +```yaml +# A 32-bit signed integer +type: int +bits: 32 +signed: true +``` + +### `float` + +An IEEE 754 encoded floating point value with a fixed bit length. + +#### Attributes +{: .no_toc } + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| bits | Number of bits. | 32-bit signed integer | YES | | + +#### Examples +{: .no_toc } + +```yaml +# A 32-bit IEEE 754 encoded float +type: float +bits: 32 +``` + +### `string` + +A UTF-8 encoded Unicode string with a maximum byte length. + +#### Attributes +{: .no_toc } + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| bytes | The maximum number of bytes to store the string. Must be >= 1 if set. If unset, the string length is infinite. | 64-bit signed integer \| null | NO | null | +| variable | If true, the string is variable-length (`<= bytes`). If false, `bytes` must be set. | Boolean | NO | true | + +#### Examples +{: .no_toc } + +```yaml +# A VARCHAR(255) +type: string +bytes: 255 +variable: true +``` + +### `bytes` + +A byte array value. + +#### Attributes +{: .no_toc } + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| bytes | The maximum number of bytes that can be stored in the byte array. Must be >= 1 if set. If unset, the byte length is infinite. | 64-bit signed integer \| null | NO | null | +| variable | If true, the byte array is variable-length (`<= bytes`). If false, `bytes` must be set. | Boolean | NO | true | + +#### Examples +{: .no_toc } + +```yaml +# A VARBINARY(255) +type: bytes +bytes: 255 +variable: true +``` + +### `list` + +A list of values all sharing the same type. + +#### Attributes +{: .no_toc } + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| values | Type for all items in the list. | *Recap type object* | YES | | +| length | The maximum length of the list. Must be >= 1 if set. If unset, the list size is infinite. | 64-bit signed integer \| null | NO | null | +| variable | If true, the list is variable-length (`<= length` if `length` is set). If false, `length` must be set. | Boolean | NO | true | + +#### Examples +{: .no_toc } + +```yaml +# A list of unsigned 64-bit integers +type: list +values: + type: int + bits: 64 + signed: false +``` + +### `map` + +A map of key/value pairs where each key is the same type and each value is the same type. + +#### Attributes +{: .no_toc } + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| keys | Type for all items in the key set. | *Recap type object* | YES | | +| values | Type for all value items. | *Recap type object* | YES | | + +#### Examples +{: .no_toc } + +```yaml +# A map from 32-bit strings to boolean values +type: map +keys: + type: string + bytes: 2_147_483_647 +values: + type: bool +``` + +### `struct` + +An ordered collection of Recap types. Table schemas are typically represented as Recap structs, though the two are not the same. Recap structs support additional features like the nested structs (like a [Protobuf Message](https://protobuf.dev/reference/protobuf/proto3-spec/#message_definition)) and unnamed fields (like a CSV file with no header). + +#### `struct` Attributes +{: .no_toc } + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| name | The struct's name. | String | NO | | +| fields | An ordered list of Recap types. | *List of Recap type objects* | NO | [] | + +#### `field` Attributes +{: .no_toc } + +Recap types in the `fields` attribute can have an extra `name` attribute to set a field's name. + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| name | The field's name. | String | NO | | + +#### Examples +{: .no_toc } + +```yaml +# A struct with a required signed 32-bit integer field called "id" +type: struct +fields: + - name: id + type: int + bits: 32 + - name: email + type: string + bytes: 255 +``` + +### `enum` + +An enumeration of string symbols. + +#### Attributes +{: .no_toc } + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| symbols | An ordered list of string symbols. | List of strings | YES | | + +#### Examples +{: .no_toc } + +```yaml +# An enum with RGB symbols +type: enum +symbols: ["RED", "GREEN", "BLUE"] +``` + +### `union` + +A value that can be one of several types. It is acceptable for a value to be more than one of the types in the union. + +#### Attributes +{: .no_toc } + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| types | A list of types the value can be. | *List of Recap type objects* | YES | | + +#### Examples +{: .no_toc } + +```yaml +# A union type of null or a 32-bit signed int +type: union +types: + - type: "null" + - type: int + bits: 32 +``` + +Unions can also be defined as a list of types: + +```yaml +# A union type of null or a boolean +type: ["null", "bool"] +``` + +### `unknown` + +A value that is of a type that was not recognized by the type converter or cannot otherwise be represented in recap. This provides the option to represent the name of the data in a struct even if the type cannot be understood. + +## Documentation + +All types support a `doc` attribute, which allows developers to document types. + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| doc | A documentation string for the type. | A string \| null | NO | null | + +```yaml +type: union +doc: A union type of null or a 32-bit signed int +types: + - type: "null" + - type: int + bits: 32 +``` + +## Defaults + +Recap types can have a `default` attribute with a literal value. The value is used when the field is not set in a struct. + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| default | The default value for a reader if the field is not set in the struct. | Literal of any type | NO | | + +```yaml +# A struct with a field called "id" that defaults to 0 +type: struct +fields: + - name: id + type: int + bits: 32 + default: 0 +``` + +A missing default is differentiated from a default with a null value. An unset default is treated as "no default", while a default that's been set to null is treated as a null default. + +*NOTE: Database defaults often appear as strings like `nextval('\"public\".some_id_seq'::regclass)` or `'USD'::character varying`. Such defaults are left to the developer to interpret based on the database they're using.* + +## Optional Types + +Optional types are expressed as a union with a `null` type and a null default (similar to [Avro's fields](https://avro.apache.org/docs/1.10.2/spec.html#schema_record)). + +```yaml +# A struct with an optional string field called "secondary_phone" +type: struct +fields: + - name: secondary_phone + type: union + types: ["null", "string32"] + default: "null" +``` + +Unions can be cumbersome, so Recap supports a shorthand syntax for types using `optional`: + +```yaml +# A struct with an optional string field called "secondary_phone" +type: struct +fields: + - name: secondary_phone + type: string32? + optional: true +``` + +The above shorthand is equivalent to the union example. + +Optional types also work for unions: + +```yaml +# A union type of null or a 32-bit signed int +type: union +types: + - type: int32 + - type: float32 +optional: true +``` + +Optionals also work with aliases: + +```yaml +# A struct with an optional string field called "secondary_phone" +type: struct +fields: + - name: phone + alias: phone_type + type: string32 + - name: secondary_phone + type: phone_type + optional: true +``` + +Optionality is not inherited in aliases: + +```yaml +type: struct +fields: + - name: phone + alias: phone_type + type: string32 + optional: true + - name: secondary_phone + type: phone_type +``` + +## Logical Types + +Logical types annotate one of the 11 Recap types listed in the *Types* section at the top of the spec. Logical types add additional context to Recap types that help converters. For example, a `decimal` logical type can be defined as: + +```yaml +type: bytes +logical: decimal +precision: 6 +scale: 3 +bytes: 16 +variable: false +``` + +Developers may define their own logical types. + +Logical types may require additional attributes such as the `precision` and `scale` attributes shown above. Recap converters may use `logical` annotations to convert schemas to more accurate types such as SQL's `DECIMAL` type (rather than `BYTES`). + +Logical type names are globally unique, so they must include a unique dotted namespace prefix. + +### Built-in Logical Types +{: .no_toc } + +Recap comes with a collection of built-in logical types: + +* `build.recap.Date`: An integer representing a length of time since the UNIX epoch without timezones and leap seconds. +* `build.recap.Decimal`: A byte array representing an arbitrary-precision decimal number. +* `build.recap.Duration`: An integer representing a length of time and time unit without timezones and leap seconds. +* `build.recap.Interval`: A byte array representing an interval of time on a calendar measured in months, days, and a duration of intra-day time with a time unit. +* `build.recap.Time`: An integer representing a length of time since midnight without timezones and leap seconds. +* `build.recap.Timestamp`: An integer representing a length of time (in a time unit) since a specific epoch. +* `build.recap.UUID`: A string representing a UUID in 8-4-4-4-12 format as defined in [RFC 4122](https://www.ietf.org/rfc/rfc4122.txt). + +### `build.recap.Date` + +Elapsed time since the UNIX epoch without timezones and leap seconds. + +#### Annotates +{: .no_toc } + +`build.recap.Date` logical types must annotate `int` types. + +#### Attributes +{: .no_toc } + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| unit | A string time unit. | String literal of year, month, day, hour, minute, second, millisecond, microsecond, nanosecond, or picosecond. | YES | | + +#### Examples +{: .no_toc } + +```yaml +type: int +logical: build.recap.Date +unit: day +``` + +### `build.recap.Decimal` + +An arbitrary-precision decimal number. This type is the same as [Avro's Decimal](https://avro.apache.org/docs/1.10.2/spec.html#Decimal). + +#### Annotates +{: .no_toc } + +`build.recap.Decimal` logical types must annotate `bytes` types. + +#### Attributes +{: .no_toc } + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| precision | Total number of digits. 123.456 has a precision of 6. | 32-bit signed integer | YES | | +| scale | Digits to the right of the decimal point. 123.456 has a scale of 3. | 32-bit signed integer | YES | | + +#### Examples +{: .no_toc } + +```yaml +type: bytes +logical: build.recap.Decimal +precision: 6 +scale: 3 +bytes: 16 +variable: false +``` + +### `build.recap.Duration` + +A length of time without timezones and leap seconds. This type is the same as [Arrow's Duration](https://arrow.apache.org/docs/python/generated/pyarrow.duration.html) but with a superset of time units. + +#### Annotates +{: .no_toc } + +`build.recap.Duration` logical types must annotate `int` types. + +#### Attributes +{: .no_toc } + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| unit | A string time unit. | String literal of year, month, day, hour, minute, second, millisecond, microsecond, nanosecond, or picosecond. | YES | | + +#### Examples +{: .no_toc } + +```yaml +type: int +logical: build.recap.Duration +bits: 64 +unit: millisecond +``` + +### `build.recap.Interval` + +An interval of time on a calendar. This measurement allows you to measure time without worrying about leap seconds, leap years, and time changes. Years, quarters, hours, and minutes can be expressed using this type. + +Intervals are measured in months, days, and an intra-day time measurement. Months and days are each 32-bit signed integers. The remainder is a 64-bit signed integer measured in a certain time unit. Leap seconds are ignored. + +#### Annotates +{: .no_toc } + +`build.recap.Interval` logical types must annotate `bytes` types with the `variable` attribute set to `false` and the `bytes` attribute set to `16`. + +`build.recap.Interval` is the same as [Avro's Duration](https://avro.apache.org/docs/1.10.2/spec.html#Duration) but with a superset of time units and 16 bytes instead of 12 bytes. + +#### Attributes +{: .no_toc } + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| unit | A string time unit. | String literal of year, month, day, hour, minute, second, millisecond, microsecond, nanosecond, or picosecond. | YES | | + +#### Examples +{: .no_toc } + +```yaml +type: bytes +logical: build.recap.Interval +bytes: 16 +variable: false +unit: millisecond +``` + +### `build.recap.Time` + +Elapsed time since midnight without timezones and leap seconds. + +#### Annotates +{: .no_toc } + +`build.recap.Time` logical types must annotate `int` types. + +#### Attributes +{: .no_toc } + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| unit | A string time unit. | String literal of year, month, day, hour, minute, second, millisecond, microsecond, nanosecond, or picosecond. | YES | | + +#### Examples +{: .no_toc } + +```yaml +type: int +logical: build.recap.Time +bits: 32 +unit: millisecond +``` + +### `build.recap.Timestamp` + +Time elapsed since a specific epoch. + +A timestamp with no timezone is a `DATETIME` in database parlance--a date and time as you would see it on wrist-watch. + +A timestamp with a timezone represents the amount of time elapsed since the 1970-01-01 00:00:00 epoch in UTC time zone (regardless of the timezone that's specified). Readers must translate the UTC timestamp to a timestamp value for the specified timezone. See Apache Arrow's [Schema.fbs](https://github.com/apache/arrow/blob/main/format/Schema.fbs) documentation for more details. + +This type is the same as [Arrow's timestamp](https://arrow.apache.org/docs/python/generated/pyarrow.timestamp.html) but with a superset of time units and an arbitrary bit length. + +#### Annotates +{: .no_toc } + +`build.recap.Timestamp` logical types must annotate `int` types. + +#### Attributes +{: .no_toc } + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| unit | A string time unit. | String literal of year, month, day, hour, minute, second, millisecond, microsecond, nanosecond, or picosecond. | YES | | +| timezone | An optional [Olson timezone database](https://en.wikipedia.org/wiki/Tz_database) string. | A string \| null | NO | null | + +#### Examples +{: .no_toc } + +```yaml +type: int +logical: build.recap.Timestamp +bits: 64 +unit: millisecond +``` + +### `build.recap.UUID` + +A string representing a UUID in 8-4-4-4-12 format as defined in [RFC 4122](https://www.ietf.org/rfc/rfc4122.txt). + +#### Annotates +{: .no_toc } + +`build.recap.UUID` logical types must annotate `string` types with a `bytes` attribute greater than or equal to 36. + +#### Attributes +{: .no_toc } + +`build.recap.UUID` logical types have no additional attributes. + +#### Examples +{: .no_toc } + +```yaml +type: string +logical: build.recap.UUID +bytes: 36 +variable: false +``` + +## Aliases + +All types support an `alias` attribute. An `alias` must reference one of the 11 Recap types defined above. Aliases are useful in larger data structures where complex types are repeatedly used. + +Aliases are globally unique, so they must include a unique dotted namespace prefix. Naked aliases (aliases with no dotted namespace) are reserved for Recap's built-in aliases (see the next section). + +### Attributes +{: .no_toc } + +| Name | Description | Type | Required | Default | +|------|-------------|------|----------|---------| +| alias | An alias for the type. | A string \| null | NO | null | + +### Examples + +Aliases are referenced using the `type` field: + +```yaml +type: struct +doc: A book with pages +fields: + - name: previous + alias: com.mycorp.models.Page + type: int + bits: 32 + signed: false + - name: next + type: com.mycorp.models.Page +``` + +Recap will treat this struct the same as: + +```yaml +type: struct +doc: A book with pages +fields: + - name: previous + alias: com.mycorp.models.Page + type: int + bits: 32 + signed: false + # The `next` field has the same types and attributes + # as the `previous` one; they're both pages. + - name: next + type: int + bits: 32 + signed: false +``` + +Recap also allows cyclic references: + +```yaml +alias: com.mycorp.models.LinkedListUint32 +type: struct +doc: A linked list of unsigned 32-bit integers +fields: + - name: value + type: int + bits: 32 + signed: false + - name: next + type: com.mycorp.models.LinkedListUint32 +``` + +And attribute overrides: + +```yaml +type: struct +fields: + - name: id + alias: com.mycorp.models.Uint24 + type: int + bits: 24 + signed: false + - name: signed_id + type: com.mycorp.models.Uint24 + # Let's make this a signed int24 + signed: true +``` + +But alias inheritance is not allowed: + +```yaml +type: struct +doc: All fields have the same type +fields: + - name: field1 + alias: "com.mycorp.models.Field" + type: int + bits: 32 + signed: false + - name: field2 + type: com.mycorp.models.Field + # An alias of an alias isn't allowed. + alias: com.mycorp.models.FieldAlias + - name: field3 + type: com.mycorp.models.FieldAlias +``` + +### Built-in Aliases + +Recap comes with a collection of built-in aliases: + +* `int8`: An 8-bit signed integer. +* `uint8`: An 8-bit unsigned integer. +* `int16`: A 16-bit signed integer. +* `uint16`: A 16-bit unsigned integer. +* `int32`: A 32-bit signed integer. +* `uint32`: A 32-bit unsigned integer. +* `int64`: A 64-bit signed integer. +* `uint64`: A 64-bit unsigned integer. +* `float16`: A 16-bit IEEE 754 encoded floating point number. +* `float32`: A 32-bit IEEE 754 encoded floating point number. +* `float64`: A 64-bit IEEE 754 encoded floating point number. +* `string32`: A variable-length UTF-8 encoded unicode string with a maximum length of 2_147_483_648. +* `string64`: A variable-length UTF-8 encoded unicode string with a maximum length of 9_223_372_036_854_775_807. +* `bytes32`: A variable-length byte array with a maximum length of 2_147_483_648. +* `bytes64`: A variable-length byte array with a maximum length of 9_223_372_036_854_775_807. +* `uuid`: A fixed-length 36 byte string in UUID 8-4-4-4-12 string format as defined in [RFC 4122](https://www.ietf.org/rfc/rfc4122.txt). +* `decimal128`: An arbitrary-precision decimal number stored in a fixed-length 128-bit byte array. +* `decimal256`: An arbitrary-precision decimal number stored in a fixed-length 256-bit byte array. +* `duration64`: A length of time and time unit without timezones and leap seconds. +* `interval128`: An interval of time on a calendar measured in months, days, and a duration of intra-day time with a time unit. +* `time32`: Time since midnight without timezones and leap seconds in a 32-bit signed integer. +* `time64`: Time since midnight without timezones and leap seconds in a 64-bit signed integer. +* `timestamp64`: Time elapsed (in a time unit) since a specific epoch. +* `date32`: Date since the UNIX epoch without timezones and leap seconds in a 32-bit integer. +* `date64`: Date since the UNIX epoch without timezones and leap seconds in a 64-bit integer. + +## Changelog + +* 0.3.0 + * Allow strings and bytes with undefined byte lengths. ([#413](https://github.com/recap-build/recap/pull/413)) +* 0.2.0 + * Add `optional` attribute. ([#335](https://github.com/recap-build/recap/pull/335)) +* 0.1.1 + * Make `string` and `bytes` byte attribute optional. ([#284](https://github.com/recap-build/recap/discussions/284) [#292](https://github.com/recap-build/recap/pull/292) [#293](https://github.com/recap-build/recap/pull/293)) +* 0.1.0 + * Initial release.