From e369ecbebf422b69b7ba264d6b44182d601e80ac Mon Sep 17 00:00:00 2001 From: Felipe Cardozo Date: Mon, 18 Nov 2024 16:03:52 -0300 Subject: [PATCH] chore: release of fluvio v0.13.0 --- docusaurus.config.ts | 2 +- news/this-week-in-fluvio-0066.md | 78 +++ ...loy-start-http-source-cat-facts-xform.bash | 1 + ...dk-deploy-start-http-source-cat-facts.bash | 1 + .../cdk-deploy-start-http-source-quotes.bash | 1 + ...eploy-start-http-source-string-quotes.bash | 1 + .../cdk-deploy-start-sql-sink-cat-facts.bash | 1 + .../cmds/cdk-hub-download-http-source.bash | 1 + .../cmds/cdk-hub-download-sql-sink.bash | 1 + .../_embeds/cmds/hub-sm-download-jolt.bash | 2 + .../cmds/hub-sm-download-json-sql.bash | 1 + .../_embeds/cmds/hub-sm-list-jolt.bash | 4 + .../_embeds/cmds/sm-list-jolt.bash | 3 + .../_embeds/cmds/sm-list-json-sql-jolt.bash | 4 + .../connectors/http-cat-fact-basic.yaml | 11 + .../connectors/http-cat-fact-transform.yaml | 18 + .../http-source-quotes-transform.yaml | 17 + .../connectors/http-source-quotes.yaml | 10 + .../_embeds/connectors/sql-cat-fact.yaml | 27 + .../_embeds/transforms/infinyon-jolt.yaml | 8 + .../version-0.13.0/_embeds/versions.tsx | 5 + .../version-0.13.0/cloud/cli/_category_.json | 5 + .../version-0.13.0/cloud/cli/cluster.mdx | 122 +++++ .../version-0.13.0/cloud/cli/connector.mdx | 244 +++++++++ .../version-0.13.0/cloud/cli/overview.mdx | 23 + .../version-0.13.0/cloud/cli/secret.mdx | 123 +++++ .../version-0.13.0/cloud/cli/usage.mdx | 19 + .../version-0.13.0/cloud/cli/webhook.mdx | 164 ++++++ .../cloud/concepts/_category_.json | 5 + .../cloud/concepts/webhook-config.mdx | 51 ++ .../cloud/how-to/_category_.json | 5 + .../cloud/how-to/use-connectors.mdx | 152 ++++++ .../cloud/how-to/use-secrets.mdx | 49 ++ .../cloud/how-to/use-webhooks.mdx | 156 ++++++ .../version-0.13.0/cloud/overview.mdx | 28 + .../version-0.13.0/cloud/quickstart.mdx | 376 +++++++++++++ .../cloud/tutorials/_category_.json | 5 + .../cloud/tutorials/amplitude-analytics.mdx | 120 +++++ .../cloud/tutorials/cloudflare-workers.mdx | 200 +++++++ .../cloud/tutorials/github-to-discord.mdx | 169 ++++++ .../cloud/tutorials/github-to-slack.mdx | 168 ++++++ .../cloud/tutorials/hackernews-reader.mdx | 101 ++++ .../cloud/tutorials/http-to-sql.mdx | 363 +++++++++++++ .../version-0.13.0/cloud/tutorials/index.md | 5 + .../cloud/tutorials/iot-mirroring-cloud.mdx | 407 ++++++++++++++ .../cloud/tutorials/mqtt-to-sql.mdx | 508 ++++++++++++++++++ .../cloud/tutorials/try-mirroring-cloud.mdx | 235 ++++++++ .../cloud/tutorials/webhook-to-slack.mdx | 196 +++++++ .../cloud/tutorials/zappier-triggers.mdx | 211 ++++++++ .../version-0.13.0/connectors/cdk.mdx | 22 + .../connectors/configuration.mdx | 101 ++++ .../connectors/connector-hub.mdx | 10 + .../connectors/developers/_category_.json | 5 + .../connectors/developers/build.mdx | 52 ++ .../connectors/developers/generate.mdx | 112 ++++ .../connectors/developers/logging.mdx | 53 ++ .../connectors/developers/overview.mdx | 77 +++ .../connectors/developers/publish.mdx | 40 ++ .../connectors/developers/secrets.mdx | 64 +++ .../connectors/developers/start-shutdown.mdx | 56 ++ .../connectors/how-to/_category_.json | 5 + .../connectors/how-to/run-cloud.mdx | 97 ++++ .../connectors/how-to/run-local.mdx | 106 ++++ .../version-0.13.0/connectors/overview.mdx | 39 ++ .../connectors/troubleshooting.mdx | 69 +++ .../fluvio/apis/_category_.json | 5 + .../fluvio/apis/nodejs/_category_.json | 5 + .../fluvio/apis/nodejs/example.mdx | 124 +++++ .../fluvio/apis/nodejs/installation.mdx | 46 ++ 
.../version-0.13.0/fluvio/apis/overview.mdx | 121 +++++ .../fluvio/apis/python/_category_.json | 5 + .../fluvio/apis/python/example.mdx | 145 +++++ .../fluvio/apis/python/installation.mdx | 23 + .../fluvio/apis/rust/_category_.json | 5 + .../fluvio/apis/rust/example.mdx | 167 ++++++ .../fluvio/apis/rust/installation.mdx | 18 + .../version-0.13.0/fluvio/cli/_category_.json | 5 + .../version-0.13.0/fluvio/cli/cdk.mdx | 9 + .../fluvio/cli/fluvio/_category_.json | 5 + .../fluvio/cli/fluvio/cloud.mdx | 9 + .../fluvio/cli/fluvio/cluster.mdx | 132 +++++ .../fluvio/cli/fluvio/consume.mdx | 235 ++++++++ .../fluvio/cli/fluvio/hub/_category_.json | 5 + .../fluvio/cli/fluvio/hub/download.mdx | 47 ++ .../fluvio/cli/fluvio/hub/list.mdx | 30 ++ .../fluvio/cli/fluvio/partition.mdx | 30 ++ .../fluvio/cli/fluvio/produce.mdx | 296 ++++++++++ .../fluvio/cli/fluvio/profile.mdx | 148 +++++ .../fluvio/cli/fluvio/table-format.mdx | 414 ++++++++++++++ .../fluvio/cli/fluvio/topic.mdx | 185 +++++++ .../fluvio/cli/fluvio/version.mdx | 21 + .../version-0.13.0/fluvio/cli/overview.mdx | 56 ++ .../version-0.13.0/fluvio/cli/smdk.mdx | 9 + .../fluvio/concepts/_category_.json | 5 + .../fluvio/concepts/advanced/_category_.json | 5 + .../concepts/advanced/cluster-defaults.mdx | 110 ++++ .../fluvio/concepts/advanced/crds.mdx | 76 +++ .../fluvio/concepts/advanced/network.mdx | 45 ++ .../concepts/architecture/_category_.json | 5 + .../fluvio/concepts/architecture/auth.mdx | 128 +++++ .../fluvio/concepts/architecture/client.mdx | 173 ++++++ .../fluvio/concepts/architecture/overview.mdx | 141 +++++ .../architecture/replica-assignment.mdx | 242 +++++++++ .../architecture/replica-election.mdx | 197 +++++++ .../fluvio/concepts/architecture/sc.mdx | 404 ++++++++++++++ .../fluvio/concepts/architecture/spu.mdx | 161 ++++++ .../fluvio/concepts/batching.mdx | 92 ++++ .../fluvio/concepts/data-consistency.mdx | 63 +++ .../fluvio/concepts/delivery-semantics.mdx | 98 ++++ .../fluvio/concepts/offsets.mdx | 37 ++ .../concepts/operations/_category_.json | 5 + .../concepts/operations/data-retention.mdx | 188 +++++++ .../fluvio/concepts/operations/monitor.mdx | 55 ++ .../concepts/operations/troubleshooting.mdx | 62 +++ .../fluvio/concepts/operations/upgrade.mdx | 39 ++ .../fluvio/concepts/partitions.mdx | 76 +++ .../fluvio/concepts/produce-consume.mdx | 203 +++++++ .../fluvio/concepts/records.mdx | 25 + .../version-0.13.0/fluvio/concepts/topics.mdx | 41 ++ .../fluvio/concepts/transformations.mdx | 111 ++++ .../version-0.13.0/fluvio/fvm/_category_.json | 5 + .../version-0.13.0/fluvio/fvm/install.mdx | 63 +++ .../fluvio/fvm/introduction.mdx | 138 +++++ .../version-0.13.0/fluvio/fvm/update.mdx | 26 + .../fluvio/installation/_category_.json | 5 + .../installation/advanced/_category_.json | 5 + .../advanced/docker-custom-clients.mdx | 150 ++++++ .../advanced/kubernetes-advanced.mdx | 115 ++++ .../fluvio/installation/docker.mdx | 137 +++++ .../fluvio/installation/index.md | 12 + .../fluvio/installation/kubernetes.mdx | 109 ++++ .../fluvio/installation/local.mdx | 16 + .../version-0.13.0/fluvio/overview.mdx | 72 +++ .../version-0.13.0/fluvio/quickstart.mdx | 251 +++++++++ .../version-0.13.0/fluvio/troubleshooting.mdx | 44 ++ .../fluvio/tutorials/_category_.json | 5 + .../tutorials/connector-transformations.mdx | 142 +++++ .../fluvio/tutorials/http-source.mdx | 171 ++++++ .../version-0.13.0/fluvio/tutorials/index.md | 5 + .../fluvio/tutorials/mirroring-iot-local.mdx | 360 +++++++++++++ .../tutorials/mirroring-two-clusters.mdx | 274 ++++++++++ 
.../fluvio/tutorials/sql-sink.mdx | 194 +++++++ .../hub/connectors/_category_.json | 4 + .../hub/connectors/inbound/_category_.json | 4 + .../hub/connectors/inbound/http.mdx | 176 ++++++ .../hub/connectors/inbound/kafka.mdx | 51 ++ .../hub/connectors/inbound/mqtt.mdx | 145 +++++ .../version-0.13.0/hub/connectors/index.md | 5 + .../hub/connectors/outbound/_category_.json | 4 + .../hub/connectors/outbound/duckdb.mdx | 155 ++++++ .../hub/connectors/outbound/graphite.mdx | 159 ++++++ .../hub/connectors/outbound/http.mdx | 174 ++++++ .../hub/connectors/outbound/kafka.mdx | 97 ++++ .../hub/connectors/outbound/sql.mdx | 212 ++++++++ .../version-0.13.0/hub/overview.mdx | 48 ++ .../hub/smartmodules/_category_.json | 4 + .../version-0.13.0/hub/smartmodules/index.md | 9 + .../version-0.13.0/hub/smartmodules/jolt.mdx | 116 ++++ .../hub/smartmodules/json-sql.mdx | 169 ++++++ .../version-0.13.0/hub/smartmodules/regex.mdx | 49 ++ .../version-0.13.0/hub/use-the-hub.mdx | 70 +++ .../smartmodules/developers/_category_.json | 5 + .../smartmodules/developers/build.mdx | 62 +++ .../smartmodules/developers/generate.mdx | 95 ++++ .../smartmodules/developers/load.mdx | 124 +++++ .../smartmodules/developers/overview.mdx | 47 ++ .../smartmodules/developers/publish.mdx | 53 ++ .../smartmodules/features/_category_.json | 5 + .../smartmodules/features/chaining.mdx | 111 ++++ .../smartmodules/features/deduplication.mdx | 76 +++ .../smartmodules/features/lookback.mdx | 159 ++++++ .../features/operators/_category_.json | 5 + .../features/operators/array-map.mdx | 240 +++++++++ .../features/operators/filter-map.mdx | 284 ++++++++++ .../features/operators/filter.mdx | 331 ++++++++++++ .../smartmodules/features/operators/map.mdx | 216 ++++++++ .../version-0.13.0/smartmodules/overview.mdx | 101 ++++ .../smartmodules/quickstart.mdx | 197 +++++++ .../version-0.13.0/smartmodules/smdk.mdx | 24 + .../smartmodules/tutorials/_category_.json | 5 + .../smartmodules/tutorials/make-uppercase.mdx | 100 ++++ .../version-0.13.0-sidebars.json | 57 ++ versions.json | 1 + 183 files changed, 16649 insertions(+), 1 deletion(-) create mode 100644 news/this-week-in-fluvio-0066.md create mode 100644 versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-http-source-cat-facts-xform.bash create mode 100644 versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-http-source-cat-facts.bash create mode 100644 versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-http-source-quotes.bash create mode 100644 versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-http-source-string-quotes.bash create mode 100644 versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-sql-sink-cat-facts.bash create mode 100644 versioned_docs/version-0.13.0/_embeds/cmds/cdk-hub-download-http-source.bash create mode 100644 versioned_docs/version-0.13.0/_embeds/cmds/cdk-hub-download-sql-sink.bash create mode 100644 versioned_docs/version-0.13.0/_embeds/cmds/hub-sm-download-jolt.bash create mode 100644 versioned_docs/version-0.13.0/_embeds/cmds/hub-sm-download-json-sql.bash create mode 100644 versioned_docs/version-0.13.0/_embeds/cmds/hub-sm-list-jolt.bash create mode 100644 versioned_docs/version-0.13.0/_embeds/cmds/sm-list-jolt.bash create mode 100644 versioned_docs/version-0.13.0/_embeds/cmds/sm-list-json-sql-jolt.bash create mode 100644 versioned_docs/version-0.13.0/_embeds/connectors/http-cat-fact-basic.yaml create mode 100644 versioned_docs/version-0.13.0/_embeds/connectors/http-cat-fact-transform.yaml create mode 100644 
versioned_docs/version-0.13.0/_embeds/connectors/http-source-quotes-transform.yaml create mode 100644 versioned_docs/version-0.13.0/_embeds/connectors/http-source-quotes.yaml create mode 100644 versioned_docs/version-0.13.0/_embeds/connectors/sql-cat-fact.yaml create mode 100644 versioned_docs/version-0.13.0/_embeds/transforms/infinyon-jolt.yaml create mode 100644 versioned_docs/version-0.13.0/_embeds/versions.tsx create mode 100644 versioned_docs/version-0.13.0/cloud/cli/_category_.json create mode 100644 versioned_docs/version-0.13.0/cloud/cli/cluster.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/cli/connector.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/cli/overview.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/cli/secret.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/cli/usage.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/cli/webhook.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/concepts/_category_.json create mode 100644 versioned_docs/version-0.13.0/cloud/concepts/webhook-config.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/how-to/_category_.json create mode 100644 versioned_docs/version-0.13.0/cloud/how-to/use-connectors.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/how-to/use-secrets.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/how-to/use-webhooks.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/overview.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/quickstart.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/tutorials/_category_.json create mode 100644 versioned_docs/version-0.13.0/cloud/tutorials/amplitude-analytics.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/tutorials/cloudflare-workers.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/tutorials/github-to-discord.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/tutorials/github-to-slack.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/tutorials/hackernews-reader.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/tutorials/http-to-sql.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/tutorials/index.md create mode 100644 versioned_docs/version-0.13.0/cloud/tutorials/iot-mirroring-cloud.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/tutorials/mqtt-to-sql.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/tutorials/try-mirroring-cloud.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/tutorials/webhook-to-slack.mdx create mode 100644 versioned_docs/version-0.13.0/cloud/tutorials/zappier-triggers.mdx create mode 100644 versioned_docs/version-0.13.0/connectors/cdk.mdx create mode 100644 versioned_docs/version-0.13.0/connectors/configuration.mdx create mode 100644 versioned_docs/version-0.13.0/connectors/connector-hub.mdx create mode 100644 versioned_docs/version-0.13.0/connectors/developers/_category_.json create mode 100644 versioned_docs/version-0.13.0/connectors/developers/build.mdx create mode 100644 versioned_docs/version-0.13.0/connectors/developers/generate.mdx create mode 100644 versioned_docs/version-0.13.0/connectors/developers/logging.mdx create mode 100644 versioned_docs/version-0.13.0/connectors/developers/overview.mdx create mode 100644 versioned_docs/version-0.13.0/connectors/developers/publish.mdx create mode 100644 versioned_docs/version-0.13.0/connectors/developers/secrets.mdx create mode 100644 versioned_docs/version-0.13.0/connectors/developers/start-shutdown.mdx create mode 
100644 versioned_docs/version-0.13.0/connectors/how-to/_category_.json create mode 100644 versioned_docs/version-0.13.0/connectors/how-to/run-cloud.mdx create mode 100644 versioned_docs/version-0.13.0/connectors/how-to/run-local.mdx create mode 100644 versioned_docs/version-0.13.0/connectors/overview.mdx create mode 100644 versioned_docs/version-0.13.0/connectors/troubleshooting.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/apis/_category_.json create mode 100644 versioned_docs/version-0.13.0/fluvio/apis/nodejs/_category_.json create mode 100644 versioned_docs/version-0.13.0/fluvio/apis/nodejs/example.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/apis/nodejs/installation.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/apis/overview.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/apis/python/_category_.json create mode 100644 versioned_docs/version-0.13.0/fluvio/apis/python/example.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/apis/python/installation.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/apis/rust/_category_.json create mode 100644 versioned_docs/version-0.13.0/fluvio/apis/rust/example.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/apis/rust/installation.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/cli/_category_.json create mode 100644 versioned_docs/version-0.13.0/fluvio/cli/cdk.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/cli/fluvio/_category_.json create mode 100644 versioned_docs/version-0.13.0/fluvio/cli/fluvio/cloud.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/cli/fluvio/cluster.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/cli/fluvio/consume.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/cli/fluvio/hub/_category_.json create mode 100644 versioned_docs/version-0.13.0/fluvio/cli/fluvio/hub/download.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/cli/fluvio/hub/list.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/cli/fluvio/partition.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/cli/fluvio/produce.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/cli/fluvio/profile.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/cli/fluvio/table-format.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/cli/fluvio/topic.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/cli/fluvio/version.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/cli/overview.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/cli/smdk.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/_category_.json create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/advanced/_category_.json create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/advanced/cluster-defaults.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/advanced/crds.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/advanced/network.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/architecture/_category_.json create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/architecture/auth.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/architecture/client.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/architecture/overview.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/architecture/replica-assignment.mdx create mode 100644 
versioned_docs/version-0.13.0/fluvio/concepts/architecture/replica-election.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/architecture/sc.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/architecture/spu.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/batching.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/data-consistency.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/delivery-semantics.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/offsets.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/operations/_category_.json create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/operations/data-retention.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/operations/monitor.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/operations/troubleshooting.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/operations/upgrade.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/partitions.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/produce-consume.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/records.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/topics.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/concepts/transformations.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/fvm/_category_.json create mode 100644 versioned_docs/version-0.13.0/fluvio/fvm/install.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/fvm/introduction.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/fvm/update.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/installation/_category_.json create mode 100644 versioned_docs/version-0.13.0/fluvio/installation/advanced/_category_.json create mode 100644 versioned_docs/version-0.13.0/fluvio/installation/advanced/docker-custom-clients.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/installation/advanced/kubernetes-advanced.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/installation/docker.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/installation/index.md create mode 100644 versioned_docs/version-0.13.0/fluvio/installation/kubernetes.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/installation/local.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/overview.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/quickstart.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/troubleshooting.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/tutorials/_category_.json create mode 100644 versioned_docs/version-0.13.0/fluvio/tutorials/connector-transformations.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/tutorials/http-source.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/tutorials/index.md create mode 100644 versioned_docs/version-0.13.0/fluvio/tutorials/mirroring-iot-local.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/tutorials/mirroring-two-clusters.mdx create mode 100644 versioned_docs/version-0.13.0/fluvio/tutorials/sql-sink.mdx create mode 100644 versioned_docs/version-0.13.0/hub/connectors/_category_.json create mode 100644 versioned_docs/version-0.13.0/hub/connectors/inbound/_category_.json create mode 100644 versioned_docs/version-0.13.0/hub/connectors/inbound/http.mdx create mode 100644 
versioned_docs/version-0.13.0/hub/connectors/inbound/kafka.mdx create mode 100644 versioned_docs/version-0.13.0/hub/connectors/inbound/mqtt.mdx create mode 100644 versioned_docs/version-0.13.0/hub/connectors/index.md create mode 100644 versioned_docs/version-0.13.0/hub/connectors/outbound/_category_.json create mode 100644 versioned_docs/version-0.13.0/hub/connectors/outbound/duckdb.mdx create mode 100644 versioned_docs/version-0.13.0/hub/connectors/outbound/graphite.mdx create mode 100644 versioned_docs/version-0.13.0/hub/connectors/outbound/http.mdx create mode 100644 versioned_docs/version-0.13.0/hub/connectors/outbound/kafka.mdx create mode 100644 versioned_docs/version-0.13.0/hub/connectors/outbound/sql.mdx create mode 100644 versioned_docs/version-0.13.0/hub/overview.mdx create mode 100644 versioned_docs/version-0.13.0/hub/smartmodules/_category_.json create mode 100644 versioned_docs/version-0.13.0/hub/smartmodules/index.md create mode 100644 versioned_docs/version-0.13.0/hub/smartmodules/jolt.mdx create mode 100644 versioned_docs/version-0.13.0/hub/smartmodules/json-sql.mdx create mode 100644 versioned_docs/version-0.13.0/hub/smartmodules/regex.mdx create mode 100644 versioned_docs/version-0.13.0/hub/use-the-hub.mdx create mode 100644 versioned_docs/version-0.13.0/smartmodules/developers/_category_.json create mode 100644 versioned_docs/version-0.13.0/smartmodules/developers/build.mdx create mode 100644 versioned_docs/version-0.13.0/smartmodules/developers/generate.mdx create mode 100644 versioned_docs/version-0.13.0/smartmodules/developers/load.mdx create mode 100644 versioned_docs/version-0.13.0/smartmodules/developers/overview.mdx create mode 100644 versioned_docs/version-0.13.0/smartmodules/developers/publish.mdx create mode 100644 versioned_docs/version-0.13.0/smartmodules/features/_category_.json create mode 100644 versioned_docs/version-0.13.0/smartmodules/features/chaining.mdx create mode 100644 versioned_docs/version-0.13.0/smartmodules/features/deduplication.mdx create mode 100644 versioned_docs/version-0.13.0/smartmodules/features/lookback.mdx create mode 100644 versioned_docs/version-0.13.0/smartmodules/features/operators/_category_.json create mode 100644 versioned_docs/version-0.13.0/smartmodules/features/operators/array-map.mdx create mode 100644 versioned_docs/version-0.13.0/smartmodules/features/operators/filter-map.mdx create mode 100644 versioned_docs/version-0.13.0/smartmodules/features/operators/filter.mdx create mode 100644 versioned_docs/version-0.13.0/smartmodules/features/operators/map.mdx create mode 100644 versioned_docs/version-0.13.0/smartmodules/overview.mdx create mode 100644 versioned_docs/version-0.13.0/smartmodules/quickstart.mdx create mode 100644 versioned_docs/version-0.13.0/smartmodules/smdk.mdx create mode 100644 versioned_docs/version-0.13.0/smartmodules/tutorials/_category_.json create mode 100644 versioned_docs/version-0.13.0/smartmodules/tutorials/make-uppercase.mdx create mode 100644 versioned_sidebars/version-0.13.0-sidebars.json diff --git a/docusaurus.config.ts b/docusaurus.config.ts index 16387c1a..8245dd76 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -9,7 +9,7 @@ import sdfVersionsList from "./sdf_versions.json" with { type: "json" }; const FLUVIO_REPOSITORY_URL = "https://github.com/InfinyOn/fluvio"; -const STABLE_VERSION = "0.12.0"; +const STABLE_VERSION = "0.13.0"; // read sdf-versions.json and build versions object const sdfVersions = sdfVersionsList.reduce((acc, version) => ({ diff --git 
a/news/this-week-in-fluvio-0066.md b/news/this-week-in-fluvio-0066.md new file mode 100644 index 00000000..76086876 --- /dev/null +++ b/news/this-week-in-fluvio-0066.md @@ -0,0 +1,78 @@ +--- +title: "This Week in Fluvio #66" +date: 2024-11-18 +weight: 20 +--- +Fluvio is a distributed, programmable streaming platform written in Rust. + +--- +We released **Fluvio 0.13.0** last week. + +## New release +Fluvio **v0.13.0** is now available! + +To update, you can run `fvm update`: + +```bash +$ fvm update + +info: Updating fluvio stable to version 0.13.0. Current version is 0.12.1. +info: Downloading (1/5): fluvio@0.13.0 +info: Downloading (2/5): fluvio-cloud@0.2.26 +info: Downloading (3/5): fluvio-run@0.13.0 +info: Downloading (4/5): cdk@0.13.0 +info: Downloading (5/5): smdk@0.13.0 +done: Installed fluvio version 0.13.0 +done: Now using fluvio version 0.13.0 + +``` + +If you don't have Fluvio on your machine, run: + +``` +curl -fsS https://hub.infinyon.cloud/install/install.sh | bash +``` + +If you are enjoying Fluvio, please share it with your friends! + +## New features +Notable changes in this new version: + +- The parameter `max-request-size` was added to the produce config and is now recommended over `batch-size` for controlling request sizes. +- Added the `--partition` and `--mirror` arguments to the `fluvio produce` command, enabling you to specify the partition or mirror where records should be produced (see the sketch at the end of this post). + +## Bug fixes +This release includes several important bug fixes: + +- Resolved an issue where records exceeding the batch size caused unexpected behavior. +- Enhanced the reliability of linking parent objects in local mode to ensure consistent relationships. +- Fixed compatibility issues with older versions of the Fluvio CLI, ensuring seamless usage across versions. + +See the [CHANGELOG] for details. + +## Good First Issues +We love our open source community contributors. Here are some issues that you could contribute to. All the best. + +- [Add topic_producer_with_config to the Python Client] +- [Improve fluvio topic describe with additional information] +- [Different default SPU port] + + +--- + +Get in touch with us on [Github Discussions] or join [our Discord channel] and come say hello! + +For the full list of changes this week, be sure to check out [our CHANGELOG].
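+ +As a quick, hedged sketch of the new `fluvio produce` arguments (flag names are taken from the notes above; confirm the exact syntax on your install with `fluvio produce -h`): + +```bash +# Send a record to a specific partition of a topic. +# --partition is new in 0.13.0; --mirror similarly targets a named mirror. +echo '{"hello":"fluvio"}' | fluvio produce my-topic --partition 1 +```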
+ +[Fluvio open source]: https://github.com/infinyon/fluvio +[our CHANGELOG]: https://github.com/infinyon/fluvio/blob/master/CHANGELOG.md +[our Discord channel]: https://discordapp.com/invite/bBG2dTz +[Github Discussions]: https://github.com/infinyon/fluvio/discussions + +[this form]: https://infinyon.com/request/ss-early-access/ +[CHANGELOG]: https://github.com/infinyon/fluvio/blob/v0.13.0/CHANGELOG.md +[When a topic is deleted, connected clients should have their connection closed]: https://github.com/infinyon/fluvio/issues/3836 +[Remove localhost from fluvio in favor of 127.0.0.1]: https://github.com/infinyon/fluvio/issues/3866 +[Add topic_producer_with_config to the Python Client]: https://github.com/infinyon/fluvio/issues/4159 +[Improve fluvio topic describe with additional information]: https://github.com/infinyon/fluvio/issues/3968 +[Different default SPU port]: https://github.com/infinyon/fluvio/issues/3739 diff --git a/versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-http-source-cat-facts-xform.bash b/versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-http-source-cat-facts-xform.bash new file mode 100644 index 00000000..c8cec794 --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-http-source-cat-facts-xform.bash @@ -0,0 +1 @@ +cdk deploy start --ipkg infinyon-http-source-0.3.8.ipkg --config ./http-cat-facts-transform.yaml \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-http-source-cat-facts.bash b/versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-http-source-cat-facts.bash new file mode 100644 index 00000000..d0ae93f0 --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-http-source-cat-facts.bash @@ -0,0 +1 @@ +cdk deploy start --ipkg infinyon-http-source-0.3.8.ipkg --config ./http-cat-facts.yaml \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-http-source-quotes.bash b/versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-http-source-quotes.bash new file mode 100644 index 00000000..cba9bc71 --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-http-source-quotes.bash @@ -0,0 +1 @@ +cdk deploy start --ipkg infinyon-http-source-0.3.8.ipkg -c quotes-source-connector.yml \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-http-source-string-quotes.bash b/versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-http-source-string-quotes.bash new file mode 100644 index 00000000..439369db --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-http-source-string-quotes.bash @@ -0,0 +1 @@ +cdk deploy start --ipkg infinyon-http-source-0.3.8.ipkg -c string-quotes-source-connector.yml \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-sql-sink-cat-facts.bash b/versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-sql-sink-cat-facts.bash new file mode 100644 index 00000000..2e68256d --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/cmds/cdk-deploy-start-sql-sink-cat-facts.bash @@ -0,0 +1 @@ +cdk deploy start --ipkg infinyon-sql-sink-0.4.3.ipkg --config ./sql-cat-fact.yaml \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/_embeds/cmds/cdk-hub-download-http-source.bash b/versioned_docs/version-0.13.0/_embeds/cmds/cdk-hub-download-http-source.bash new file mode 100644 index 00000000..76576b6a --- /dev/null +++ 
b/versioned_docs/version-0.13.0/_embeds/cmds/cdk-hub-download-http-source.bash @@ -0,0 +1 @@ +cdk hub download infinyon/http-source@0.3.8 \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/_embeds/cmds/cdk-hub-download-sql-sink.bash b/versioned_docs/version-0.13.0/_embeds/cmds/cdk-hub-download-sql-sink.bash new file mode 100644 index 00000000..6076dcd3 --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/cmds/cdk-hub-download-sql-sink.bash @@ -0,0 +1 @@ +cdk hub download infinyon/sql-sink@0.4.3 \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/_embeds/cmds/hub-sm-download-jolt.bash b/versioned_docs/version-0.13.0/_embeds/cmds/hub-sm-download-jolt.bash new file mode 100644 index 00000000..29b73e57 --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/cmds/hub-sm-download-jolt.bash @@ -0,0 +1,2 @@ +fluvio hub smartmodule download infinyon/jolt@0.4.1 +... cluster smartmodule install complete diff --git a/versioned_docs/version-0.13.0/_embeds/cmds/hub-sm-download-json-sql.bash b/versioned_docs/version-0.13.0/_embeds/cmds/hub-sm-download-json-sql.bash new file mode 100644 index 00000000..1c786e2f --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/cmds/hub-sm-download-json-sql.bash @@ -0,0 +1 @@ +fluvio hub sm download infinyon/json-sql@0.2.1 \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/_embeds/cmds/hub-sm-list-jolt.bash b/versioned_docs/version-0.13.0/_embeds/cmds/hub-sm-list-jolt.bash new file mode 100644 index 00000000..755cfd12 --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/cmds/hub-sm-list-jolt.bash @@ -0,0 +1,4 @@ +fluvio hub smartmodule list + SMARTMODULE Visibility + infinyon/jolt@0.4.1 public + ... \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/_embeds/cmds/sm-list-jolt.bash b/versioned_docs/version-0.13.0/_embeds/cmds/sm-list-jolt.bash new file mode 100644 index 00000000..a6b44428 --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/cmds/sm-list-jolt.bash @@ -0,0 +1,3 @@ +fluvio smartmodule list + SMARTMODULE SIZE + infinyon/jolt@0.4.1 589.3 KB \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/_embeds/cmds/sm-list-json-sql-jolt.bash b/versioned_docs/version-0.13.0/_embeds/cmds/sm-list-json-sql-jolt.bash new file mode 100644 index 00000000..2a1b365e --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/cmds/sm-list-json-sql-jolt.bash @@ -0,0 +1,4 @@ +fluvio sm list + SMARTMODULE SIZE + infinyon/json-sql@0.2.1 559.6 KB + infinyon/jolt@0.4.1 589.3 KB \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/_embeds/connectors/http-cat-fact-basic.yaml b/versioned_docs/version-0.13.0/_embeds/connectors/http-cat-fact-basic.yaml new file mode 100644 index 00000000..40d93766 --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/connectors/http-cat-fact-basic.yaml @@ -0,0 +1,11 @@ +apiVersion: 0.1.0 +meta: + version: 0.3.8 + name: cat-facts + type: http-source + topic: cat-facts + create-topic: true + +http: + endpoint: "https://catfact.ninja/fact" + interval: 10s \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/_embeds/connectors/http-cat-fact-transform.yaml b/versioned_docs/version-0.13.0/_embeds/connectors/http-cat-fact-transform.yaml new file mode 100644 index 00000000..9aff771d --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/connectors/http-cat-fact-transform.yaml @@ -0,0 +1,18 @@ +apiVersion: 0.1.0 +meta: + version: 0.3.8 + name: cat-facts-transformed + type: http-source + topic: 
cat-facts-data-transform + +http: + endpoint: https://catfact.ninja/fact + interval: 10s + +transforms: + - uses: infinyon/jolt@0.4.1 + with: + spec: + - operation: default + spec: + source: "http" \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/_embeds/connectors/http-source-quotes-transform.yaml b/versioned_docs/version-0.13.0/_embeds/connectors/http-source-quotes-transform.yaml new file mode 100644 index 00000000..1540ef9d --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/connectors/http-source-quotes-transform.yaml @@ -0,0 +1,17 @@ +# string-quotes-source-connector.yml +apiVersion: 0.1.0 +meta: + version: 0.3.8 + name: string-quotes + type: http-source + topic: string-quotes +http: + endpoint: https://demo-data.infinyon.com/api/quote + interval: 3s +transforms: + - uses: infinyon/jolt@0.4.1 + with: + spec: + - operation: shift + spec: + quote: "" \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/_embeds/connectors/http-source-quotes.yaml b/versioned_docs/version-0.13.0/_embeds/connectors/http-source-quotes.yaml new file mode 100644 index 00000000..fc67dffd --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/connectors/http-source-quotes.yaml @@ -0,0 +1,10 @@ +# quotes-source-connector.yml +apiVersion: 0.1.0 +meta: + version: 0.3.8 + name: http-quotes + type: http-source + topic: quotes +http: + endpoint: https://demo-data.infinyon.com/api/quote + interval: 3s \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/_embeds/connectors/sql-cat-fact.yaml b/versioned_docs/version-0.13.0/_embeds/connectors/sql-cat-fact.yaml new file mode 100644 index 00000000..f525a306 --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/connectors/sql-cat-fact.yaml @@ -0,0 +1,27 @@ +# sql.yaml +apiVersion: 0.1.0 +meta: + name: simple-cat-facts-sql + type: sql-sink + version: 0.4.3 + topic: cat-facts +sql: + url: "postgres://user:password@db.postgreshost.example/dbname" +transforms: + - uses: infinyon/json-sql@0.2.1 + invoke: insert + with: + mapping: + table: "animalfacts" + map-columns: + "length": + json-key: "length" + value: + type: "int" + default: "0" + required: true + "raw_fact_json": + json-key: "$" + value: + type: "jsonb" + required: true \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/_embeds/transforms/infinyon-jolt.yaml b/versioned_docs/version-0.13.0/_embeds/transforms/infinyon-jolt.yaml new file mode 100644 index 00000000..7ea33d69 --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/transforms/infinyon-jolt.yaml @@ -0,0 +1,8 @@ +#transforms.yml +transforms: + - uses: infinyon/jolt@0.4.1 + with: + spec: + - operation: shift + spec: + quote: "" \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/_embeds/versions.tsx b/versioned_docs/version-0.13.0/_embeds/versions.tsx new file mode 100644 index 00000000..0ecc077e --- /dev/null +++ b/versioned_docs/version-0.13.0/_embeds/versions.tsx @@ -0,0 +1,5 @@ +const versions = { + infinyon_http_source: 'infinyon-http-source@0.3.8', +} + +export default versions; \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/cloud/cli/_category_.json b/versioned_docs/version-0.13.0/cloud/cli/_category_.json new file mode 100644 index 00000000..331eb95d --- /dev/null +++ b/versioned_docs/version-0.13.0/cloud/cli/_category_.json @@ -0,0 +1,5 @@ +{ + "label": "Cloud CLI", + "collapsed": true, + "position": 30 + } diff --git a/versioned_docs/version-0.13.0/cloud/cli/cluster.mdx 
b/versioned_docs/version-0.13.0/cloud/cli/cluster.mdx new file mode 100644 index 00000000..4717411c --- /dev/null +++ b/versioned_docs/version-0.13.0/cloud/cli/cluster.mdx @@ -0,0 +1,122 @@ +--- +sidebar_position: 20 +title: "cluster" +description: "Cluster functions using the Fluvio CLI" +--- + +The `fluvio cloud cluster` family of commands is used to create, delete, and troubleshoot Fluvio clusters in the cloud. + +```bash +$ fluvio cloud cluster -h +``` + +``` +$ fluvio cloud cluster +View Cluster information + +Usage: fluvio cloud cluster + +Commands: + create Create a new Fluvio cluster + delete Delete an existing Fluvio cluster + list List all Fluvio clusters + sync Sync Fluvio cluster profile + usage Print cluster usage stats +``` + +--- + +## `fluvio cloud cluster create` + +This command is used to provision a new cluster. + +```bash +$ fluvio cloud cluster create -h +``` + +``` +$ fluvio cloud cluster create +Create a new Fluvio cluster + +Usage: fluvio cloud cluster create [OPTIONS] + +Options: + --profile The name of the Profile to save + --version Fix the Fluvio version of this cluster (not recommended) + --region The ID of the region in which to create this cluster (beta) +``` + +Choosing a non-default region with `--region` is currently in private beta and not available to the public. + +Specifying `--version` fixes the Fluvio version and prevents the cluster from being auto-upgraded. This is for installing experimental releases and not generally recommended. + +## `fluvio cloud cluster sync` + +This command synchronizes the cluster connection info to the Fluvio config on the local machine. + +```bash +$ fluvio cloud cluster sync -h +``` + +## `fluvio cloud cluster list` + +Command to show the Fluvio clusters in the cloud associated with the current user. + +```bash +$ fluvio cloud cluster list -h +``` + +``` +$ fluvio cloud cluster list +List all Fluvio clusters +USAGE: + fluvio-cloud cluster list +``` + +Example usage: + +```bash +$ fluvio cloud cluster list + + ID ACTIVE STATE VERSION SPU_COUNT + my-cluster true Installed 0.10.0 1 +``` + +--- + +## `fluvio cloud cluster delete` + +This command deletes the specified cluster. + +```bash +$ fluvio cloud cluster delete -h +``` + +``` +$ fluvio cloud cluster delete +Delete an existing Fluvio cluster +USAGE: + fluvio-cloud cluster delete +``` + +Example usage: + +```bash +$ fluvio cloud cluster delete my-cluster +``` diff --git a/versioned_docs/version-0.13.0/cloud/cli/connector.mdx b/versioned_docs/version-0.13.0/cloud/cli/connector.mdx new file mode 100644 index 00000000..935d56f3 --- /dev/null +++ b/versioned_docs/version-0.13.0/cloud/cli/connector.mdx @@ -0,0 +1,244 @@ +--- +title: connector +weight: 101 +description: "CLI commands for connector operations." +--- + +The `fluvio cloud connector` subcommands are used to manage Connectors in InfinyOn Cloud.
+ +```bash +$ fluvio cloud connector -h +``` + +``` +fluvio-cloud-connector +View Fluvio Connector information +USAGE: + fluvio-cloud connector +OPTIONS: + -h, --help Print help information +SUBCOMMANDS: + config Show the connector configuration details + create Create a new Connector + delete Delete a Connector + help Print this message or the help of the given subcommand(s) + list List all Connectors + logs View connector logs + update Update and restart a Connector +``` + +For more info about using connectors, see the [Connectors page]. The available connector types are listed under the *Inbound* and *Outbound* sections. + +--- + +## `fluvio cloud connector create` + +This command is used to provision a new connector. + +```bash +$ fluvio cloud connector create -h +``` + +``` +fluvio cloud connector create +Create a new Connector + +Usage: fluvio cloud connector create [OPTIONS] --config + +Options: + --config Path to config file + --log-level Sets the log level, one of (error, warn, info, debug, trace) + -c, --cluster Name of cluster + -h, --help Print help +``` + +To create a connector, you need to create a YAML-based connector config file. + +For more about the connector config file, see the [Cloud connectors page]. + +When running `fluvio cloud connector create`, pass the path to this file using the `--config` +option. + +Example usage: + +```bash +$ fluvio cloud connector create --config=./cats.yaml +connector "cat-facts" (http-source) created +``` + +--- + +## `fluvio cloud connector config` + +Command to show the configuration file used to create this connector. + +```bash +$ fluvio cloud connector config -h +``` + +``` +fluvio-cloud-connector-config +Show the connector configuration details +USAGE: + fluvio-cloud connector config [OPTIONS] +ARGS: + Name of connector +OPTIONS: + -c, --cluster Name of cluster + -h, --help Print help information +``` + +Example output for the `cat-facts` connector created above: + +```yaml +apiVersion: 0.1.0 +meta: + version: x.y.z + name: cat-facts + type: http-source + topic: cat-facts +http: + endpoint: "https://catfact.ninja/fact" + interval: 10s +``` + +:::info +All versions are marked with `x.y.z`. To find the latest version, run: +* `fluvio hub connector list` +* `fluvio hub smartmodule list` +::: + +--- + +## `fluvio cloud connector list` + +This command shows you all the existing Connectors in your cluster. + +```bash +$ fluvio cloud connector list -h +``` + +``` +fluvio-cloud-connector-list +List all Connectors +USAGE: + fluvio-cloud connector list [OPTIONS] +OPTIONS: + -c, --cluster Name of cluster +``` + +--- + +## `fluvio cloud connector update` + +Command to update and restart an existing connector. + +```bash +$ fluvio cloud connector update -h +``` + +``` +fluvio cloud connector update +Update and restart a Connector + +Usage: fluvio-cloud connector update [OPTIONS] --config + +Options: + -c, --config Path to config file + --cluster Name of cluster + --log-level Sets the log level [default: LogLevel::default()] +``` + +Example usage: + +```bash +$ fluvio cloud connector update --config=./cats.yaml +connector "cat-facts" (http-source) updated +``` + +--- + +## `fluvio cloud connector logs` + +Command to view the logs written by the connector.
+ +```bash +$ fluvio cloud connector logs -h +``` + +``` +fluvio-cloud-connector-logs +View connector logs +USAGE: + fluvio-cloud connector logs [OPTIONS] +ARGS: + Name of connector +OPTIONS: + -c, --cluster Name of cluster +``` + +Example usage: + +```bash +$ fluvio cloud connector logs cat-facts +2022-10-21T14:55:13.508989Z INFO http_source: Starting HTTP source connector connector_version="0.4.1" git_hash="0ad913c5ceb732881fd753874e5082777bbed91e" +2022-10-21T14:55:13.509096Z INFO http_source: interval=10s method=GET topic=cat-facts output_parts=body output_type=text endpoint=https://catfact.ninja/fact +2022-10-21T14:55:13.510284Z INFO fluvio::config::tls: Using verified TLS with certificates from paths domain="broad-union-b685e7fda03fefb3d5221d0a3b9c64c7.c.infinyon.cloud" +2022-10-21T14:55:13.515459Z INFO fluvio::fluvio: Connecting to Fluvio cluster fluvio_crate_version="0.14.0" fluvio_git_hash="e96d8e2738ee39ddbb64fea37134f119f97e25bf" +2022-10-21T14:55:13.574584Z INFO connect: fluvio::sockets: connect to socket add=fluvio-sc-public:9003 +... +``` + +### Configure Logging Levels + +By default, connectors will log using the `info` level. +You can configure the log level for connectors running in the cloud using +the `--log-level` option. + +The `--log-level` option is available for both `fluvio cloud connector create` and `fluvio cloud connector update`. + +Any of the following levels can be used: + +- `error` +- `warn` +- `info` +- `debug` +- `trace` + +--- + +## `fluvio cloud connector delete` + +This command deletes an existing Connector. + +```bash +$ fluvio cloud connector delete -h +``` + +``` +fluvio-cloud-connector-delete +Delete a Connector +USAGE: + fluvio-cloud connector delete [OPTIONS] ... +ARGS: + ... One or more name(s) of the connector(s) to be deleted +OPTIONS: + -c, --cluster Name of cluster +``` + +Example usage: + +```bash +$ fluvio cloud connector delete cat-facts +connector "cat-facts" deleted +``` + +## References + +* [How to use Connectors] + +[How to use Connectors]: cloud/how-to/use-connectors.mdx + + +[Connectors page]: connectors/overview.mdx +[Cloud connectors page]: cloud/how-to/use-connectors.mdx \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/cloud/cli/overview.mdx b/versioned_docs/version-0.13.0/cloud/cli/overview.mdx new file mode 100644 index 00000000..8c6eab9f --- /dev/null +++ b/versioned_docs/version-0.13.0/cloud/cli/overview.mdx @@ -0,0 +1,23 @@ +--- +sidebar_position: 1 +title: "Overview" +description: "InfinyOn cloud CLI overview" +--- + +InfinyOn Cloud provides management via the `fluvio cloud` set of CLI commands. +`fluvio cloud` is part of the overall [Fluvio CLI]. + +The commands include the following: + +- [`fluvio cloud cluster`] +- [`fluvio cloud connector`] +- [`fluvio cloud secret`] +- [`fluvio cloud usage`] +- [`fluvio cloud webhook`] + +[Fluvio CLI]: fluvio/cli/overview.mdx +[`fluvio cloud cluster`]: cluster.mdx +[`fluvio cloud connector`]: connector.mdx +[`fluvio cloud secret`]: secret.mdx +[`fluvio cloud usage`]: usage.mdx +[`fluvio cloud webhook`]: webhook.mdx diff --git a/versioned_docs/version-0.13.0/cloud/cli/secret.mdx b/versioned_docs/version-0.13.0/cloud/cli/secret.mdx new file mode 100644 index 00000000..4660826a --- /dev/null +++ b/versioned_docs/version-0.13.0/cloud/cli/secret.mdx @@ -0,0 +1,123 @@ +--- +title: secret +weight: 102 +description: "CLI commands for secrets management." +--- + +Fluvio Cloud secrets are set via the CLI. Each secret is a named value, with all secrets sharing a single namespace per account.
Connector configuration files can refer to secrets by name, and the cloud connector infrastructure will provision the connector with the named secrets. + +Due to security concerns, listing actual secret values or downloading them after they have been set is not allowed. However, a listing of secret names as well as the date they were last set is accessible. + +## `fluvio cloud secret` subcommands + +The secrets CLI is a subcommand of `fluvio cloud`: `fluvio cloud secret`. + +Actions possible with `fluvio cloud secret` are: +- set +- delete +- list + +```bash +fluvio cloud secret set <name> <value> +fluvio cloud secret list +fluvio cloud secret delete <name> +``` + +## `fluvio cloud secret set` + +Setting a secret named `<name>` will allow it to be referenced by that name in connector configuration parameters that can use secret references. + +```bash +fluvio cloud secret set <name> <value> +``` +All secrets are in a shared connector namespace, but a specific connector is only given access to secrets named in the configuration file of the connector. + +## `fluvio cloud secret list` + +`fluvio cloud secret list` will list only the secret names and their last update time. Once a secret has been set in Fluvio Cloud, it is stored so that only referencing connectors may access it. There is no way to retrieve the secret value from Fluvio Cloud. + +```bash +$ fluvio cloud secret list +SecretNames LastUpdate +CAT_FACTS_CLIENT_ID 12-10-2022 1:07pm +CAT_FACTS_SECRET 01-02-2023 12:01am +``` + + +## `fluvio cloud secret delete` + +This will delete the named secret. + +```bash +fluvio cloud secret delete <name> +``` + +## Connector config file references + +Connector config files can reference cloud secrets by name. Secrets must be declared in the `meta` section of the connector config; the secret name can then be used in the connector configuration parameters as `${{ secrets.<name> }}`. + +```yaml +apiVersion: 0.1.0 +meta: + version: x.y.z + name: my-connector + type: package-name + topic: a-topic + secrets: + - name: CAT_FACTS_CLIENT_ID + - name: CAT_FACTS_SECRET +# named section for custom config parameters, usually a short name like "http", or "mqtt" +custom: + param_client_id: ${{ secrets.CAT_FACTS_CLIENT_ID }} + param_client_secret: ${{ secrets.CAT_FACTS_SECRET }} +``` + +## Example + +As an example of a connector that uses secret parameters, the HTTP connector might be set up and configured as follows. + +1. Set up a secret + +```bash +$ fluvio cloud secret set AUTH_HEADER "1234abcd" +``` + + +2. Write a connector config `http-config-with-secret.yaml` + +```bash +$ cat << END_CONFIG > http-config-with-secret.yaml +apiVersion: 0.1.0 +meta: + version: x.y.z + name: cat-facts + type: http-source + topic: cat-facts-secret + secrets: + - name: AUTH_HEADER +http: + endpoint: "https://catfact.ninja/fact" + interval: 10s + headers: + - "Authorization: bearer ${{ secrets.AUTH_HEADER }}" +END_CONFIG +``` + + +3. Run the connector + +```bash +$ fluvio cloud connector create --config http-config-with-secret.yaml +``` + + +This same configuration file is compatible with both Fluvio Cloud connectors and locally run CDK connectors. Cloud secrets are provisioned via the `fluvio cloud secret ...` set of commands, while CDK secrets are provided locally.
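+ +For a local CDK run, a minimal sketch of supplying the same secret (this assumes your installed `cdk` version supports the `--secrets` file flag; confirm with `cdk deploy start -h`): + +```bash +# secrets.txt holds one KEY=VALUE pair per line (hypothetical file name) +echo "AUTH_HEADER=1234abcd" > secrets.txt +# Deploy the connector locally, pointing at the same config and the secrets file +cdk deploy start --ipkg infinyon-http-source-0.3.8.ipkg \ + --config http-config-with-secret.yaml --secrets secrets.txt +```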
+ + +## References + +* [How to Use Secrets] + + +[How to Use Secrets]: cloud/how-to/use-secrets.mdx + diff --git a/versioned_docs/version-0.13.0/cloud/cli/usage.mdx b/versioned_docs/version-0.13.0/cloud/cli/usage.mdx new file mode 100644 index 00000000..880a4a5f --- /dev/null +++ b/versioned_docs/version-0.13.0/cloud/cli/usage.mdx @@ -0,0 +1,19 @@ +--- +title: usage +weight: 103 +description: "CLI commands to check your cluster usage status." +--- + +Query the CPU and memory usage of your SPUs. + +## `fluvio cloud usage` + +Example usage: + +```bash +$ fluvio cloud usage + SPU CPU Memory + main-0 1066037n 3168Ki + +Note: CPU usage is expressed in nanocores. 1 nanocore is equal to 1 billionth of 1 core. +``` diff --git a/versioned_docs/version-0.13.0/cloud/cli/webhook.mdx b/versioned_docs/version-0.13.0/cloud/cli/webhook.mdx new file mode 100644 index 00000000..033a6e1f --- /dev/null +++ b/versioned_docs/version-0.13.0/cloud/cli/webhook.mdx @@ -0,0 +1,164 @@ +--- +title: webhook +weight: 100 +description: "CLI commands for webhook operations." +--- + +The `fluvio cloud webhook` family of commands is used to create, delete, and troubleshoot Webhooks in the cloud. + +```bash +$ fluvio cloud webhook -h +``` + +``` +Manage Webhooks + +Usage: fluvio-cloud webhook + +Commands: + create Create webhook + delete Delete webhook + list List webhooks + update Update webhook + logs View webhook connector logs +``` + +## `fluvio cloud webhook create` + +This command is used to provision a new webhook. + +```bash +$ fluvio cloud webhook create -h +``` + +``` +Create webhook + +Usage: fluvio-cloud webhook create [OPTIONS] [NAME] + +Arguments: + [NAME] Name of webhook + +Options: + --topic + --config Webhook config + -c, --cluster Name of cluster +``` + +Example usage: + +```bash +$ fluvio cloud webhook create --config webhook-config.yaml +Webhook "my-webhook" created with url: https://infinyon.cloud/webhooks/v1/ +``` +See the [Webhook config reference] for more on config files. + +## `fluvio cloud webhook list` + +Command to list the webhooks in the cluster associated with the current user. + +```bash +$ fluvio cloud webhook list -h +``` + +``` +List webhooks + +Usage: fluvio-cloud webhook list + +Options: + -c, --cluster Name of cluster +``` + +Example usage: + +```bash +$ fluvio cloud webhook list + NAME TOPIC URL + my-webhook my-topic https://infinyon.cloud/webhooks/v1/ +``` + +## `fluvio cloud webhook delete` + +This command deletes the specified webhook. + +```bash +$ fluvio cloud webhook delete -h +``` + +``` +Delete webhook + +Usage: fluvio-cloud webhook delete + +Arguments: + Name of webhook + +Options: + -c, --cluster Name of cluster +``` + +Example usage: + +```bash +$ fluvio cloud webhook delete my-webhook +Webhook "my-webhook" deleted +``` + +## `fluvio cloud webhook update` + +```bash +$ fluvio cloud webhook update -h +``` + +``` +Update webhook + +Usage: fluvio-cloud webhook update --config + +Options: + --config Webhook config + -c, --cluster Name of cluster +``` + +Example usage: + +```bash +$ fluvio cloud webhook update --config webhook-config.yaml +``` + +See the [Webhook config reference] for more on config files.
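+ +As a hedged illustration of exercising a webhook once it is created (the URL shape comes from the `create` output above, where the key segment is elided; substitute your own): + +```bash +# Post a JSON event to the webhook endpoint returned by `webhook create`. +# Replace <your-webhook-key> with the key from your own create output. +curl -X POST "https://infinyon.cloud/webhooks/v1/<your-webhook-key>" \ + -H "Content-Type: application/json" \ + -d '{"event":"test"}' +```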
+ +## `fluvio cloud webhook logs` + +```bash +$ fluvio cloud webhook logs -h +``` + +``` +View webhook connector logs + +Usage: fluvio-cloud webhook logs + +Arguments: + Name of webhook + +Options: + -c, --cluster Name of cluster + +``` + +Example usage: + +```bash +$ fluvio cloud webhook logs my-webhook +[Log output] +``` + +## References + +* [How to use Webhooks] +* [Webhook config reference] + +[How to use Webhooks]: cloud/how-to/use-webhooks.mdx +[Webhook config reference]: cloud/concepts/webhook-config.mdx \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/cloud/concepts/_category_.json b/versioned_docs/version-0.13.0/cloud/concepts/_category_.json new file mode 100644 index 00000000..a74083c7 --- /dev/null +++ b/versioned_docs/version-0.13.0/cloud/concepts/_category_.json @@ -0,0 +1,5 @@ +{ + "label": "Concepts", + "collapsed": true, + "position": 50 +} diff --git a/versioned_docs/version-0.13.0/cloud/concepts/webhook-config.mdx b/versioned_docs/version-0.13.0/cloud/concepts/webhook-config.mdx new file mode 100644 index 00000000..f0bb51a5 --- /dev/null +++ b/versioned_docs/version-0.13.0/cloud/concepts/webhook-config.mdx @@ -0,0 +1,51 @@ +--- +title: Webhook Configuration +weight: 100 +description: "Reference for Webhook configs" +--- + +## Webhook config template +```yaml +# example-webhook-template.yaml +meta: + name: my-webhook + topic: my-topic +# optional +transforms: + - uses: smartmodule_name + with: + param_name: param_value +# optional +webhook: + outputParts: [body | full (default)] + outputType: [text | json (default)] +``` + +## Config options +### Meta +* `name` - The name of your webhook +* `topic` - The name of the topic where you want events to be stored. It will be automatically created if it doesn't exist. + +### Transforms +Webhook connectors support `transforms`. Records can be modified before they are sent to the topic. The transforms section is a list of transform objects. Each transform object has a `uses` and a `with` section. + +* `uses` is the reference to the SmartModule used in the transform. + * `with` is the configuration for the transform + * The section is different for each transform + * See the connectors reference documentation for available configuration options + +### Webhook +The output record from the webhook request is configurable. + +`outputParts` options: +* `full` - Return the headers and body of the request (Default) +* `body` - Only return the body of the request + +`outputType` options: +* `json` - Output is parsed into JSON (Default) +* `text` - Output is plaintext + + +## References + +* [How to Use WebHooks] + +[How to Use WebHooks]: ../how-to/use-webhooks.mdx diff --git a/versioned_docs/version-0.13.0/cloud/how-to/_category_.json b/versioned_docs/version-0.13.0/cloud/how-to/_category_.json new file mode 100644 index 00000000..b608de59 --- /dev/null +++ b/versioned_docs/version-0.13.0/cloud/how-to/_category_.json @@ -0,0 +1,5 @@ +{ + "label": "How To", + "collapsed": false, + "position": 10 +} diff --git a/versioned_docs/version-0.13.0/cloud/how-to/use-connectors.mdx b/versioned_docs/version-0.13.0/cloud/how-to/use-connectors.mdx new file mode 100644 index 00000000..e6dfbdf2 --- /dev/null +++ b/versioned_docs/version-0.13.0/cloud/how-to/use-connectors.mdx @@ -0,0 +1,152 @@ +--- +title: "Use Connectors" +description: "A short guide on using Cloud Connectors" +sidebar_position: 1 +--- + +Using connectors with [InfinyOn Cloud] is the best option for those who want to manage their data pipelines in one place.
+
+Configuring connectors works the same way with InfinyOn Cloud as it does locally.
+
+You can create and maintain your connectors in a more streamlined way through the [Cloud CLI].
+
+## Create your first connector on InfinyOn Cloud
+
+This guide will walk you through creating an Inbound HTTP connector to ingest JSON data from an HTTP endpoint.
+
+To follow this guide, you will need to sign up for [InfinyOn Cloud] and log into the CLI.
+
+
+```bash
+fluvio cloud login
+```
+
+:::tip
+  Check out the [Cloud CLI docs] for more details about logging into the CLI.
+:::
+
+### Example HTTP connector
+
+This is the config file for the [Inbound HTTP connector] in this guide.
+
+```yaml
+apiVersion: 0.1.0
+meta:
+  version: x.y.z
+  name: cat-facts
+  type: http-source
+  topic: cat-facts
+http:
+  endpoint: "https://catfact.ninja/fact"
+  interval: 10s
+```
+
+:::info
+All versions are marked with `x.y.z`. To find the latest version, run:
+* `fluvio hub connector list`
+* `fluvio hub smartmodule list`
+:::
+
+In this config, we are creating a connector named `cat-facts`. It will request data from a cat fact API once every 10 seconds and receive JSON data. The connector will store the JSON in a topic called `cat-facts`.
+
+
+#### Start a connector
+
+You can create a connector by using `fluvio cloud connector create` with the example connector config saved as `catfacts-basic-connector.yml`.
+
+```bash copy="fl"
+$ fluvio cloud connector create --config catfacts-basic-connector.yml
+connector "cat-facts" (http-source) created
+```
+#### List all connectors
+
+After the connector is created, you can list the connectors you've created, and view their current status.
+
+```bash copy="fl"
+$ fluvio cloud connector list
+ NAME       TYPE         VERSION  CDK  STATUS
+ cat-facts  http-source  x.y.z    V3   Running
+```
+
+#### Look at connector logs
+
+If there is a need to debug the behavior of a connector, the logs are available by running `fluvio cloud connector logs cat-facts`.
+
+```bash copy="fl"
+$ fluvio cloud connector logs cat-facts
+connector-startup infinyon/http-source@x.y.z
+2023-03-25T03:41:29.570294Z  INFO surf::middleware::logger::native: sending request
+2023-03-25T03:41:29.702213Z  INFO surf::middleware::logger::native: request completed
+2023-03-25T03:41:29.702258Z  INFO connector_startup::startup: downloading package url="https://hub.infinyon.cloud/hub/v0/connector/pkg/infinyon/http-source/x.y.z"
+2023-03-25T03:41:29.702290Z  INFO surf::middleware::logger::native: sending request
+2023-03-25T03:41:29.993001Z  INFO surf::middleware::logger::native: request completed
+2023-03-25T03:41:30.108220Z  INFO connector_startup::startup: writing file file="connector.ipkg"
+... checking package
+2023-03-25T03:41:30.301199Z  INFO connector_startup::startup: connector binary from package path="./http-source"
+2023-03-25T03:41:30.301224Z  INFO connector_startup::startup: Starting deployment
+Connector runs with process id: 15
+2023-03-25T03:41:30.303333Z  INFO http_source: Reading config file from: /home/fluvio/config.yaml
+2023-03-25T03:41:30.303526Z  INFO http_source: starting processing
+2023-03-25T03:41:30.304337Z  INFO fluvio::config::tls: Using verified TLS with certificates from paths domain="odd-butterfly-0dea7a035980a4679d0704f654e1a14e.c.cloud-dev.fluvio.io"
+2023-03-25T03:41:30.308822Z  INFO fluvio::fluvio: Connecting to Fluvio cluster fluvio_crate_version="0.16.0" fluvio_git_hash="8d4023ee0dc7735aaa0c823dd2b235662112f090"
+2023-03-25T03:41:30.369634Z  INFO connect: fluvio_socket::versioned: connect to socket add=fluvio-sc-public:9003
+2023-03-25T03:41:30.412895Z  INFO connect:connect_with_config: fluvio::config::tls: Using verified TLS with certificates from paths domain="odd-butterfly-0dea7a035980a4679d0704f654e1a14e.c.cloud-dev.fluvio.io"
+2023-03-25T03:41:30.473242Z  INFO connect:connect_with_config:connect: fluvio_socket::versioned: connect to socket add=fluvio-sc-public:9003
+2023-03-25T03:41:30.582726Z  INFO dispatcher_loop{self=MultiplexDisp(12)}: fluvio_socket::multiplexing: multiplexer terminated
+2023-03-25T03:41:30.632722Z  INFO fluvio_connector_common::monitoring: using metric path: /fluvio_metrics/connector.sock
+2023-03-25T03:41:30.632795Z  INFO fluvio_connector_common::monitoring: monitoring started
+2023-03-25T03:41:31.172075Z  INFO run:create_serial_socket_from_leader{leader_id=0}:connect_to_leader{leader=0}:connect: fluvio_socket::versioned: connect to socket add=fluvio-spu-main-0.acct-ce0c1782-ca61-4c54-a08c-3ba985524553.svc.cluster.local:9005
+```
+
+#### View data in topic
+
+The HTTP connector should be receiving data and storing it in a topic with the name we specified.
+
+```shell copy="fl"
+$ fluvio topic list
+  NAME       TYPE      PARTITIONS  REPLICAS  RETENTION TIME  COMPRESSION  STATUS                   REASON
+  cat-facts  computed  1           1         7days           any          resolution::provisioned
+```
+
+To verify, you can consume from the topic with the `fluvio consume` CLI.
+
+We are using the `-B` option to start from the beginning offset of the topic. Once you reach the end of the topic, you can see new data as it is sent to the topic. To exit this live view, press `Ctrl+C`.
+
+:::tip
+  Using the `--disable-continuous` flag with `fluvio consume` will exit the stream once the last record has been printed to the screen.
+:::
+
+```shell
+$ fluvio consume cat-facts -B
+{"fact":"Female felines are \\superfecund","length":31}
+{"fact":"Cats only sweat through their paws and nowhere else on their body","length":65}
+{"fact":"While many parts of Europe and North America consider the black cat a sign of bad luck, in Britain and Australia, black cats are considered lucky.","length":146}
+^C
+```
+
+#### Delete a connector
+
+When you want to stop the connector, you can delete it with `fluvio cloud connector delete cat-facts`.
+
+```shell copy="fl"
+$ fluvio cloud connector delete cat-facts
+connector "cat-facts" deleted
+```
+
+Deleting your connector will not delete the topic used by the connector.
If you want to delete the topic, you can run `fluvio topic delete cat-facts`.
+
+```shell copy="fl"
+$ fluvio topic delete cat-facts
+topic "cat-facts" deleted
+```
+
+### Conclusion
+
+We created a basic Inbound HTTP connector, looked at the logs for the connector, and viewed the HTTP response data in the Fluvio topic. Lastly, we deleted the connector and topic.
+
+You are ready to create your own connectors! Check out the docs for our supported Inbound and Outbound connectors to get started with your own data sources.
+
+
+[InfinyOn Cloud]: https://infinyon.cloud
+[Cloud CLI]: cloud/cli/overview.mdx
+[Cloud CLI docs]: cloud/cli/overview.mdx
diff --git a/versioned_docs/version-0.13.0/cloud/how-to/use-secrets.mdx b/versioned_docs/version-0.13.0/cloud/how-to/use-secrets.mdx
new file mode 100644
index 00000000..df2c0520
--- /dev/null
+++ b/versioned_docs/version-0.13.0/cloud/how-to/use-secrets.mdx
@@ -0,0 +1,49 @@
+---
+sidebar_position: 3
+title: "Use Secrets"
+description: "A short guide on using Cloud Secret"
+---
+
+Manage sensitive data like passwords and API keys securely in Fluvio Cloud using the `fluvio cloud secret` command. For more details, see the [Cloud CLI docs][cli-secret].
+
+## Secret names
+
+For the sake of portability, secret names must:
+* consist solely of letters, digits, and the underscore (`_`)
+* not begin with a digit
+
+## Set secret values on Cloud
+
+```shell copy="fl"
+$ fluvio cloud secret set my_secret my-secret-value
+Secret "my_secret" set successfully
+```
+
+## Using secrets with connectors
+
+Cloud Connectors support secrets, which can be referenced in the connector configuration file.
+
+To use secrets, first define them in the metadata section of the connector configuration file as a list of names. These names are then used to reference the secrets elsewhere in the connector configuration file.
+
+
+```yaml
+# http-source-connector-with-secrets.yml
+apiVersion: 0.1.0
+meta:
+  version: x.y.z
+  name: my-http-source-connector
+  type: http-source
+  topic: my-topic
+  secrets:
+    - name: MY_TOKEN
+
+http:
+  endpoint: https://my.secure.api/
+  interval: 10s
+  headers:
+    - "AUTHORIZATION: token ${{ secrets.MY_TOKEN }}"
+```
+
+This config defines a `MY_TOKEN` secret and uses it in the `headers` section of the http-source connector.
+
+[cli-secret]: cloud/cli/secret.mdx
diff --git a/versioned_docs/version-0.13.0/cloud/how-to/use-webhooks.mdx b/versioned_docs/version-0.13.0/cloud/how-to/use-webhooks.mdx
new file mode 100644
index 00000000..3cd05a56
--- /dev/null
+++ b/versioned_docs/version-0.13.0/cloud/how-to/use-webhooks.mdx
@@ -0,0 +1,156 @@
+---
+title: "Use Webhooks"
+description: "A short guide on using Cloud Webhooks"
+sidebar_position: 2
+---
+
+This tutorial assumes that `fluvio` is installed and logged in to InfinyOn Cloud. Follow the [Quick Start] to get set up.
+
+Webhooks are special connectors with an associated external URL. Users can send data to their topics via an `HTTP POST` request.
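+
+For example, once a webhook is created (as shown below), any HTTP client can publish a record to its unique URL. A sketch, where the `[random string]` suffix stands in for the value returned at creation time:
+
+```bash
+# a sketch: substitute your webhook's actual URL
+curl -X POST -d '{"hello": "world"}' https://infinyon.cloud/webhooks/v1/[random string]
+```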
+
+## Webhook Configuration
+
+The webhook configuration file has the following structure:
+
+```yaml
+meta:
+  name: my-webhook # required
+  topic: my-webhook # required
+  logLevel: debug # default: info, options: [trace, debug, info, warn, error]
+  secrets:
+    - name: my-key
+  producer:
+    linger: 100ms
+    batch-size: 1mb
+    compression: snappy # default: none, options: [none, gzip, snappy, lz4, zstd]
+webhook:
+  outputParts: full # default: body, options: [full, body]
+  outputType: json # default: text, options: [text, json]
+  outputUri: full # default: none, options: [full, none, path, query]
+transforms:
+  - uses: infinyon/jolt@x.y.z
+    with:
+      key: ${{ secrets.my_key }}
+```
+
+The minimum configuration needs only the two required fields at the top: `name` and `topic`. The `name` is the name of the webhook, and the `topic` is the topic that the webhook will publish to.
+
+
+## Create a simple Webhook
+
+Let's create an example configuration file:
+
+```yaml
+# example-webhook.yaml
+meta:
+  name: my-webhook
+  topic: my-webhook-topic
+```
+
+Add a webhook to InfinyOn Cloud:
+
+```shell copy="fl"
+$ fluvio cloud webhook create --config example-webhook.yaml
+```
+
+Your output should look similar to this. We'll cover sending data to this URL shortly.
+
+```
+Webhook "my-webhook" created with url: https://infinyon.cloud/webhooks/v1/[random string]
+```
+
+If you need this URL again, you can run this command to list your webhooks and their URLs.
+
+```shell copy="fl"
+$ fluvio cloud webhook list
+```
+
+Example output:
+
+```
+ NAME        TOPIC             URL
+ my-webhook  my-webhook-topic  https://infinyon.cloud/webhooks/v1/[random string]
+```
+
+### Send data to webhook
+
+We'll start by sending a plain-text payload to our webhook using `curl`. Replace the URL so `[random string]` matches your unique URL. Keep this command close, because we'll refer to this example curl command later.
+
+```shell copy="fl"
+$ curl -v -X POST -d 'Hello World!' https://infinyon.cloud/webhooks/v1/[random string]
+```
+
+In another terminal, start a consumer that reads from the beginning:
+
+```shell copy="fl"
+$ fluvio consume my-webhook-topic -B
+```
+
+We should see the following output:
+
+```bash
+Hello World!
+```
+
+## Create a JSON Webhook and embed HTTP parts
+
+Next, we'll send JSON records, but before we do that we'll modify `outputParts`, `outputType`, and `outputUri` in the `example-webhook.yaml` configuration file:
+
+```yaml
+# example-webhook.yaml
+meta:
+  name: my-webhook
+  topic: my-webhook-topic
+
+webhook:
+  outputParts: full
+  outputType: json
+  outputUri: full
+```
+
+Run this command to update your webhook:
+
+```shell
+$ fluvio cloud webhook update --config example-webhook.yaml
+Webhook "my-webhook" updated
+```
+
+Let's restart the consumer with a JSON formatter:
+
+```shell
+$ fluvio consume my-webhook-topic -O json
+```
+
+Run another curl with a JSON payload:
+
+```bash
+$ curl -v -X POST -H "Content-Type: application/json" -d '{"key": "value"}' https://infinyon.cloud/webhooks/v1/[random string]
+```
+
+The consumer should now show the following output:
+
+```json
+{
+  "body": {
+    "key": "value"
+  },
+  "headers": {
+    "accept": "*/*",
+    "accept-encoding": "gzip, br",
+    "content-length": "16",
+    "content-type": "application/json",
+    "host": "infinyon.cloud",
+    "user-agent": "curl/x.y.z",
+    "x-forwarded-for": "..."
+  },
+  "path": "",
+  "query": ""
+}
+```
+
+## Conclusion
+
+You now know how to create Webhooks and configure their output. Check out the [Webhook Config] reference to see how to configure other transformations.
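+
+As a starting point, a `transforms` section (see the template earlier on this page) slots into the same config file. A sketch that keeps only the `key` field of incoming JSON, assuming the `infinyon/jolt` SmartModule is available on your cluster and `x.y.z` is replaced with a real version from the Hub:
+
+```yaml
+# example-webhook-transform.yaml (a sketch)
+meta:
+  name: my-webhook
+  topic: my-webhook-topic
+transforms:
+  - uses: infinyon/jolt@x.y.z
+    with:
+      spec:
+        - operation: shift
+          spec:
+            key: ""   # hoist the value of "key" to the record root
+```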
+
+[Quick Start]: fluvio/quickstart.mdx
+[Webhook Config]: cloud/concepts/webhook-config.mdx
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/cloud/overview.mdx b/versioned_docs/version-0.13.0/cloud/overview.mdx
new file mode 100644
index 00000000..f86a0330
--- /dev/null
+++ b/versioned_docs/version-0.13.0/cloud/overview.mdx
@@ -0,0 +1,28 @@
+---
+sidebar_position: 1
+title: Overview
+description: "InfinyOn Cloud - Fully managed Fluvio and Stateful DataFlow as a service."
+---
+
+# InfinyOn Cloud Overview
+
+InfinyOn Cloud is the fully managed deployment of [Fluvio] and [Stateful DataFlow] as a service, enabling developers to deploy and scale intelligent applications with confidence.
+
+InfinyOn Cloud offers plans for solo developers to test drive Fluvio clusters on the cloud with minimal friction. Solo builders can try out InfinyOn Cloud in our preferred availability zone with a discounted cluster fee and generous trial credits. InfinyOn Cloud also offers `Sandbox`, `Professional` and `Enterprise` plans for serious workloads.
+
+To test workflows on InfinyOn Cloud, builders need to fill out the access request form, and we will grant access to the appropriate plan.
+
+InfinyOn Cloud offers the following upgrades over Self-Hosted Fluvio across the different plans:
+- Single User Clusters in US on the Solo plan
+- Multi User Clusters in US on the Sandbox plan
+- Multi User Clusters in preferred availability zone on Pro Plan or higher
+- Role based access control in Pro and Enterprise Plan
+- Multi User Multi Region on the Enterprise Plan
+- Partition scaling on the Pro Plan or higher
+- Advanced tracing on the Pro Plan or higher
+- Zero Trust Encryption in Enterprise
+- Dedicated support and SLAs
+- SOC2, HIPAA, ISO, and additional compliance on Enterprise Plan
+
+[Fluvio]: fluvio/overview.mdx
+[Stateful DataFlow]: /sdf/quickstart
diff --git a/versioned_docs/version-0.13.0/cloud/quickstart.mdx b/versioned_docs/version-0.13.0/cloud/quickstart.mdx
new file mode 100644
index 00000000..f9526e21
--- /dev/null
+++ b/versioned_docs/version-0.13.0/cloud/quickstart.mdx
@@ -0,0 +1,376 @@
+---
+sidebar_position: 0
+title: Quickstart
+description: "Quickstart to get up and running quickly with InfinyOn Cloud"
+---
+
+# InfinyOn Cloud Quickstart
+
+This guide will outline using the [Fluvio CLI] with [InfinyOn Cloud] or a local [Fluvio Cluster].
+
+1. [Install Fluvio Client (CLI)](#1-installing-fluvio-client-cli)
+2. [Create an InfinyOn Cloud Account](#2-create-an-infinyon-cloud-account)
+3. [Start a Cluster](#3-start-a-cluster)
+4. [Produce & Consume Records](#4-produce-and-consume-records)
+5. [Use Connectors](#5-use-connectors)
+6. [Use SmartModules](#6-use-smartmodules)
+7. [Build Stateful Dataflows](#7-build-stateful-dataflows)
+
+Let's get started.
+
+## 1. Installing Fluvio Client (CLI)
+
+You'll need to download and install the CLI.
+
+```bash copy="fl"
+$ curl -fsS https://hub.infinyon.cloud/install/install.sh | bash
+```
+
+This command will download the Fluvio Version Manager (fvm), the Fluvio CLI (fluvio), and config files into `$HOME/.fluvio`, with the executables in `$HOME/.fluvio/bin`. To complete the installation, you must add the executables to your shell `$PATH`.
+
+
+## 2. Create an InfinyOn Cloud Account
+
+Head over to the [InfinyOn Cloud sign-up page] and create an account. Depending on which method you chose at account creation, you can log in with OAuth2 or a username/password.
+
+Log in to InfinyOn Cloud using **OAuth2**:
+
+```bash copy="fl"
+$ fluvio cloud login --use-oauth2
+A web browser has been opened at https://infinyon-cloud.us.auth0.com/activate?user_code=GLMC-QDDJ.
+Please proceed with authentication.
+```
+
+Or, log in with your **username/password**:
+
+```bash copy="fl"
+$ fluvio cloud login
+InfinyOn Cloud email: john@example.com
+Password:
+```
+
+
+## 3. Start a Cluster
+
+Use the [Fluvio CLI] to start a cluster on **InfinyOn Cloud** (you must be logged in, as instructed in [section 2](#2-create-an-infinyon-cloud-account)):
+
+```bash copy="fl"
+$ fluvio cloud cluster create
+```
+
+Run the following command to check the CLI and the cluster platform versions:
+
+```bash copy="fl"
+$ fluvio version
+```
+
+
+## 4. Produce and Consume records
+
+### Create your first topic
+
+Topics are used to store data and serve data streams.
+
+You can create a topic with the following command:
+
+```bash copy="fl"
+$ fluvio topic create quickstart-topic
+```
+
+where `quickstart-topic` is the name of your topic.
+
+Read more [about Topics in the Fluvio docs].
+
+
+### Produce data to your topic
+
+You can send data (aka *produce*) to your topic.
+
+Let's try to produce text to your topic interactively:
+
+```bash copy="fl"
+$ fluvio produce quickstart-topic
+> hello world!
+Ok!
+```
+
+Typing anything and then pressing `Enter` will send a record to your topic.
+
+Press `Ctrl+C` to exit the interactive producer prompt.
+
+:::tip
+  You may also pipe output to `fluvio`:
+
+  ```bash copy="fl"
+  echo "hello world!" | fluvio produce quickstart-topic
+  ```
+:::
+
+Read more [about Producers in the Fluvio docs].
+
+
+### Consume data from your topic
+
+You can read data (aka *consume*) from your topic.
+
+This command will create a consumer that listens to your topic for new records and then prints them to the screen:
+
+```bash copy="fl"
+$ fluvio consume quickstart-topic
+Consuming records from the end of topic 'quickstart-topic'. This will wait for new records
+```
+
+To see this in action, open another terminal and produce new data.
+
+To see previously sent data, you can add an option to your consume command to request a starting offset with the `-B` flag.
+
+```bash copy="fl"
+$ fluvio consume quickstart-topic -B -d
+hello world!
+```
+
+**Flags**:
+* `-B` - reads from the beginning of the stream (defaults to `0` if no value supplied).
+* `-d` - closes the consumer connection after all data has been sent.
+
+Read more [about Consumers in the Fluvio docs].
+
+
+## 5. Use Connectors
+
+InfinyOn offers [a growing number of connectors] to communicate with external services.
+In this example, we will be covering the [HTTP Source] connector. The connector polls an HTTP endpoint that returns [a random quote] every 3 seconds and writes the result to a topic called `quotes`.
+
+Save the following configuration file on your machine:
+
+```yaml
+# quotes-source-connector.yml
+apiVersion: 0.1.0
+meta:
+  version: x.y.z
+  name: http-quotes
+  type: http-source
+  topic: quotes
+http:
+  endpoint: https://demo-data.infinyon.com/api/quote
+  interval: 3s
+```
+
+You may run the connector on InfinyOn Cloud or your local machine.
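+
+For the local route, the general flow (sketched here from the local connector docs; exact flags and package names can vary by release) is to download the connector package with `cdk` and deploy it with the same config file:
+
+```bash
+# a sketch: assumes the CDK is installed and a local Fluvio cluster is running
+cdk hub download infinyon/http-source@x.y.z
+cdk deploy start --ipkg infinyon-http-source-x.y.z.ipkg --config ./quotes-source-connector.yml
+```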
+
+
+### Run HTTP Connector on InfinyOn Cloud
+
+To start a connector in **InfinyOn Cloud**, use the following command:
+
+```bash copy="fl"
+$ fluvio cloud connector create -c quotes-source-connector.yml
+```
+
+Use the following command to see the connector status.
+
+```bash copy="fl"
+$ fluvio cloud connector list
+ NAME         TYPE         VERSION  CDK  STATUS   LOG-LEVEL
+ http-quotes  http-source  x.y.z    V3   Running  info
+```
+
+We can monitor new data in the connector's topic with `fluvio consume quotes`:
+
+```bash copy="fl"
+$ fluvio consume quotes
+Consuming records from 'quotes'
+{"quote":"We cannot solve our problems with the same thinking we used when we created them.","by":"Albert Einstein"}
+{"quote":"Whatever you are, be a good one.","by":"Abraham Lincoln"}
+{"quote":"You can't build a reputation on what you're going to do.","by":"Henry Ford"}
+{"quote":"Success is not final, failure is not fatal: It is the courage to continue that counts.","by":"Winston Churchill"}
+```
+
+You may delete your cloud connector with the following command: `fluvio cloud connector delete http-quotes`.
+
+Read more [about Connectors in the Fluvio docs].
+
+
+## 6. Use SmartModules
+
+SmartModules are user-defined functions compiled into [WebAssembly] and applied to data streams for inline data manipulation. You can use SmartModules in producers and consumers, as well as in Connectors. InfinyOn has several pre-compiled SmartModules that you can use out of the box. Alternatively, you can use the [SmartModule Developer Kit (smdk)] to build your own.
+
+
+### Download a SmartModule from the Hub
+
+The InfinyOn Hub has a growing library of SmartModules available for download:
+
+```bash copy="fl"
+$ fluvio hub smartmodule list
+  SMARTMODULE                              Visibility
+  infinyon-labs/array-map-json@x.y.z       public
+  infinyon-labs/dedup-filter@x.y.z         public
+  infinyon-labs/json-formatter@x.y.z       public
+  infinyon-labs/key-gen-json@x.y.z         public
+  infinyon-labs/regex-map-json@x.y.z       public
+  infinyon-labs/regex-map@x.y.z            public
+  infinyon-labs/rss-json@x.y.z             public
+  infinyon-labs/stars-forks-changes@x.y.z  public
+  infinyon/jolt@x.y.z                      public
+  infinyon/json-sql@x.y.z                  public
+  infinyon/regex-filter@x.y.z              public
+```
+
+In this example, we'll use a [SmartModule called jolt] to turn JSON records into sentences.
+
+Let's download the SmartModule to our cluster:
+
+```bash copy="fl"
+$ fluvio hub smartmodule download infinyon/jolt@x.y.z
+... cluster smartmodule install complete
+```
+
+Check the cluster to ensure it has been successfully downloaded:
+
+```bash copy="fl"
+$ fluvio smartmodule list
+  SMARTMODULE          SIZE
+  infinyon/jolt@x.y.z  611.5 KB
+```
+
+Next, we'll create a transform file and test the output.
+
+
+### Create a SmartModule transformation file
+
+SmartModules can be chained together and often require additional parameters. Fluvio uses a YAML file to define the transformations.
+
+Create a `transforms.yaml` file and copy/paste the following definition:
+
+```yaml
+# File: transforms.yaml
+transforms:
+  - uses: infinyon/jolt@x.y.z
+    with:
+      spec:
+        - operation: shift
+          spec:
+            quote: ""
+```
+
+Jolt is a complex SmartModule that allows you to perform multiple types of JSON transformations. For additional information, check out the [SmartModule Jolt] docs.
+
+
+### Test the SmartModule
+
+As the quotes are readily available for us in the `quotes` topic, we'll use the consumer command to test this SmartModule.
+
+```bash copy="fl"
+$ fluvio consume quotes --transforms-file transforms.yaml -T=2
+Consuming records from 'quotes' starting 2 from the end of log
+"The greatest glory in living lies not in never falling, but in rising every time we fall."
+"Simplicity is the ultimate sophistication."
+```
+
+We are consuming the last two records of the `quotes` topic and transforming the JSON into a string.
+
+
+### Apply the SmartModule to the Connector
+
+Let's say we don't need the authors of the quotes, only the quotes themselves represented as strings. We can accomplish this by simply applying the transformation to the connector.
+
+Let's create a new `http-source` connector and add the transformation:
+
+```yaml
+# string-quotes-source-connector.yml
+apiVersion: 0.1.0
+meta:
+  version: x.y.z
+  name: string-quotes
+  type: http-source
+  topic: string-quotes
+http:
+  endpoint: https://demo-data.infinyon.com/api/quote
+  interval: 3s
+transforms:
+  - uses: infinyon/jolt@x.y.z
+    with:
+      spec:
+        - operation: shift
+          spec:
+            quote: ""
+```
+
+We're ready to run the connector:
+
+```bash copy="fl"
+$ fluvio cloud connector create -c string-quotes-source-connector.yml
+```
+
+Use the following command to see the connector status.
+
+```bash copy="fl"
+$ fluvio cloud connector list
+ NAME           TYPE         VERSION  CDK  STATUS   LOG-LEVEL
+ string-quotes  http-source  x.y.z    V3   Running  info
+ http-quotes    http-source  x.y.z    V3   Running  info
+```
+
+Let's take a look at `string-quotes`:
+
+```bash copy="fl"
+$ fluvio consume string-quotes
+Consuming records from 'string-quotes'
+"It's not whether you get knocked down, it's whether you get up."
+"Honesty is the first chapter in the book of wisdom."
+```
+
+We now have two topics running in parallel and producing different results with a simple SmartModule transformation. When you apply inline transformations, the number of possibilities is virtually endless.
+
+
+## 7. Build Stateful Dataflows
+
+Stateful Dataflows are currently in preview. With stateful dataflows, you can chain services, accumulate state, and perform window-based aggregates.
+
+Check out the [SDF section] for additional information.
+
+## Clean-up Resources
+
+During this tutorial, we've created connectors that continue generating traffic to our cloud cluster. Run the following commands to clean up:
+
+```bash copy="fl"
+fluvio cloud connector delete http-quotes
+fluvio cloud connector delete string-quotes
+fluvio topic delete quotes
+fluvio topic delete string-quotes
+```
+
+
+## Next Steps
+
+Now that you're familiar with using InfinyOn Cloud via the [Cloud CLI], check out our [Tutorials].
+
+[Fluvio CLI]: fluvio/cli/overview.mdx
+[Fluvio Cluster]: fluvio/concepts/architecture/overview.mdx
+[Infinyon Cloud]: https://infinyon.cloud/
+[InfinyOn Hub]: hub/overview.mdx
+[Connector Development Kit (cdk)]: connectors/cdk.mdx
+[SmartModule called jolt]: hub/smartmodules/jolt.mdx
+[SmartModule Jolt]: hub/smartmodules/jolt.mdx
+[SmartModule Developer Kit (smdk)]: smartmodules/smdk.mdx
+[about Topics in the Fluvio docs]: fluvio/concepts/topics.mdx
+[about Producers in the Fluvio docs]: fluvio/concepts/produce-consume.mdx#producer
+[about Consumers in the Fluvio docs]: fluvio/concepts/produce-consume.mdx#consumer
+[a growing number of connectors]: hub/overview.mdx#connectors
+[a random quote]: https://demo-data.infinyon.com/api/quote
+[about Connectors in the Fluvio docs]: connectors/overview.mdx
+[HTTP Source]: hub/connectors/inbound/http.mdx
+[InfinyOn Cloud sign-up page]: https://infinyon.cloud/ui/signup
+[Docker Installation]: fluvio/installation/docker.mdx
+[WebAssembly]: https://webassembly.org/
+[Cloud CLI]: cli/overview.mdx
+[Tutorials]: tutorials/index.md
+[SDF section]: /sdf
diff --git a/versioned_docs/version-0.13.0/cloud/tutorials/_category_.json b/versioned_docs/version-0.13.0/cloud/tutorials/_category_.json
new file mode 100644
index 00000000..6ca447b0
--- /dev/null
+++ b/versioned_docs/version-0.13.0/cloud/tutorials/_category_.json
@@ -0,0 +1,5 @@
+{
+  "label": "Tutorials",
+  "collapsible": true,
+  "position": 20
+}
diff --git a/versioned_docs/version-0.13.0/cloud/tutorials/amplitude-analytics.mdx b/versioned_docs/version-0.13.0/cloud/tutorials/amplitude-analytics.mdx
new file mode 100644
index 00000000..a3ac3a54
--- /dev/null
+++ b/versioned_docs/version-0.13.0/cloud/tutorials/amplitude-analytics.mdx
@@ -0,0 +1,120 @@
+---
+sidebar_position: 55
+title: "DataPipeline to Amplitude"
+description: "Data pipeline that collects events from multiple services and sends them to Amplitude."
+---
+
+With this guide, you can send events from InfinyOn Cloud to Amplitude. Connecting your services to this pipeline is simpler than maintaining a fleet of bespoke microservices.
+
+To follow along with this guide, you need:
+* The [Amplitude API key] from your account.
+* An [InfinyOn Cloud] cluster and the [Fluvio CLI].
+
+## Setup
+
+### Create secret
+
+Follow [Amplitude's instructions for how to collect your api token], then create a secret that the connector can use when building the JSON request for Amplitude.
+
+Amplitude upload requests require an API token, and all events for the production environment use the same token. We'll transform the service event to include this value in the next step.
+
+
+```
+$ fluvio cloud secret set AMPLITUDE_API_TOKEN <value>
+Secret "AMPLITUDE_API_TOKEN" set successfully
+```
+
+### Create connector
+
+Example event:
+
+```json
+{
+  "timestamp": "2023-09-06T12:02:29.658014825Z",
+  "event": {
+    "user_id": "user@example.com",
+    "event_type": "ServiceAbcExampleEvent",
+    "time": 1696629748241,
+    "app_version": "c738ca3",
+    "event_id": 5,
+    "session_id": 9876645851321,
+    "insert_id": "d768b1b3-1055-4db8-b214-619b5a321ef5"
+  }
+}
+```
+
+In this example, each service sends a JSON object to a topic containing a timestamp for when the event occurred and an [Amplitude event object].
+
+Before sending to Amplitude with an HTTP outbound connector, we'll transform the original payload into the [Amplitude upload request json].
+
+The transform consists of:
+* Removing the `timestamp` key
+* Adding the `api_key` key with the value from our `AMPLITUDE_API_TOKEN` secret
+* Shifting the contents of the `event` key into an array with the key `events`
+
+Example connector config with transforms:
+
+:::info
+All versions are marked with `x.y.z`. To find the latest version, run:
+* `fluvio hub connector list`
+* `fluvio hub smartmodule list`
+:::
+
+```yaml
+# amplitude-connector.yaml
+apiVersion: 0.1.0
+meta:
+  version: x.y.z
+  name: amplitude-connector
+  type: http-sink
+  topic: service-events
+  secrets:
+    - name: AMPLITUDE_API_TOKEN
+http:
+  endpoint: "https://api2.amplitude.com/2/httpapi"
+transforms:
+  - uses: infinyon/jolt@x.y.z
+    with:
+      spec:
+        - operation: remove
+          spec:
+            timestamp: ""
+        - operation: shift
+          spec:
+            "event": "events[0]"
+        - operation: default
+          spec:
+            api_key: "${{ secrets.AMPLITUDE_API_TOKEN }}"
+```
+
+Save the config and run the following command to create the connector.
+
+
+```shell
+$ fluvio cloud connector create -c amplitude-connector.yaml
+connector "amplitude-connector" (http-sink) created
+```
+
+### Send a test event to topic
+
+The following command will send an example event to the topic our connector is watching.
+
+
+```shell
+$ echo '{"timestamp":"2023-09-06T12:02:29.658014825Z","event":{"user_id":"user@example.com","event_type":"ServiceAbcExampleEvent"}}' | fluvio produce service-events
+```
+
+### Look at Amplitude for your event
+
+In the Amplitude dashboard, you should be able to verify that the test event arrived under `User Look-up`.
+
+
+
+This is the end of the guide. Once you instrument your services, you should be able to quickly send analytics events to Amplitude from InfinyOn Cloud.
+
+[Amplitude API key]: https://www.docs.developers.amplitude.com/analytics/find-api-credentials/
+[Amplitude event object]: https://www.docs.developers.amplitude.com/analytics/apis/http-v2-api/#keys-for-the-event-argument
+[Amplitude upload request json]: https://www.docs.developers.amplitude.com/analytics/apis/http-v2-api/#upload-request-body-parameters
+[Amplitude's instructions for how to collect your api token]: https://www.docs.developers.amplitude.com/analytics/find-api-credentials/
+[InfinyOn Cloud]: https://infinyon.cloud/ui/signup
+[Fluvio CLI]: fluvio/quickstart.mdx#install-fluvio
diff --git a/versioned_docs/version-0.13.0/cloud/tutorials/cloudflare-workers.mdx b/versioned_docs/version-0.13.0/cloud/tutorials/cloudflare-workers.mdx
new file mode 100644
index 00000000..0b56f8fd
--- /dev/null
+++ b/versioned_docs/version-0.13.0/cloud/tutorials/cloudflare-workers.mdx
@@ -0,0 +1,200 @@
+---
+sidebar_position: 51
+title: "How to create a Cloudflare worker that sends Webhook events to InfinyOn Cloud"
+description: "How to create a Cloudflare worker that sends events to a Webhook in InfinyOn Cloud."
+---
+
+# Integrate Cloudflare Workers with InfinyOn Cloud
+
+Connect your Cloudflare workers with InfinyOn Cloud for powerful event-processing data pipelines. InfinyOn Cloud's robust data streaming allows you to seamlessly move and transform data and trigger actions.
+
+In this guide, we'll build a simple Cloudflare worker that sends events to InfinyOn Cloud through the [webhook API].
+
+##### Use Cases
+* Send form submission [notifications to Slack].
+* Send clickstream [events to Amplitude].
+* Send form submissions to HubSpot.
+
+## Prerequisites
+
+To follow along, you'll need the following:
+
+* [npm] & [curl] installed locally
+* [Fluvio CLI] installed locally
+* Account on [InfinyOn Cloud]
+
+Let's get started.
+
+## Create a Webhook
+
+External services send events to InfinyOn Cloud via webhooks, connectors, and custom clients. In this example, we'll use webhooks.
+
+Use [Fluvio CLI] to create a webhook on [InfinyOn Cloud].
+
+1. Create a webhook configuration file `cf-webhook.yaml`:
+
+```yaml
+meta:
+  name: cf-webhook
+  topic: cf-events
+
+webhook:
+  outputParts: body
+  outputType: json
+```
+
+2. Create the webhook endpoint:
+
+```bash
+$ fluvio cloud webhook create -c cf-webhook.yaml
+Webhook "cf-webhook" created with url: https://infinyon.cloud/webhooks/v1/xyz
+```
+
+The command returns an endpoint that tells Cloudflare where InfinyOn is listening for events.
+
+Use `fluvio cloud webhook list` to list all your webhooks.
+
+## Build a Cloudflare Worker
+
+Cloudflare uses [Wrangler], a command-line tool that helps developers build workers.
+
+#### Install Wrangler
+
+Use `npm` to install wrangler:
+
+```bash
+$ npm install -g wrangler
+```
+
+#### Create a Worker
+
+Next, we'll create a directory, write the worker code, and provision a configuration file for wrangler to access our code.
+
+1. Create a project directory:
+
+```bash
+$ mkdir cf-infinyon; cd ./cf-infinyon
+```
+
+2. Create a file `index.js` and add the following code:
+
+```js
+const WEBHOOK_URL = "https://infinyon.cloud/webhooks/v1/xyz";
+
+addEventListener("fetch", (event) => {
+  event.respondWith(handleRequest(event.request));
+});
+
+async function handleRequest(request) {
+  let jsonData = await request.json();
+
+  const response = await fetch(WEBHOOK_URL, {
+    method: "POST",
+    headers: {
+      "Content-type": `application/json`,
+    },
+    body: JSON.stringify(jsonData)
+  });
+
+  const text_response = response.ok ? "" : "Webhook gateway error.";
+
+  return new Response(text_response, { status: response.status });
+}
+```
+
+The worker fetches an event, retrieves its JSON payload, and forwards it to the webhook gateway on InfinyOn Cloud.
+
+**Note**: Update the endpoint link with your own ([see above](#create-a-webhook)).
+
+
+3. Create a Wrangler configuration file `wrangler.toml` with the following settings:
+
+```toml
+name = "cf-infinyon"
+main = "index.js"
+compatibility_date = "2023-09-04"
+```
+
+We are all set to run the code, but first let's review the directory:
+
+```bash
+├── index.js
+└── wrangler.toml
+```
+
+## Test Cloudflare to InfinyOn Cloud Pipeline
+
+With all the components provisioned, we should be ready to test our data pipeline end-to-end.
+
+1. Start the Cloudflare worker:
+
+```bash
+$ wrangler dev
+Starting local server...
+Ready on http://0.0.0.0:8787
+```
+
+2. Start the InfinyOn Cloud consumer:
+
+```bash
+$ fluvio consume cf-events --output json
+Consuming records from 'cf-events'
+⠤
+```
+
+3. Use curl to post an event:
+
+```bash
+$ curl -v -X POST http://0.0.0.0:8787 \
+    -H "Content-Type: application/json" \
+    -d '{"hello": "world!"}'
+```
+
+4. The InfinyOn consumer should display the following event:
+
+```
+{
+  "hello": "world!"
+}
+```
+
+Congratulations! :tada: You have bridged Cloudflare workers with InfinyOn Cloud, the first step in building data-rich event-driven services.
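+
+If you want to take the worker beyond local testing with `wrangler dev`, publishing it to Cloudflare's edge is typically a single command (a sketch: check the Wrangler docs for your version, since older releases used `wrangler publish`):
+
+```bash
+wrangler deploy
+```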
+
+**Next steps**:
+* apply [smartmodule transformations] inside the webhook configuration to shape the data before it is written to the topic
+* attach a sink connector that dispatches these events to other services such as [Slack], [Amplitude], [SQL databases], etc.
+
+
+### Reference
+
+* [Webhook to Slack]
+* [Webhook Basics]
+* [Webhook Configuration File]
+
+
+[notifications to Slack]: ./webhook-to-slack.mdx
+[events to Amplitude]: cloud/tutorials/amplitude-analytics.mdx
+[webhook API]: cloud/how-to/use-webhooks.mdx
+[npm]: https://docs.npmjs.com/downloading-and-installing-node-js-and-npm
+[curl]: https://everything.curl.dev
+[Fluvio CLI]: fluvio/quickstart.mdx#install-fluvio
+[InfinyOn Cloud]: https://infinyon.cloud/ui/signup
+[Wrangler]: https://developers.cloudflare.com/workers/wrangler/install-and-update/
+[smartmodule transformations]: fluvio/concepts/transformations.mdx
+[Slack]: https://slack.com
+[Amplitude]: https://amplitude.com
+[SQL databases]: https://en.wikipedia.org/wiki/SQL
+[Webhook to Slack]: ./webhook-to-slack.mdx
+[Webhook Basics]: cloud/how-to/use-webhooks.mdx
+[Webhook Configuration File]: ./webhook-to-slack.mdx#create-webhook-configuration-file
diff --git a/versioned_docs/version-0.13.0/cloud/tutorials/github-to-discord.mdx b/versioned_docs/version-0.13.0/cloud/tutorials/github-to-discord.mdx
new file mode 100644
index 00000000..79cc10a4
--- /dev/null
+++ b/versioned_docs/version-0.13.0/cloud/tutorials/github-to-discord.mdx
@@ -0,0 +1,169 @@
+---
+sidebar_position: 11
+title: "Create a Data Pipeline from GitHub to Discord"
+description: "Data pipeline that detects changes in GitHub and publishes them as events to Discord."
+---
+
+Create an end-to-end event pipeline that detects changes in GitHub stars & forks and publishes the result to Discord. This guide uses two connectors:
+
+* [http-source]: to periodically read from the GitHub API, parse the fields from the `json` output, and publish the result to a topic.
+* [http-sink]: to listen to the same topic, detect changes, and publish the result to Discord.
+
+Let's get started.
+
+## Prerequisites
+
+* [Fluvio CLI] running locally
+* Account on [InfinyOn Cloud]
+
+## Step-by-Step
+
+1. [Create http-source configuration file](#create-http-source-configuration-file)
+2. [Create http-sink configuration file](#create-http-sink-configuration-file)
+3. [Download smartmodules](#download-smartmodules)
+4. [Start Connectors](#start-connectors)
+5. [Test Data Pipeline](#test-data-pipeline)
+
+### Create http-source configuration file
+
+Create an HTTP source connector configuration file called `github.yaml`:
+
+:::info
+All versions are marked with `x.y.z`. To find the latest version, run:
+* `fluvio hub connector list`
+* `fluvio hub smartmodule list`
+:::
+
+```yaml
+apiVersion: 0.1.0
+meta:
+  version: x.y.z
+  name: github-stars-in
+  type: http-source
+  topic: stars-forks
+  secrets:
+    - name: GITHUB_TOKEN
+http:
+  endpoint: 'https://api.github.com/repos/infinyon/fluvio'
+  method: GET
+  headers:
+    - 'Authorization: token ${{ secrets.GITHUB_TOKEN }}'
+  interval: 30s
+transforms:
+  - uses: infinyon/jolt@x.y.z
+    with:
+      spec:
+        - operation: shift
+          spec:
+            "stargazers_count": "stars"
+            "forks_count": "forks"
+```
+
+GitHub rate-limits API requests to 60 per hour, which you can extend to 5000 by creating an application token. Check out the GitHub documentation on how to create [Access Tokens].
+
+Add the access token `secret` in [InfinyOn Cloud]:
+
+```bash
+$ fluvio cloud secret set GITHUB_TOKEN <value>
+```
+
+### Create http-sink configuration file
+
+Create an HTTP sink connector configuration file called `discord.yaml`:
+
+```yaml
+apiVersion: 0.1.0
+meta:
+  version: x.y.z
+  name: discord-stars-out
+  type: http-sink
+  topic: stars-forks
+  secrets:
+    - name: DISCORD_TOKEN
+http:
+  endpoint: "https://discord.com/api/webhooks/${{ secrets.DISCORD_TOKEN }}"
+  headers:
+    - "Content-Type: application/json"
+transforms:
+  - uses: infinyon-labs/stars-forks-changes@x.y.z
+    lookback:
+      last: 1
+  - uses: infinyon/jolt@x.y.z
+    with:
+      spec:
+        - operation: shift
+          spec:
+            "result": "content"
+```
+
+Check out [Discord Webhooks] on how to create a channel webhook token.
+
+Add the access token `secret` in [InfinyOn Cloud]:
+
+```bash
+$ fluvio cloud secret set DISCORD_TOKEN <value>
+```
+
+### Download SmartModules
+
+Download the SmartModules used by the connectors to your cluster:
+
+```bash copy="cmd"
+$ fluvio hub sm download infinyon/jolt@x.y.z
+$ fluvio hub sm download infinyon-labs/stars-forks-changes@x.y.z
+```
+
+
+### Start Connectors
+
+Start the source & sink connectors:
+
+```bash
+$ fluvio cloud connector create -c github.yaml
+$ fluvio cloud connector create -c discord.yaml
+```
+
+Check `fluvio cloud connector logs` to ensure they have been successfully provisioned.
+
+### Test Data Pipeline
+
+Check the last values generated by the github connector:
+
+```bash
+$ fluvio consume -dT 1 stars-forks
+{"stars":1770,"forks":138}
+```
+
+Produce a new value:
+
+```bash
+$ fluvio produce stars-forks
+> {"stars":1769,"forks":138}
+OK
+```
+
+An alert with `:star2: 1769` will show up in your Discord channel. See it live at the [Fluvio Community - #alerts] channel.
+
+
+### Reference
+
+* [labs-stars-forks-changes-sm]
+
+
+---
+
+[labs-stars-forks-changes-sm]: https://github.com/infinyon/labs-stars-forks-changes-sm
+[Fluvio CLI]: fluvio/quickstart.mdx#install-fluvio
+[InfinyOn Cloud]: https://infinyon.cloud/ui/signup
+[http-sink]: https://github.com/infinyon/http-sink-connector
+[http-source]: https://github.com/infinyon/http-source-connector
+[Access Tokens]: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token
+[Discord Webhooks]: https://support.discord.com/hc/en-us/articles/228383668-Intro-to-Webhooks
+[Fluvio Community - #alerts]: https://discord.com/channels/695712741381636168/961802307727683644
diff --git a/versioned_docs/version-0.13.0/cloud/tutorials/github-to-slack.mdx b/versioned_docs/version-0.13.0/cloud/tutorials/github-to-slack.mdx
new file mode 100644
index 00000000..f4999c2e
--- /dev/null
+++ b/versioned_docs/version-0.13.0/cloud/tutorials/github-to-slack.mdx
@@ -0,0 +1,168 @@
+---
+sidebar_position: 10
+title: "Create a Data Pipeline from GitHub to Slack"
+description: "Data pipeline that detects changes in GitHub and publishes them as events to Slack."
+---
+
+Create an end-to-end event pipeline that detects changes in GitHub stars & forks and publishes the result to Slack. This guide uses two connectors:
+
+* [http-source]: to periodically read from the GitHub API, parse the fields from the `json` output, and publish the result to a topic.
+* [http-sink]: to listen to the same topic, detect changes, and publish the result to Slack.
+
+Let's get started.
+
+## Prerequisites
+
+* [Fluvio CLI] running locally
+* Account on [InfinyOn Cloud]
+
+## Step-by-Step
+
+1. [Create http-source configuration file](#create-http-source-configuration-file)
+2. [Create http-sink configuration file](#create-http-sink-configuration-file)
+3. [Download smartmodules](#download-smartmodules)
+4. [Start Connectors](#start-connectors)
+5. [Test Data Pipeline](#test-data-pipeline)
+
+### Create http-source configuration file
+
+Create an HTTP source connector configuration file called `github.yaml`:
+
+:::info
+All versions are marked with `x.y.z`. To find the latest version, run:
+* `fluvio hub connector list`
+* `fluvio hub smartmodule list`
+:::
+
+```yaml
+apiVersion: 0.1.0
+meta:
+  version: x.y.z
+  name: github-stars-in
+  type: http-source
+  topic: stars-forks
+  secrets:
+    - name: GITHUB_TOKEN
+http:
+  endpoint: 'https://api.github.com/repos/infinyon/fluvio'
+  method: GET
+  headers:
+    - 'Authorization: token ${{ secrets.GITHUB_TOKEN }}'
+  interval: 30s
+transforms:
+  - uses: infinyon/jolt@x.y.z
+    with:
+      spec:
+        - operation: shift
+          spec:
+            "stargazers_count": "stars"
+            "forks_count": "forks"
+```
+
+GitHub rate-limits API requests to 60 per hour, which you can extend to 5000 by creating an application token. Check out the GitHub documentation on how to create [Access Tokens].
+
+Add the access token `secret` in [InfinyOn Cloud]:
+
+```bash
+$ fluvio cloud secret set GITHUB_TOKEN <value>
+```
+
+### Create http-sink configuration file
+
+Create an HTTP sink connector configuration file called `slack.yaml`:
+
+```yaml
+apiVersion: 0.1.0
+meta:
+  version: x.y.z
+  name: slack-stars-out
+  type: http-sink
+  topic: stars-forks
+  secrets:
+    - name: SLACK_TOKEN
+http:
+  endpoint: "https://hooks.slack.com/services/${{ secrets.SLACK_TOKEN }}"
+  headers:
+    - "Content-Type: application/json"
+transforms:
+  - uses: infinyon-labs/stars-forks-changes@x.y.z
+    lookback:
+      last: 1
+  - uses: infinyon/jolt@x.y.z
+    with:
+      spec:
+        - operation: shift
+          spec:
+            "result": "text"
+```
+
+Check out [Slack Webhooks] on how to create a channel webhook token.
+
+Add the access token `secret` in [InfinyOn Cloud]:
+
+```bash
+$ fluvio cloud secret set SLACK_TOKEN <value>
+```
+
+### Download SmartModules
+
+Download the SmartModules used by the connectors to your cluster:
+
+```bash
+$ fluvio hub sm download infinyon/jolt@x.y.z
+$ fluvio hub sm download infinyon-labs/stars-forks-changes@x.y.z
+```
+
+
+### Start Connectors
+
+Start the source & sink connectors:
+
+```bash
+$ fluvio cloud connector create -c github.yaml
+$ fluvio cloud connector create -c slack.yaml
+```
+
+Check `fluvio cloud connector logs` to ensure they have been successfully provisioned.
+
+### Test Data Pipeline
+
+Check the last values generated by the github connector:
+
+```bash
+$ fluvio consume -dT 1 stars-forks
+{"stars":1770,"forks":138}
+```
+
+Produce a new value:
+
+```bash
+$ fluvio produce stars-forks
+> {"stars":1769,"forks":138}
+OK
+```
+
+An alert with `:star2: 1769` will show up in your Slack channel.
+
+
+### Reference
+
+* [labs-stars-forks-changes-sm]
+
+
+---
+
+[labs-stars-forks-changes-sm]: https://github.com/infinyon/labs-stars-forks-changes-sm
+[Fluvio CLI]: fluvio/quickstart.mdx#install-fluvio
+[InfinyOn Cloud]: https://infinyon.cloud/ui/signup
+[http-sink]: https://github.com/infinyon/http-sink-connector
+[http-source]: https://github.com/infinyon/http-source-connector
+[Access Tokens]: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token
+[Slack Webhooks]: https://api.slack.com/messaging/webhooks
diff --git a/versioned_docs/version-0.13.0/cloud/tutorials/hackernews-reader.mdx b/versioned_docs/version-0.13.0/cloud/tutorials/hackernews-reader.mdx
new file mode 100644
index 00000000..1717e1f3
--- /dev/null
+++ b/versioned_docs/version-0.13.0/cloud/tutorials/hackernews-reader.mdx
@@ -0,0 +1,101 @@
+---
+sidebar_position: 52
+title: "Create a Hackernews Reader"
+description: "Data pipeline that periodically reads articles from Hackernews and publishes them on a topic."
+---
+
+The Hackernews Reader helps you build an XML reader that ingests Hackernews articles, converts them to `json`, divides them into records, and publishes each record to a topic. This guide uses the following connector:
+
+* [http-source]: to periodically read from the Hackernews RSS feed, parse the XML result into `json` records, and publish the result to a topic.
+
+## Prerequisites
+
+* [Fluvio CLI] running locally
+* Account on [InfinyOn Cloud]
+
+## Step-by-Step
+
+1. [Create http-source configuration file](#create-http-source-configuration-file)
+2. [Download smartmodules](#download-smartmodules)
+3. [Start Connector](#start-connector)
+4. [Check Results](#check-results)
+
+### Create http-source configuration file
+
+Create an HTTP source connector configuration file called `hackernews.yaml`:
+
+:::info
+All versions are marked with `x.y.z`. To find the latest version, run:
+* `fluvio hub connector list`
+* `fluvio hub smartmodule list`
+:::
+
+```yaml
+apiVersion: 0.1.0
+meta:
+  version: x.y.z
+  name: hackernews
+  type: http-source
+  topic: hackernews
+http:
+  method: GET
+  endpoint: 'https://hnrss.org/newest'
+  interval: 600s
+transforms:
+  - uses: infinyon-labs/rss-json@x.y.z
+  - uses: infinyon/jolt@x.y.z
+    with:
+      spec:
+        - operation: shift
+          spec:
+            items: ""
+  - uses: infinyon-labs/array-map-json@x.y.z
+```
+
+### Download smartmodules
+
+Download the SmartModules used by the connector to your cluster:
+
+```bash
+$ fluvio hub sm download infinyon/jolt@x.y.z
+$ fluvio hub sm download infinyon-labs/rss-json@x.y.z
+$ fluvio hub sm download infinyon-labs/array-map-json@x.y.z
+```
+
+### Start Connector
+
+```bash
+$ fluvio cloud connector create -c hackernews.yaml
+```
+
+### Check Results
+
+Connector logs:
+
+```bash
+$ fluvio cloud connector logs hackernews
+```
+
+Records produced:
+
+```bash
+$ fluvio consume hackernews -T 10
+```
+
+### Reference
+
+- [http-source]
+- [array-map-json]
+- [rss-json]
+- [jolt]
+
+---
+
+[Fluvio CLI]: fluvio/quickstart.mdx#install-fluvio
+[InfinyOn Cloud]: https://infinyon.cloud/ui/signup
+[http-source]: https://github.com/infinyon/http-source-connector
+[array-map-json]: https://github.com/infinyon/labs-array-map-json-sm
+[rss-json]: https://github.com/infinyon/labs-rss-json-sm
+[jolt]: https://github.com/infinyon/fluvio-jolt
diff --git a/versioned_docs/version-0.13.0/cloud/tutorials/http-to-sql.mdx b/versioned_docs/version-0.13.0/cloud/tutorials/http-to-sql.mdx
new file mode 100644
index 00000000..4cc31291
--- /dev/null
+++ b/versioned_docs/version-0.13.0/cloud/tutorials/http-to-sql.mdx
@@ -0,0 +1,363 @@
+---
+sidebar_position: 30
+title: "Create a Data Pipeline from HTTP to SQL"
+description: "Data pipeline that periodically reads from a website and sends the result to a SQL database."
+---
+
+This guide expects that your Fluvio cluster is already installed. If you need to install it, please follow the instructions [here][installation]!
+
+## Connector Pipeline
+
+*(Figure: execution flow of an InfinyOn pipeline)*
+
+
+There are two main steps for this tutorial:
+* Creating an Inbound HTTP Connector to collect JSON
+  * Receive data without any modifications
+  * JSON to JSON transformation before sending to the topic
+* Creating an Outbound SQL Connector to insert the input JSON into a database
+  * Basic insert
+  * JSON to JSON transformation before insert
+
+We will be looking at the [Inbound HTTP Connector] setup, connecting to the [catfact.ninja] API to ingest and store JSON data into a topic.
+
+The Outbound connector will be using a [PostgreSQL] database. It will listen to the topic for new records and insert them into a table.
+
+You can use your own PostgreSQL instance, if it can be reached over the internet. But you can still follow along by creating a PostgreSQL database at a hosting service, such as [ElephantSQL].
+
+## Connectors
+
+If you wish to automatically collect information from one source and send it to
+Fluvio, or send data from Fluvio to another location, Connectors are the way to go. When
+given the information on the interface through the Connector configuration file,
+Fluvio can poll a multitude of input types.
+
+### Connector Configuration
+
+A detailed description of the Connector configuration file can be found in the [Connector Configuration] page.
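+
+In broad strokes, every connector config in this guide shares the same `meta` block, followed by a connector-specific section. A sketch (see the linked page for the authoritative schema):
+
+```yaml
+apiVersion: 0.1.0
+meta:
+  version: x.y.z      # connector version from the Hub
+  name: my-connector  # name of this connector instance
+  type: http-source   # connector type, e.g. http-source or sql-sink
+  topic: my-topic     # topic the connector produces to or consumes from
+# connector-specific section follows, e.g. `http:` or `sql:`
+```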
+
+
+### Inbound Connector
+
+For the HTTP-specific parameters, you will need to specify the endpoint it polls and the interval at which it polls.
+
+:::info
+All versions are marked with `x.y.z`. To find the latest version, run:
+* `fluvio hub connector list`
+* `fluvio hub smartmodule list`
+:::
+
+```yaml
+apiVersion: 0.1.0
+meta:
+  version: x.y.z
+  name: cat-facts
+  type: http-source
+  topic: cat-facts
+
+http:
+  endpoint: "https://catfact.ninja/fact"
+  interval: 10s
+```
+
+This creates a connector named `cat-facts` that reads from the website `https://catfact.ninja/fact` every 10 seconds and produces to the topic `cat-facts`.
+
+#### Testing the Inbound Connector
+
+You can register the connector to Fluvio with `fluvio cloud connector create --config=<config-file>`.
+
+```bash
+$ fluvio cloud connector create --config=catfacts-basic-connector.yml
+```
+
+You can use `fluvio cloud connector list` to view the status of the connector.
+
+```bash
+$ fluvio cloud connector list
+NAME       TYPE         VERSION  CDK  STATUS
+cat-facts  http-source  0.1.0    V3   Running
+```
+
+And `fluvio consume` to view the incoming data in the topic.
+
+```bash
+$ fluvio consume cat-facts -dT4
+Consuming records starting 4 from the end of topic 'cat-facts'
+{"fact":"A cat lover is called an Ailurophilia (Greek: cat+lover).","length":57}
+{"fact":"British cat owners spend roughly 550 million pounds yearly on cat food.","length":71}
+{"fact":"Fossil records from two million years ago show evidence of jaguars.","length":67}
+{"fact":"Relative to its body size, the clouded leopard has the biggest canines of all animals\u2019 canines. Its dagger-like teeth can be as long as 1.8 inches (4.5 cm).","length":156}
+```
+
+#### Inbound Connector with JSON to JSON transformation before writing to topic
+All Inbound Connectors support [transformations], which are applied before the data is sent to the topic.
+We can extend our config file to add an additional JSON to JSON transformation to records.
+
+```yaml title="catfacts-basic-connector-with-transform.yml"
+apiVersion: 0.1.0
+meta:
+  version: x.y.z
+  name: cat-facts-transformed
+  type: http-source
+  topic: cat-facts-data-transformed
+
+http:
+  endpoint: https://catfact.ninja/fact
+  interval: 10s
+
+transforms:
+  - uses: infinyon/jolt@x.y.z
+    with:
+      spec:
+        - operation: default
+          spec:
+            source: "http"
+```
+
+In this config, we add the field `source` with the static value `http` to every record. Note that if the field
+already exists, it will not be overwritten.
+
+Before we create the connector, we need to add the [infinyon/jolt@x.y.z] SmartModule to the cluster.
+This SmartModule uses a domain-specific language (DSL) called [Jolt] to specify a transformation of input JSON into another shape of JSON data.
+
+Let's download this SmartModule from the [SmartModule Hub].
+
+```bash
+$ fluvio hub sm download infinyon/jolt@x.y.z
+```
+
+Then, we create a connector just like before:
+
+```bash
+$ fluvio cloud connector create --config catfacts-basic-connector-with-transform.yml
+```
+
+And use `fluvio consume` to view the transformed data in the topic.
+
+```bash
+$ fluvio consume cat-facts-data-transformed -dT4
+Consuming records starting 4 from the end of topic 'cat-facts-data-transformed'
+{"fact":"The Amur leopard is one of the most endangered animals in the world.","length":68,"source":"http"}
+{"fact":"Some cats have survived falls of over 65 feet (20 meters), due largely to their “righting reflex.” The eyes and balance organs in the inner ear tell it where it is in space so the cat can land on its feet. Even cats without a tail have this ability.","length":249,"source":"http"}
+{"fact":"In Holland’s embassy in Moscow, Russia, the staff noticed that the two Siamese cats kept meowing and clawing at the walls of the building. Their owners finally investigated, thinking they would find mice. Instead, they discovered microphones hidden by Russian spies. The cats heard the microphones when they turned on.","length":318,"source":"http"}
+{"fact":"Cats can be right-pawed or left-pawed.","length":38,"source":"http"}
+```
+
+### Outbound Connector
+
+#### Setup
+For the SQL Outbound connector example, we will need to create a table in our Postgres database.
+
+Run this query in your database before starting any Outbound connectors.
+
+```sql
+create table animalfacts(length integer, raw_fact_json jsonb)
+```
+
+We also need to run a few commands with `fluvio` to download some prepackaged SmartModules from the SmartModule Hub to attach to the Outbound Connector.
+
+[This SmartModule](https://www.fluvio.io/smartmodules/certified/json-sql) will do a basic mapping of the JSON input into a SQL statement for the Outbound SQL connector.
+
+```bash
+$ fluvio hub sm download infinyon/json-sql@x.y.z
+```
+
+If you did not add the `infinyon/jolt@x.y.z` SmartModule in previous steps, add it as well:
+
+```bash
+$ fluvio hub sm download infinyon/jolt@x.y.z
+```
+
+For more info about the SmartModule Hub, check out the [Hub Overview page](https://www.fluvio.io/smartmodules/hub/overview/)
+
+#### Outbound SQL with basic SQL inserts
+In this connector, we will listen in on the `cat-facts` topic. Whenever a new fact is produced to the topic, the Outbound SQL connector will insert the record into a table named `animalfacts`, with the length in one column called `length` and the entire JSON in another column called `raw_fact_json`.
+
+```yaml
+# sql-basic.yml
+apiVersion: 0.1.0
+meta:
+  name: simple-cat-facts-sql
+  type: sql-sink
+  version: x.y.z
+  topic: cat-facts
+sql:
+  url: "postgres://user:password@db.postgreshost.example/dbname"
+transforms:
+  - uses: infinyon/json-sql@x.y.z
+    with:
+      invoke: insert
+      mapping:
+        table: "animalfacts"
+        map-columns:
+          "length":
+            json-key: "length"
+            value:
+              type: "int"
+              default: "0"
+              required: true
+          "raw_fact_json":
+            json-key: "$"
+            value:
+              type: "jsonb"
+              required: true
+```
+
+
+And we create the Outbound connector just like the Inbound connector:
+
+```bash
+$ fluvio cloud connector create --config sql-basic.yml
+connector "simple-cat-facts-sql" (sql-sink) created
+```
+
+After a few seconds, we can see data in the PostgreSQL table.
+
+
+```bash
+select * from animalfacts;
++--------+-----------------------------------------------------------------------------------------------------------------------------------+
+| length | raw_fact_json                                                                                                                     |
+|--------+-----------------------------------------------------------------------------------------------------------------------------------|
+| 74     | {"fact": "A cat’s jaw can’t move sideways, so a cat can’t chew large chunks of food.", "length": 74}                              |
+| 110    | {"fact": "Unlike humans, cats are usually lefties. Studies indicate that their left paw is typically their dominant paw.", "length": 110} |
+| 114    | {"fact": "A commemorative tower was built in Scotland for a cat named Towser, who caught nearly 30,000 mice in her lifetime.", "length": 114} |
+| 98     | {"fact": "Statistics indicate that animal lovers in recent years have shown a preference for cats over dogs!", "length": 98}      |
+| 78     | {"fact": "Approximately 1/3 of cat owners think their pets are able to read their minds.", "length": 78}                          |
+| 95     | {"fact": "At 4 weeks, it is important to play with kittens so that they do not develope a fear of people.", "length": 95}         |
+| 46     | {"fact": "Jaguars are the only big cats that don't roar.", "length": 46}                                                          |
+| 31     | {"fact": "Female felines are \\superfecund", "length": 31}                                                                        |
++--------+-----------------------------------------------------------------------------------------------------------------------------------+
+```
+
+
+#### Outbound SQL with JSON to JSON transformation before insert
+In this connector, we will also listen in on the `cat-facts` topic.
+
+But before we insert into the database, we specify a transformation. The resulting JSON we see inserted in the table has the `length` key removed, and a `type: cat` field added to every JSON record.
+
+```yaml title="sql-transform.yml"
+apiVersion: 0.1.0
+meta:
+  name: transform-cat-facts-sql
+  type: sql-sink
+  version: x.y.z
+  topic: cat-facts-data
+sql:
+  url: "postgres://user:password@db.postgreshost.example/dbname"
+transforms:
+  - uses: infinyon/jolt@x.y.z
+    with:
+      spec:
+        - operation: shift
+          spec:
+            fact: "animal.fact"
+            length: "length"
+        - operation: default
+          spec:
+            animal:
+              type: "cat"
+  - uses: infinyon/json-sql@x.y.z
+    with:
+      invoke: insert
+      mapping:
+        table: "animalfacts"
+        map-columns:
+          "length":
+            json-key: "length"
+            value:
+              type: "int"
+              default: "0"
+              required: true
+          "raw_fact_json":
+            json-key: "animal"
+            value:
+              type: "jsonb"
+              required: true
+```
+
+Create another connector with our transformations:
+
+```bash
+$ fluvio cloud connector create --config sql-transform.yml
+connector "transform-cat-facts-sql" (sql-sink) created
+```
+
+After a few seconds, we can see data in the PostgreSQL table with our configured transformations.
+
+
+```bash
+select * from animalfacts;
++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| length | raw_fact_json                                                                                                                                                                                                                     |
+|--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| 58     | {"fact": "A cat can spend five or more hours a day grooming himself.", "type": "cat"}                                                                                                                                             |
+| 110    | {"fact": "Unlike humans, cats are usually lefties. Studies indicate that their left paw is typically their dominant paw.", "type": "cat"}                                                                                         |
+| 163    | {"fact": "Retractable claws are a physical phenomenon that sets cats apart from the rest of the animal kingdom. I n the cat family, only cheetahs cannot retract their claws.", "type": "cat"}                                    |
+| 78     | {"fact": "Approximately 1/3 of cat owners think their pets are able to read their minds.", "type": "cat"}                                                                                                                         |
+| 145    | {"fact": "A sexually-active feral tom-cat \\owns\\\" an area of about three square miles and \\\"\"sprays\\\"\" to mark his territory with strong smelling urine.\"\"\"", "type": "cat"}                                          |
+| 149    | {"fact": "It has been scientifically proven that owning cats is good for our health and can decrease the occurrence of high blood pressure and other illnesses.", "type": "cat"}                                                  |
+| 73     | {"fact": "In relation to their body size, cats have the largest eyes of any mammal.", "type": "cat"}                                                                                                                              |
++--------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+```
+
+
+### Deleting connectors
+
+To stop the traffic from all the connectors, run `fluvio cloud connector delete <name>`.
+
+This will delete the connector, but not the topic it was attached to.
+
+```bash
+$ fluvio cloud connector delete cat-facts cat-facts-transformed simple-cat-facts-sql transform-cat-facts-sql
+connector "cat-facts" deleted
+connector "cat-facts-transformed" deleted
+connector "simple-cat-facts-sql" deleted
+connector "transform-cat-facts-sql" deleted
+```
+
+## Conclusion
+
+We used the Inbound HTTP Connector to ingest JSON data from an endpoint and save it in a topic, and we configured
+transformation of the outgoing JSON records.
+
+We also briefly introduced the SmartModule Hub, which supplied the SmartModules the Outbound SQL connector used to consume data.
+
+With the Outbound SQL Connector, we utilized SmartModules in two different ways:
+1. A basic insert into a table, with a simple mapping of JSON fields into columns
+2. A configured transformation of the incoming JSON, followed by the same mapping process
+
+
+## Reference
+
+* [Fluvio CLI Produce]
+* [Fluvio CLI Consume]
+* [Fluvio CLI Topic]
+* [Fluvio CLI Profile]
+* [SmartModule]
+* [SmartModule Rust API]
+* [Transformations]
+
+
+[Fluvio CLI Produce]: fluvio/cli/fluvio/produce.mdx
+[Fluvio CLI Consume]: fluvio/cli/fluvio/consume.mdx
+[Fluvio CLI Topic]: fluvio/cli/fluvio/topic.mdx
+[Fluvio CLI Profile]: fluvio/cli/fluvio/profile.mdx
+[SmartModule]: smartmodules/overview.mdx
+[SmartModule Rust API]: https://docs.rs/fluvio-smartmodule/latest/fluvio_smartmodule/
+[Transformations]: fluvio/concepts/transformations.mdx
+[Inbound HTTP Connector]: hub/connectors/inbound/http.mdx
+[catfact.ninja]: https://catfact.ninja
+[PostgreSQL]: https://www.postgresql.org/
+[ElephantSQL]: https://www.elephantsql.com/
+[Connector Configuration]: connectors/configuration.mdx
+[infinyon/jolt@x.y.z]: hub/smartmodules/jolt.mdx
+[Jolt]: https://github.com/infinyon/fluvio-jolt
+[SmartModule Hub]: hub/smartmodules/index.md
+[installation]: fluvio/quickstart.mdx#install-fluvio
diff --git a/versioned_docs/version-0.13.0/cloud/tutorials/index.md b/versioned_docs/version-0.13.0/cloud/tutorials/index.md
new file mode 100644
index 00000000..d7087a4c
--- /dev/null
+++ b/versioned_docs/version-0.13.0/cloud/tutorials/index.md
@@ -0,0 +1,5 @@
+import DocCardList from '@theme/DocCardList';
+
+# Tutorials
+
+<DocCardList />
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/cloud/tutorials/iot-mirroring-cloud.mdx b/versioned_docs/version-0.13.0/cloud/tutorials/iot-mirroring-cloud.mdx
new file mode 100644
index 00000000..e0a4eed8
--- /dev/null
+++ b/versioned_docs/version-0.13.0/cloud/tutorials/iot-mirroring-cloud.mdx
@@ -0,0 +1,407 @@
+---
+sidebar_position: 41
+title: "Mirroring IoT Data from Edge to Cloud"
+description: "Reliable IoT monitoring from movable edge sensors with poor connections to the Cloud."
+---
+
+InfinyOn IoT edge is a **~14 MB** binary that runs on ARMv7 chips with less than **256 MB** of memory. We are working with teams building the future of monitoring dynamic assets to push the boundaries of edge data stream processing.
+
+* If connected, InfinyOn IoT edge sends telemetry and events to the InfinyOn Cloud in real-time using mirroring.
+
+* If disconnected, the InfinyOn IoT edge stream processor caches events locally. When the connection resumes, it brings InfinyOn Cloud up to date and continues mirroring until the subsequent connection loss.
+
+## Benefits
+
+The benefits of the InfinyOn solution are as follows:
+* Reliable edge-to-cloud synchronization:
+  * Real-time publishing when connected.
+  * Automatic synchronization after reconnect.
+  * Edge devices can be offline for extended periods (days).
+* Edge collection without downtime when disconnected:
+  * Reliable local caching for gigabytes of data.
+  * Simplified logic for edge clients.
+  * Edge cluster provides a reliable connection to the local clients.
+* Intelligent processing at the edge with InfinyOn SmartModules:
+  * filter
+  * transform
+  * enrich
+* Hierarchical processing, where you decide where to apply the transformations.
+
+* Built-in cloud connectors to push events to databases and other core products.
+
+
+## Installation
+
+In this tutorial, we'll use a VM emulator to create an edge endpoint and mirror traffic to InfinyOn Cloud.
+
+Let's get started.
+
+
+## Setup InfinyOn Cloud
+
+Mirroring is an experimental feature using a development cluster. Please get in touch with us on [Discord] to request access for your organization. Upon approval, please continue as follows:
+
+
+### Create a Cloud account
+
+Using your web browser, navigate to [https://infinyon.cloud/ui/signup], where this experimental feature is available.
+
+After the account is created, you will be placed in the Dashboard. You may choose to create a cluster in the GUI. In this tutorial, we'll create a cluster using the CLI later.
+
+
+### Download `fluvio` binary
+
+Use `curl` to download and install:
+
+```bash
+curl -fsS https://hub.infinyon.cloud/install/install.sh | bash
+```
+
+Make sure to add `.fluvio/bin` to the `$PATH` as specified in the installation script.
+
+
+### Login to InfinyOn Cloud
+
+Login to InfinyOn Cloud:
+
+
+```bash
+fluvio cloud login --use-oauth2
+```
+
+Leave out `--use-oauth2` if you prefer the username/password method.
+
+
+### Provision a new Cluster
+
+Let's provision a new cloud cluster:
+
+
+```bash
+fluvio cloud cluster create
+```
+
+Check the result with:
+
+
+```bash
+fluvio cluster status
+```
+
+Next, we'll configure the cluster to receive traffic from the edge clusters.
+
+### Register Edge cluster
+
+Let's register the edge cluster `edge1` so that our home cluster in the cloud accepts connection requests from the remote device:
+
+
+```bash
+fluvio remote register edge1
+```
+
+### Create the mirror topic
+
+Each edge cluster mirrors to its own partition of a topic, where each partition has a `1-to-1` relationship with an edge cluster.
+
+Create a partition assignment file with an array of the edge mirrors we expect to connect to this cluster:
+
+
+```bash
+echo '[
+  "edge1"
+]' > assignment_file.json
+```
+
+Apply the configuration file to create the topic:
+
+
+```bash
+fluvio topic create edge-topic --mirror-apply assignment_file.json
+```
+
+List partitions to check the assignment:
+
+
+```bash
+fluvio partition list
+```
+
+It should display all partitions:
+
+```bash
+  TOPIC       PARTITION  LEADER  MIRROR  REPLICAS  RESOLUTION  SIZE  HW  LEO  LRS  FOLLOWER OFFSETS
+  edge-topic  0          5001    edge1   []        Online      0 B   0   0    0    0  []
+```
+
+List remote clusters to check their status:
+
+
+```bash
+fluvio remote list
+```
+
+It should show the following:
+
+```bash
+  REMOTE  SC STATUS  SPU STATUS  LAST SEEN  ERRORS
+  edge1   Waiting    Waiting     -          -
+```
+
+
+### Create a new directory
+
+In the next step, we'll create a configuration file that we'll need to pass along to the edge device. It's easier if we make a clean directory and pass it along to the VM emulator:
+
+
+```bash
+mkdir -p ~/local/projects/mirror; cd ~/local/projects/mirror
+```
+
+
+### Generate metadata for Edge Cluster
+
+Each edge cluster requires a unique metadata file that informs the edge cluster how to connect with the home cluster. Create the config file by running the following command:
+
+
+```bash
+fluvio cloud remote export edge1 --file edge1.json
+```
+
+The Cloud cluster configuration is now complete. Next, we'll create an edge cluster that synchronizes data to the Cloud.
+
+
+## Install Edge Cluster on Local VM
+
+We'll start an edge cluster on our local computer in a VM using OrbStack.
+
+### Install OrbStack
+
+We'll use OrbStack for VM management:
+
+1. 
[Install OrbStack]
+
+2. Start an Ubuntu VM.
+
+3. Click the VM to open a terminal.
+
+4. Using the terminal, navigate to your data directory:
+
+
+```bash
+cd local/projects/mirror
+```
+
+All the files we've generated on the local machine should be visible here.
+
+
+### Download fluvio binaries
+
+Download the binaries:
+
+
+```bash
+curl -fsS https://hub.infinyon.cloud/install/install.sh | bash
+```
+
+Add them to the path:
+
+
+```bash
+echo 'export PATH="${HOME}/.fluvio/bin:${PATH}"' >> ~/.bashrc
+source ~/.bashrc
+```
+
+Run the following command to double-check:
+
+
+```bash
+fluvio version
+```
+
+
+### Start Edge Cluster
+
+First, we'll start the cluster:
+
+
+```bash
+fluvio cluster start
+```
+
+
+Then, we'll connect to the Cloud with the `edge1` metadata file:
+
+```bash
+fluvio home connect --file edge1.json
+```
+
+Let's check the partitions:
+
+
+```bash
+fluvio partition list
+```
+
+The edge device should show the following partition:
+
+```bash
+  TOPIC       PARTITION  LEADER  MIRROR             REPLICAS  RESOLUTION  SIZE  HW  LEO  LRS  FOLLOWER OFFSETS
+  edge-topic  0          5001    0:public_endpoint  []        Online      0 B   0   0    0    0  []
+```
+
+---
+
+
+## Test 1: Mirroring from VM Edge to Cloud
+
+Let's produce on the Edge VM cluster and consume from the Cloud cluster.
+
+### Produce on Edge
+
+Produce on the `edge` terminal:
+
+
+```bash
+fluvio produce edge-topic
+```
+
+```bash
+> 1
+Ok!
+> 2
+Ok!
+```
+
+### Consume from Cloud
+
+Consume on the `cloud` terminal:
+
+
+```bash
+fluvio consume edge-topic --mirror edge1 -B
+```
+
+```bash
+1
+2
+```
+
+The mirror test is successful.
+
+
+## Test 2: Cloud Cluster Offline
+
+To simulate a disconnect, we'll perform the following steps:
+
+1. **Turn off** the network connection to the internet.
+
+
+2. **Produce** records on the `edge` terminal.
+
+
+```bash
+fluvio produce edge-topic
+```
+
+```
+> 3
+Ok!
+> 4
+Ok!
+> 5
+Ok!
+```
+
+3. **Turn on** the network connection and check that the data is synced.
+
+The topic on the Cloud cluster should automatically synchronize with the edge cluster.
+
+4. **Consume** from the `cloud` terminal:
+
+Wait for the connection retry interval to trigger so the new records arrive, then consume:
+
+
+```bash
+fluvio consume edge-topic --mirror edge1 -B
+```
+
+```
+1
+2
+3
+4
+5
+```
+
+The disconnect test was successful.
+
+
+
+## Test 3: Edge Cluster Offline
+
+This test ensures that the edge cluster preserves all cached data following a power loss.
+
+### Restart Edge Cluster
+
+On the `edge` terminal, shut down the cluster:
+
+
+```bash
+fluvio cluster shutdown --local
+```
+
+Restart the cluster:
+
+
+```bash
+fluvio cluster upgrade
+```
+
+### Consume from edge cluster
+
+First, on the `edge` terminal, check the status of the home cluster in the Cloud:
+
+```bash
+fluvio home status
+```
+```bash
+HOME  ROUTE            SC STATUS  SPU STATUS  LAST SEEN  ERRORS
+home  localhost:30003  Connected  Connected   1s         -
+```
+
+Then, consume from the edge cluster:
+
+
+```bash
+fluvio consume edge-topic -B
+```
+
+```
+1
+2
+3
+4
+5
+```
+
+Produce records and observe that the mirror resumes synchronization.
+
+🎉 Congratulations! These tests confirm that the synchronization from Edge to Cloud works as expected. It is now time to roll it out in your environment.
+
+Join us on [Discord] if you have questions or would like to suggest new improvements.
+
+
+[Discord]: https://discord.com/invite/bBG2dTz
+
+#### Related
+* [Try Mirroring]
+* [Try Mirroring - Cloud]
+* [IoT Mirroring - Raspberry Pi to a Local Cluster]
+* [Discord]
+
+
+[Try Mirroring]: fluvio/tutorials/mirroring-two-clusters.mdx
+[Try Mirroring - Cloud]: cloud/tutorials/try-mirroring-cloud.mdx
+[IoT Mirroring - Raspberry Pi to a Local Cluster]: fluvio/tutorials/mirroring-iot-local.mdx
+[Discord]: https://discord.com/invite/bBG2dTz
+[https://infinyon.cloud/ui/signup]: https://infinyon.cloud/ui/signup
+[Install OrbStack]: https://orbstack.dev
diff --git a/versioned_docs/version-0.13.0/cloud/tutorials/mqtt-to-sql.mdx b/versioned_docs/version-0.13.0/cloud/tutorials/mqtt-to-sql.mdx
new file mode 100644
index 00000000..6bfe829d
--- /dev/null
+++ b/versioned_docs/version-0.13.0/cloud/tutorials/mqtt-to-sql.mdx
@@ -0,0 +1,508 @@
+---
+sidebar_position: 31
+title: "Create an MQTT to SQL Data Pipeline"
+description: "Data pipeline that listens for MQTT events, converts them to JSON, and sends them to a SQL database."
+---
+
+At the end of this tutorial, we will see data starting from an MQTT broker and ending in a PostgreSQL table.
+
+We'll use two connectors:
+* [Inbound MQTT connector]
+* [Outbound SQL connector]
+  * There will be an example of combining multiple SmartModules, known as **SmartModule chaining**
+
+The Outbound connector will be using a [PostgreSQL] database. It will listen to the topic for new records and insert them into a table.
+
+You can use your own PostgreSQL instance if it can be reached over the internet, or you can follow along by creating a PostgreSQL database at a hosting service, such as [ElephantSQL].
+
+## Setup
+### Start MQTT Connector
+
+This connector expects to take `json` input from the MQTT broker, from an MQTT topic named `ag-mqtt-topic`. These parameters will be reflected in the final JSON payload that gets produced to the Fluvio topic `mqtt-topic`.
+
+
+MQTT connector config: `mqtt.yml`
+
+:::info
+All versions are marked with `x.y.z`. To find the latest version, run:
+* `fluvio hub connector list`
+* `fluvio hub smartmodule list`
+:::
+
+```yaml
+# mqtt.yml
+apiVersion: 0.1.0
+meta:
+  version: x.y.z
+  name: fluvio-mqtt-connector
+  type: mqtt-source
+  topic: mqtt-topic
+  direction: source
+
+mqtt:
+  url: "mqtt://test.mosquitto.org/"
+  topic: "ag-mqtt-topic"
+  timeout:
+    secs: 30
+    nanos: 0
+  payload_output_type: json
+```
+
+#### Create MQTT connector
+
+
+```bash
+$ fluvio cloud connector create --config mqtt.yml
+```
+
+#### Install `mosquitto` - MQTT client
+
+First, install [mosquitto] to follow the later steps for sending JSON to our test MQTT broker.
+
+:::tip
+  On MacOS, you can install `mosquitto` with homebrew with the following command:
  `brew install mosquitto`
+:::
+
+
+### Start SQL connector(s)
+
+You can start one or both of the following connectors:
+
+1. **[Connector with no transformation][sql-connector-with-no-transformation]**
+   1. Download the SmartModule used by the example
+   2. Example connector config
+   3. Start the connector
+2. **[Connector with extra JSON to JSON transformation][connector-with-json-to-json-transformation]**
+   1. Download the SmartModules used by the example
+   2. Example connector config
+   3. Start the connector
+
+
+#### SQL Connector with no transformation
+
+##### Download [json-sql] SmartModule
+
+Example output:
+
+
+```bash
+$ fluvio hub sm download infinyon/json-sql@x.y.z
+downloading infinyon/json-sql@x.y.z to infinyon-json-sql-x.y.z.ipkg
+... downloading complete
+... checking package
+trying connection to fluvio router.dev.infinyon.cloud:9003
+... cluster smartmodule install complete
+```
+
+##### SQL Connector with no transformation config
+
+
+```yaml
+# sql.yml
+apiVersion: 0.1.0
+meta:
+  name: fluvio-sql-connector
+  type: sql-sink
+  version: x.y.z
+  topic: mqtt-topic
+
+sql:
+  url: "postgres://user:password@db.postgreshost.example/dbname"
+transforms:
+  - uses: infinyon/json-sql@x.y.z
+    with:
+      invoke: insert
+      mapping:
+        table: "topic_message"
+        map-columns:
+          "device_id":
+            json-key: "payload.device.device_id"
+            value:
+              type: "int"
+              default: "0"
+              required: true
+          "record":
+            json-key: "$"
+            value:
+              type: "jsonb"
+              required: true
+```
+
+Start the no-transformation SQL connector:
+
+
+```bash
+$ fluvio cloud connector create --config sql.yml
+```
+
+#### Connector with JSON to JSON transformation
+
+Download the [Jolt] and [Json-Sql] SmartModules used by this example connector.
+
+Example output:
+
+
+```bash
+$ fluvio hub sm download infinyon/json-sql@x.y.z
+downloading infinyon/json-sql@x.y.z to infinyon-json-sql-x.y.z.ipkg
+... downloading complete
+... checking package
+trying connection to fluvio router.infinyon.cloud:9003
+... cluster smartmodule install complete
+```
+
+
+```bash
+$ fluvio hub sm download infinyon/jolt@x.y.z
+downloading infinyon/jolt@x.y.z to infinyon-jolt-x.y.z.ipkg
+... downloading complete
+... checking package
+trying connection to fluvio router.infinyon.cloud:9003
+... cluster smartmodule install complete
+```
+
+##### Connector with JSON to JSON transformation config
+
+
+```yaml
+# sql-chain.yml
+apiVersion: 0.1.0
+meta:
+  name: fluvio-sql-connector-chain
+  type: sql-sink
+  version: x.y.z
+  topic: mqtt-topic
+
+sql:
+  url: "postgres://user:password@db.postgreshost.example/dbname"
+  rust_log: "sql_sink=INFO,sqlx=WARN"
+transforms:
+  - uses: infinyon/jolt@x.y.z
+    with:
+      spec:
+        - operation: shift
+          spec:
+            payload:
+              device: "device"
+        - operation: default
+          spec:
+            device:
+              type: "mobile"
+  - uses: infinyon/json-sql@x.y.z
+    with:
+      invoke: insert
+      mapping:
+        table: "topic_message"
+        map-columns:
+          "device_id":
+            json-key: "device.device_id"
+            value:
+              type: "int"
+              default: "0"
+              required: true
+          "record":
+            json-key: "$"
+            value:
+              type: "jsonb"
+              required: true
+```
+
+Start the SQL connector with JSON [transformation]:
+
+
+```bash
+$ fluvio cloud connector create --config sql-chain.yml
+```
+
+#### Install `pgcli` - PostgreSQL client
+
+Install `pgcli` to follow the later DB validation steps: [https://www.pgcli.com]
+
+:::tip
+  On MacOS, you can install `pgcli` with homebrew with the following command:
+
+  ```bash
+  $ brew install pgcli
+  ```
+:::
+
+## The actual test
+
+📋 **Using example JSON, this is the sequence of events that will occur:**
+
+1. 
(user) [Publish JSON to MQTT broker][publish-json-to-mqtt-broker]
+2. (Inbound MQTT connector) Produce data to the Fluvio topic `mqtt-topic`
+   1. Produce a transformed JSON object, based on the connector config, with the name of the MQTT topic embedded
+3. (Outbound SQL connector) Consume the inbound record from topic `mqtt-topic`
+   1. Apply transformations to the record (JSON to JSON connector only)
+   2. Insert the record into the DB
+4. (user) Validate the JSON record in the PostgreSQL database
+
+:::tip
+  If you are starting with a new database, you will need to create the table before sending messages to MQTT. It is not created automatically.
+
+  Table create query:
+
+  ```sql
+  create table topic_message(device_id int, record jsonb);
+  ```
+:::
+
+This is what our input JSON to MQTT looks like:
+
+example JSON (formatted)
+
+```json
+{
+  "device": {
+    "device_id": 17,
+    "name": "device17"
+  }
+}
+```
+
+### Publish JSON to MQTT broker
+
+Run the following to send a test JSON message to the demo MQTT broker with `mosquitto` (installed above).
+
+Command:
+
+
+```bash
+$ mosquitto_pub -h test.mosquitto.org -t ag-mqtt-topic -m '{"device": {"device_id":17, "name":"device17"}}'
+```
+
+Produced data in topic:
+
+
+```bash
+$ fluvio consume mqtt-topic -B
+Consuming records from the beginning of topic 'mqtt-topic'
+{"mqtt_topic":"ag-mqtt-topic","payload":{"device":{"device_id":17,"name":"device17"}}}
+```
+
+
+### View output in PostgreSQL
+
+Use `pgcli` to examine the database.
+
+
+```bash
+$ pgcli -U user -h db.postgreshost.example -p 5432 dbname
+```
+
+Check that the JSON from MQTT has been inserted into the table:
+
+
+```sql
+select * from topic_message;
+```
+
+Example output from both connectors:
+
+```txt
++-----------+-----------------------------------------------------------------------------------------------+
+| device_id | record                                                                                        |
+|-----------+-----------------------------------------------------------------------------------------------|
+| 17        | {"payload": {"device": {"name": "device17", "device_id": 17}}, "mqtt_topic": "ag-mqtt-topic"} |
+| 17        | {"device": {"name": "device17", "type": "mobile", "device_id": 17}}                          |
++-----------+-----------------------------------------------------------------------------------------------+
+SELECT 2
+Time: 0.080s
+```
+
+Output explanation:
+
+In both cases, we’ve used the `device_id` key in the MQTT JSON as the value in the column of the same name.
+The first row is from our no-transformation connector. The record data appears unchanged from what we saw in the topic.
+
+
+Resulting record:
+
+```json
+{
+  "payload": {
+    "device": {
+      "name": "device17",
+      "device_id": 17
+    }
+  },
+  "mqtt_topic": "ag-mqtt-topic"
+}
+```
+
+
+The second row is from our JSON to JSON transformation connector.
+We’ve `shifted` the topic JSON data so it more closely resembles the original JSON.
+
+Then we enrich the payload by adding the `.device.type` key with the value `mobile` before inserting into the DB:
+
+```json
+{
+  "device": {
+    "name": "device17",
+    "type": "mobile",
+    "device_id": 17
+  }
+}
+```
+
+## Move transformation to MQTT Connector
+
+[Transformations] in the `transforms` section of a connector config are deliberately decoupled from the connectors themselves.
+We can move a SmartModule from an Outbound to an Inbound connector and accomplish the same result.
+The decision depends on the shape of the data you want to store in a topic.
+
+For Inbound connectors, the data is transformed before it is sent to the Fluvio topic; for Outbound connectors, the transformation happens after the data is read from the Fluvio topic
+but before it is sent to the connector's destination.
+
+Let's try it.
+
+Modify our `mqtt.yml` config with the one transformation that we are moving from the SQL Connector:
+
+
+```yaml
+# mqtt.yml
+apiVersion: 0.1.0
+meta:
+  version: x.y.z
+  name: fluvio-mqtt-connector
+  type: mqtt-source
+  topic: mqtt-topic
+  direction: source
+
+mqtt:
+  url: "mqtt://test.mosquitto.org/"
+  topic: "ag-mqtt-topic"
+  timeout:
+    secs: 30
+    nanos: 0
+  payload_output_type: json
+
+transforms:
+  - uses: infinyon/jolt@x.y.z
+    with:
+      spec:
+        - operation: shift
+          spec:
+            payload:
+              device: "device"
+        - operation: default
+          spec:
+            device:
+              type: "mobile"
+```
+
+We don’t need this transformation on the SQL Connector anymore, so remove it from the `sql-chain.yml` file:
+
+
+```yaml
+# sql-chain.yml
+apiVersion: 0.1.0
+meta:
+  name: fluvio-sql-connector-chain
+  type: sql-sink
+  version: x.y.z
+  topic: mqtt-topic
+
+sql:
+  url: "postgres://user:password@db.postgreshost.example/dbname"
+
+transforms:
+  - uses: infinyon/json-sql@x.y.z
+    with:
+      invoke: insert
+      mapping:
+        table: "topic_message"
+        map-columns:
+          "device_id":
+            json-key: "device.device_id"
+            value:
+              type: "int"
+              default: "0"
+              required: true
+          "record":
+            json-key: "$"
+            value:
+              type: "jsonb"
+              required: true
+```
+
+We need to re-create the connectors:
+
+
+```bash
+$ fluvio cloud connector delete fluvio-mqtt-connector
+$ fluvio cloud connector create --config mqtt.yml
+```
+
+We also delete the now-obsolete SQL connectors and re-create the chained connector without the transformation that we moved to MQTT:
+
+
+```bash
+$ fluvio cloud connector delete fluvio-sql-connector-chain
+$ fluvio cloud connector delete fluvio-sql-connector
+$ fluvio cloud connector create --config sql-chain.yml
+```
+
+And now, if we execute the command:
+
+
+```bash
+$ mosquitto_pub -h test.mosquitto.org -t ag-mqtt-topic -m '{"device": {"device_id":17, "name":"device17"}}'
+```
+
+The new record differs from what we saw previously:
+
+
+```bash
+$ fluvio consume mqtt-topic -B
+Consuming records from the beginning of topic 'mqtt-topic'
+{"mqtt_topic":"ag-mqtt-topic","payload":{"device":{"device_id":17,"name":"device17"}}}
+{"device":{"device_id":17,"name":"device17","type":"mobile"}}
+```
+
+We can see that the record was transformed before being produced to the Fluvio cluster.
+
+However, in the database table, the new record equals the previous one.
+
+```txt
++-----------+-----------------------------------------------------------------------------------------------+
+| device_id | record                                                                                        |
+|-----------+-----------------------------------------------------------------------------------------------|
+| 17        | {"payload": {"device": {"name": "device17", "device_id": 17}}, "mqtt_topic": "ag-mqtt-topic"} |
+| 17        | {"device": {"name": "device17", "type": "mobile", "device_id": 17}}                          |
+| 17        | {"device": {"name": "device17", "type": "mobile", "device_id": 17}}                          |
++-----------+-----------------------------------------------------------------------------------------------+
+SELECT 3
+Time: 0.080s
+```
+
+Although the final result is the same (the same records end up in the SQL database with the same content), choosing the proper side
+of the pipeline where transformations reside may significantly affect performance on high volumes of data.
+
+## Conclusion
+
+After setting up our end-to-end MQTT to SQL scenario, we were able to send JSON data to the MQTT broker and track the data to the PostgreSQL table.
+
+We saw the results for the JSON just being inserted into the table with the `json-sql` SmartModule.
+
+Using SmartModule chaining with the `jolt` and `json-sql` SmartModules, we observed that the resulting JSON was successfully transformed.
+
+We can choose on which side of the pipeline we want to transform our data without material impact on the result.
+
+[sql-connector-with-no-transformation]: #sql-connector-with-no-transformation
+[connector-with-json-to-json-transformation]: #connector-with-json-to-json-transformation
+[publish-json-to-mqtt-broker]: #publish-json-to-mqtt-broker
+[Inbound MQTT connector]: hub/connectors/inbound/mqtt.mdx
+[Outbound SQL connector]: hub/connectors/outbound/sql.mdx
+[PostgreSQL]: https://www.postgresql.org/
+[ElephantSQL]: https://www.elephantsql.com/
+[mosquitto]: https://mosquitto.org/download/
+[json-sql]: hub/smartmodules/json-sql.mdx
+[Json-sql]: hub/smartmodules/json-sql.mdx
+[Jolt]: hub/smartmodules/jolt.mdx
+[transformation]: fluvio/concepts/transformations.mdx
+[Transformations]: fluvio/concepts/transformations.mdx
+[https://www.pgcli.com]: https://www.pgcli.com/
diff --git a/versioned_docs/version-0.13.0/cloud/tutorials/try-mirroring-cloud.mdx b/versioned_docs/version-0.13.0/cloud/tutorials/try-mirroring-cloud.mdx
new file mode 100644
index 00000000..9e178f4b
--- /dev/null
+++ b/versioned_docs/version-0.13.0/cloud/tutorials/try-mirroring-cloud.mdx
@@ -0,0 +1,235 @@
+---
+sidebar_position: 40
+title: "Mirroring local clusters to the Cloud"
+description: "Try Mirroring on the Cloud as Home"
+---
+
+This tutorial is a demonstration of mirroring from a local cluster to the Cloud.
+
+## Home and Remote
+
+To understand mirroring, we need to understand what a Home and a Remote cluster are:
+
+- The Home cluster is the target cluster that will receive and consume data.
+- The Remote cluster is the source cluster that will send data.
+
+
+## Install Fluvio
+
+### Create a Cloud account
+
+Using your web browser, navigate to [https://infinyon.cloud/ui/signup], where this experimental feature is available.
+
+After the account is created, you will be placed in the Dashboard. You may choose to create a cluster in the GUI. In this tutorial, we'll create a cluster using the CLI later.
+
+
+### Download `fluvio` binary
+
+Use `curl` to download and install:
+
+```bash
+curl -fsS https://hub.infinyon.cloud/install/install.sh | bash
+```
+
+Make sure to add `.fluvio/bin` to the `$PATH` as specified in the installation script.
+
+
+### Login to InfinyOn Cloud
+
+Login to InfinyOn Cloud:
+
+
+```bash
+fluvio cloud login --use-oauth2
+```
+
+Leave out `--use-oauth2` if you prefer the username/password method.
+
+### Provision a new Cluster
+
+Let's provision a new cloud cluster:
+
+
+```bash
+fluvio cloud cluster create
+```
+
+Check the result with:
+
+
+```bash
+fluvio cluster status
+```
+
+Next, we'll configure the cluster to receive traffic from the remote clusters.
+
+
+### Register Remote clusters on the Home
+
+Use the `remote` CLI to register the remote clusters with the home cluster:
+
+
+```bash
+fluvio remote register edge-remote
+```
+
+List remote clusters to check their status:
+
+```bash
+fluvio remote list
+```
+
+It should show the following:
+
+```bash
+  REMOTE       SC STATUS  SPU STATUS  LAST SEEN  ERRORS
+  edge-remote  Waiting    Waiting     -          -
+```
+
+### Create the mirror topic
+
+Mirror topics on the home cluster have multiple partitions, where each partition has a `1-to-1` relationship with a remote cluster.
+
+Create a partition assignment file to define the remote devices:
+
+
+```bash
+echo '["edge-remote"]' > assignment_file.json
+```
+
+Apply the configuration file to create the topic:
+
+
+```bash
+fluvio topic create mirror-topic --mirror-apply assignment_file.json
+```
+
+List partitions to check the assignment:
+
+
+```bash
+fluvio partition list
+```
+
+It should display all partitions:
+
+```bash
+  TOPIC         PARTITION  LEADER  MIRROR       REPLICAS  RESOLUTION  SIZE  HW  LEO  LRS  FOLLOWER OFFSETS
+  mirror-topic  0          5001    edge-remote  []        Online      0 B   0   0    0    0  []
+```
+
+### Generate Metadata for Remote Clusters from the Cloud Cluster
+
+Each remote cluster requires a unique metadata file that gives the remote cluster the information to connect to the home cluster and the topic/mirror where the data is synchronized.
+
+Generate a metadata file for the remote:
+
+```bash
+fluvio cloud remote export edge-remote --file edge-remote.json
+```
+
+## Connect to the Home Cluster from the Remote
+
+First, we'll start a local cluster:
+
+```bash
+fluvio cluster start
+```
+
+Then, connect to the home cluster:
+
+```bash
+fluvio home connect --file edge-remote.json
+```
+
+Let's check the partitions:
+
+
+```bash
+fluvio partition list
+```
+
+The remote device should show the following partition:
+
+```bash
+  TOPIC         PARTITION  LEADER  MIRROR                                                             REPLICAS  RESOLUTION  SIZE  HW  LEO  LRS  FOLLOWER OFFSETS
+  mirror-topic  0          5001    c7f891c9-f1f9-4d28-b181-9777d8d07731:0:router.infinyon.cloud:9005  []        Online      0 B   0   0    0    0  []
+```
+
+
+Also, check the home status with:
+
+
+```bash
+fluvio home status
+```
+
+It should show the following:
+
+```bash
+  HOME                                  ROUTE                       SC STATUS  SPU STATUS  LAST SEEN  ERRORS
+  c7f891c9-f1f9-4d28-b181-9777d8d07731  router.infinyon.cloud:9003  Connected  Connected   1s         -
+```
+
+---
+
+## Producing and Consuming on Mirroring
+
+Let's produce on the remote and consume from the home cluster.
+
+### Produce to remote cluster
+
+Produce with:
+
+
+```bash
+fluvio produce mirror-topic
+```
+
+```bash
+> A
+Ok!
+> B
+Ok!
+```
+
+
+### Consume from Home cluster
+
+First, switch to the cloud profile:
+
+```bash
+fluvio profile switch <cloud-profile>
+```
+
+To find your cloud profile, run:
+
+```bash
+fluvio profile list
+```
+
+Then, consume with:
+
+```bash
+fluvio consume mirror-topic --mirror edge-remote -B
+```
+
+```bash
+A
+B
+```
+
+🎉 Congratulations! You have successfully tested mirroring on the cloud.
+
+
+#### Related
+* [Try Mirroring - Docker]
+* [IoT Mirroring - Cloud]
+* [IoT Mirroring - Raspberry Pi to a remote]
+* [Discord]
+
+[Discord]: https://discord.com/invite/bBG2dTz
+[Try Mirroring - Docker]: fluvio/tutorials/mirroring-two-clusters.mdx
+[IoT Mirroring - Cloud]: cloud/tutorials/iot-mirroring-cloud.mdx
+[IoT Mirroring - Raspberry Pi to a remote]: fluvio/tutorials/mirroring-iot-local.mdx
+[https://infinyon.cloud/ui/signup]: https://infinyon.cloud/ui/signup
diff --git a/versioned_docs/version-0.13.0/cloud/tutorials/webhook-to-slack.mdx b/versioned_docs/version-0.13.0/cloud/tutorials/webhook-to-slack.mdx
new file mode 100644
index 00000000..58dbbe27
--- /dev/null
+++ b/versioned_docs/version-0.13.0/cloud/tutorials/webhook-to-slack.mdx
@@ -0,0 +1,196 @@
+---
+sidebar_position: 20
+title: "Create a Data Pipeline from Webhook to Slack"
+description: "Event data pipeline that receives events from an InfinyOn Cloud Webhook and sends an alert to Slack"
+---
+
+This guide shows an end-to-end event pipeline that reads an event from a webhook, generates a formatted string, and publishes the result to Slack. While this is a simple example, it has many event notification use cases, such as:
+
+* submissions from website forms (via [Cloudflare workers] or your custom backend).
+* activity from e-commerce platforms on purchases and shopping carts.
+* notifications from GitHub on your projects' activities.
+* alerts from financial products on your transactions.
+* notifications from any product that can invoke a webhook.
+
+This pipeline uses the following features:
+
+* [webhook]: creates a public API to receive external events, transforms them, and publishes them to a topic.
+* [http-sink]: listens to the same topic and publishes the records to Slack.
+
+
+## Objective
+
+Show an example of how to build an event streaming pipeline that receives webhook events, transforms the input into a readable form, and generates an alert. We assume the events are generated by a user submitting a form, and we'll format them accordingly.
+
+## Prerequisites
+
+* [Fluvio CLI] installed locally
+* Account on [InfinyOn Cloud]
+
+## Step-by-Step
+
+1. [Create webhook configuration file](#create-webhook-configuration-file)
+2. [Create http-sink configuration file](#create-http-sink-configuration-file)
+3. [Download SmartModules](#download-smartmodules)
+4. [Start Webhook and Connector](#start-webhook-and-connector)
+5. [Test Data Pipeline](#test-data-pipeline)
+
+### Create webhook configuration file
+
+Create a webhook configuration file called `form-webhook.yaml`:
+
+:::info
+All versions are marked with `x.y.z`. 
To find the latest version, run:
+* `fluvio hub connector list`
+* `fluvio hub smartmodule list`
+:::
+
+```yaml title="form-webhook.yaml"
+meta:
+  name: form-webhook
+  topic: form-events
+webhook:
+  outputParts: body
+  outputType: json
+transforms:
+  - uses: infinyon-labs/json-formatter@x.y.z
+    with:
+      spec:
+        match:
+          - key: "/type"
+            value: "subscribe"
+            format:
+              with: "📢 {} ({}) subscribed on {}"
+              using:
+                - "/name"
+                - "/email"
+                - "/source"
+              output: "/formatted"
+          - key: "/type"
+            value: "use-case"
+            format:
+              with: ":confetti_ball: {} ({}) wants to solve the following '{}' use-case:\n>{}"
+              using:
+                - "/name"
+                - "/email"
+                - "/source"
+                - "/description"
+              output: "/formatted"
+        default:
+          format:
+            with: "{} ({}) submitted a request"
+            using:
+              - "/name"
+              - "/email"
+            output: "/formatted"
+```
+
+The webhook reads the JSON body, applies the `json-formatter` SmartModule to generate readable text, and writes the new record to a topic called `form-events`. Check out [labs-json-formatter-sm] on GitHub for additional information.
+
+
+### Create http-sink configuration file
+
+Create an HTTP sink connector configuration file called `slack-form-alerts.yaml`:
+
+```yaml title="slack-form-alerts.yaml"
+apiVersion: 0.1.0
+meta:
+  version: x.y.z
+  name: slack-form-alerts
+  type: http-sink
+  topic: form-events
+  secrets:
+    - name: SLACK_USER_ALERTS
+http:
+  endpoint: "https://hooks.slack.com/services/${{ secrets.SLACK_USER_ALERTS }}"
+  headers:
+    - "Content-Type: application/json"
+transforms:
+  - uses: infinyon/jolt@x.y.z
+    with:
+      spec:
+        - operation: shift
+          spec:
+            "formatted": "text"
+```
+
+The sink connector reads from the `form-events` topic and uses the `jolt` SmartModule to shift the formatted string into a field called `text`, per the Slack instructions. Check out [fluvio-jolt] on GitHub for additional information.
+
+#### Add Slack webhook token to InfinyOn Secrets
+
+The Slack webhook link is sensitive information, so let's add the access token part as a `secret` in [InfinyOn Cloud]:
+
+```bash
+$ fluvio cloud secret set SLACK_USER_ALERTS
+```
+
+Check out [Slack Webhooks] on how to create the webhook token.
+
+
+### Download SmartModules
+
+Download the SmartModules used by the webhook and the connector:
+
+```bash
+$ fluvio hub sm download infinyon/jolt@x.y.z
+$ fluvio hub sm download infinyon-labs/json-formatter@x.y.z
+```
+
+Check `fluvio smartmodule list` to ensure they've been downloaded.
+
+
+### Start Webhook and Connector
+
+Start the webhook listener:
+
+```bash
+$ fluvio cloud webhook create --config form-webhook.yaml
+```
+Check `fluvio cloud webhook list` to ensure it has been successfully provisioned. Then note the webhook link that we'll use to test the pipeline: `https://infinyon.cloud/webhooks/v1/[token]`
+
+
+Start the sink connector:
+
+```bash
+$ fluvio cloud connector create -c slack-form-alerts.yaml
+```
+
+Check `fluvio cloud connector list` to ensure it has been successfully provisioned.
+
+
+### Test Data Pipeline
+
+Use `curl` to send a POST request with a fictitious user request to our webhook link. In production environments, this is what a website would send:
+
+```bash
+$ curl -X POST https://infinyon.cloud/webhooks/v1/[token] \
+  -H "Content-Type: application/json" \
+  -d '{ "email": "alice@acme.com", "name": "Alice Liddell", "type": "subscribe", "source": "front-page" }'
+```
+
+The following alert shows up in your Slack channel:
+
+```bash
+📢 Alice Liddell (alice@acme.com) subscribed on front-page
+```
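+You can also sanity-check the intermediate topic. Assuming the webhook config above, the record in `form-events` should carry the original fields plus the added `formatted` field, roughly:
+
+```bash
+$ fluvio consume form-events -B
+Consuming records from the beginning of topic 'form-events'
+{"email":"alice@acme.com","name":"Alice Liddell","type":"subscribe","source":"front-page","formatted":"📢 Alice Liddell (alice@acme.com) subscribed on front-page"}
+```
+
+That's all folks!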
+
+### Reference
+
+* [Webhook Basics]
+* [Cloudflare workers]
+* [JSON formatter SmartModule]
+
+---
+
+[webhook]: cloud/how-to/use-webhooks.mdx
+[Webhook Basics]: cloud/how-to/use-webhooks.mdx
+[http-sink]: https://github.com/infinyon/http-sink-connector
+[labs-json-formatter-sm]: https://github.com/infinyon/labs-json-formatter-sm
+[JSON formatter SmartModule]: https://github.com/infinyon/labs-json-formatter-sm
+[fluvio-jolt]: https://github.com/infinyon/fluvio-jolt
+[Fluvio CLI]: fluvio/quickstart.mdx#install-fluvio
+[InfinyOn Cloud]: https://infinyon.cloud/ui/signup
+[Slack Webhooks]: https://api.slack.com/messaging/webhooks
+[Cloudflare workers]: https://workers.cloudflare.com
diff --git a/versioned_docs/version-0.13.0/cloud/tutorials/zappier-triggers.mdx b/versioned_docs/version-0.13.0/cloud/tutorials/zappier-triggers.mdx
new file mode 100644
index 00000000..89400787
--- /dev/null
+++ b/versioned_docs/version-0.13.0/cloud/tutorials/zappier-triggers.mdx
@@ -0,0 +1,211 @@
+---
+sidebar_position: 120
+title: "Create a Data Pipeline between InfinyOn Cloud and Zapier"
+description: "This guide describes how to send events back and forth between InfinyOn Cloud and Zapier"
+---
+
+# How to Link Zapier and InfinyOn Cloud
+
+If you're using both Zapier and InfinyOn Cloud, integrating the two can supercharge your workflows. Zapier's automation prowess can link up with InfinyOn Cloud's robust data streaming, allowing you to move and transform data and trigger actions seamlessly between them. This can not only save you time but also unlock new possibilities for your real-time data.
+
+To follow along with this guide, you need:
+* Access to Zapier Premium apps
+* An InfinyOn Cloud cluster
+* A Google Forms trigger to Zapier
+* A Google Sheets trigger to Zapier
+
+## Linking Zapier to InfinyOn Cloud
+
+This section covers sending events from Zapier Zaps to InfinyOn Cloud Webhooks.
+
+### Pre-requisites
+* An [InfinyOn Cloud] account
+* The `fluvio` CLI installed and connected to the Cloud account with `fluvio cloud login`
+* A Zapier account with access to [Zapier Webhooks]
+* A Zap trigger. This example will use new survey responses from Google Forms
+
+### On the InfinyOn Cloud cluster
+The first step is to create a webhook in InfinyOn Cloud.
+
+Copy the following example webhook config file and save it as `example-webhook.yaml`:
+
+
+```yaml title="example-webhook.yaml"
+meta:
+  name: from-zapier
+  topic: zapier-events
+
+# optional
+webhook:
+  outputParts: body
+  outputType: json
+```
+
+Then run `fluvio cloud webhook create` to create the webhook:
+
+
+```shell
+$ fluvio cloud webhook create --config example-webhook.yaml
+Webhook "zapier-events" created with url: https://infinyon.cloud/webhooks/v1/[random string]
+```
+
+We'll use the URL for the next step in Zapier.
+
+### In Zapier dashboard
+
+* The first thing is to create or modify an existing Zap. 
- We've created a new Zap that triggers whenever a new response to our Google Form survey arrives.
+  * We won't cover the configuration of the Google Forms trigger further in this example
+
+
+
+#### Configuring the Webhooks step
+
+* Add a step and choose the `Webhooks by Zapier` action
+
+
+* Under **App & Event**, select a `POST` event and click Continue
+
+
+* Under **Action**, for the `URL` field, paste the URL for your InfinyOn Cloud webhook
+* Map the survey answers to keys, forming the JSON sent to InfinyOn Cloud, and click the Continue button
+
+
+
+### Test the Zapier to InfinyOn Cloud workflow
+Let's test that the event gets sent to InfinyOn Cloud.
+In a terminal, create a CLI consumer for the webhook's topic with the following command:
+
+
+```shell
+$ fluvio consume zapier-events
+Consuming records from 'zapier-events'
+```
+
+In our example, we have two fields.
+
+
+Send a new Google Form response.
+
+
+Then click the Test Step/Retest button to test triggering the event with our survey response from the previous step.
+
+
+
+In our consumer terminal, we get this example output:
+
+```shell
+$ fluvio consume zapier-events
+Consuming records from 'zapier-events'
+{"name": "Stacey Fakename", "feedback": "Yes"}
+```
+
+Then you can click Publish to save this Zap. It is ready for production and ready to send data to InfinyOn Cloud.
+
+
+
+The rest of this guide will cover data flow in the opposite direction.
+
+## Linking InfinyOn Cloud to Zapier
+
+This section covers triggering Zapier Zaps with InfinyOn Cloud's outbound HTTP Connector.
+
+### Pre-requisites
+* An [InfinyOn Cloud] account
+* The `fluvio` CLI installed and connected to the Cloud account with `fluvio cloud login`
+* A Zapier account with access to [Zapier Webhooks]
+* An action to save events from a trigger (we're using Google Sheets)
+
+### In Zapier dashboard
+
+1. In Zapier, create a new Zap that is triggered by a webhook. Copy the Zapier URL for the next step
+   * For our example action, we will add a row per InfinyOn Cloud event into an existing Google Sheet with two columns
+
+
+
+In the Test section, copy the URL. We need this webhook URL to configure InfinyOn Cloud as a data source for this Zap.
+
+
+
+We'll pause configuring this Zap for a moment and come back after the next section.
+
+### On the InfinyOn Cloud cluster
+
+1. In InfinyOn Cloud, we need to create an outbound HTTP connector.
+   * Paste the URL from the previous step into the `endpoint` field.
+
+:::info
+All versions are marked with `x.y.z`. To find the latest version, run:
+* `fluvio hub connector list`
+* `fluvio hub smartmodule list`
+:::
+
+```yaml
+# zapier-connector.yaml
+apiVersion: 0.1.0
+meta:
+  version: x.y.z
+  name: zapier
+  type: http-sink
+  topic: cloud-event
+http:
+  method: POST
+  endpoint: https://hooks.zapier.com/hooks/catch/########/xxxxxxx/
+```
+
+Then create the connector with the following command:
+
+
+```shell
+$ fluvio cloud connector create -c zapier-connector.yaml
+```
+
+Data that is sent to the `cloud-event` topic will be forwarded as an HTTP POST to the Zapier workflow.
+
+In the next section, we'll test the flow end to end. But first, we'll send some data to verify that the event triggers the Zapier Zap.
+  * Create a CLI producer to the connector's `cloud-event` topic:
+
+
+```shell
+$ fluvio produce cloud-event
+> {"full_name": "Stacie Fakename", "data": "Hello from Cloud"}
+```
+
+### Test the InfinyOn Cloud to Zapier workflow
+
+Go back to Zapier to edit the webhook Zap. Click Find New Records to see the data we sent in the previous step. Click the Continue with the selected record button.
+
+
+* Create a new step. Select Google Sheets
+
+
+* We want to create a new row in Sheets. Select Create Spreadsheet Row.
+
+
+* In the Account section, you'll link your Google account to Sheets. This step won't be covered in detail
+
+* In the Action section, select a spreadsheet with column headers, and select Refresh fields to populate the section with inputs. Map the webhook fields from the trigger to fields in the spreadsheet and click Continue.
+
+
+* In the Test section, click Test/Retest step to create a row in the spreadsheet with the webhook data.
+
+
+* Confirm in the spreadsheet that the data is mapped into the columns.
+
+
+
+Then you can click Publish to save this Zap and put it into production. It is ready to trigger from InfinyOn Cloud events.
+
+
+
+This is the end of the guide. You should now have bi-directional data flow between Zapier and InfinyOn Cloud.
+
+---
+[Fluvio CLI]: /docs/fluvio/quickstart#install-fluvio
+[InfinyOn Cloud]: https://infinyon.cloud/ui/signup
+[http-source]: https://github.com/infinyon/http-source-connector
+[rss-json]: https://github.com/infinyon/labs-rss-json-sm
+[jolt]: https://github.com/infinyon/fluvio-jolt
+[array-map-json]: https://github.com/infinyon/labs-array-map-json-sm
+[How to Stream and Transform Data from Hacker News RSS Feed (YouTube Video)]: https://www.youtube.com/watch?v=raV5q6paAPM&t=1s&ab_channel=InfinyOn
+[Zapier Webhooks]: https://zapier.com/features/webhooks
+
diff --git a/versioned_docs/version-0.13.0/connectors/cdk.mdx b/versioned_docs/version-0.13.0/connectors/cdk.mdx
new file mode 100644
index 00000000..26fcb588
--- /dev/null
+++ b/versioned_docs/version-0.13.0/connectors/cdk.mdx
@@ -0,0 +1,22 @@
+---
+sidebar_position: 40
+title: "CLI `cdk`"
+description: "cdk is a tool for developing Fluvio connectors"
+---
+
+`cdk` is the Fluvio Connector Development Kit, a CLI utility that helps you quickly create, test, and publish connectors.
+
+Usage:
+
+```bash copy="fl"
+$ cdk --help
+```
+
+The commands for cdk are:
+
+- `cdk generate` - Create a new connector project template
+- `cdk build` - Build the connector in the current dir
+- `cdk test` - Run the connector in the current dir for testing
+- `cdk deploy` - Run the connector for local deployment
+- `cdk publish` - Publish the connector to the InfinyOn Hub. A cloud account is required for this.
+- `cdk hub` - List and download existing connectors to configure and run locally
diff --git a/versioned_docs/version-0.13.0/connectors/configuration.mdx b/versioned_docs/version-0.13.0/connectors/configuration.mdx
new file mode 100644
index 00000000..bcc5fd5a
--- /dev/null
+++ b/versioned_docs/version-0.13.0/connectors/configuration.mdx
@@ -0,0 +1,101 @@
+---
+sidebar_position: 50
+title: "Configuration File"
+description: "Connector Configuration File"
+---
+
+# Connector Configuration File
+
+Below is a template YAML connector file that needs to be populated to be functional. In the following sections, we will explore each part of the connector configuration file and how to configure it properly. 
+
+```yaml title="connector.yaml"
+apiVersion: 0.1.0
+
+meta:
+  name:
+  version:
+  type:
+  topic:
+
+  # optional (inbound connectors)
+  producer:
+    # optional
+    linger:
+    # optional
+    batch-size:
+    # optional
+    compression:
+
+  # optional (outbound connectors)
+  consumer:
+    # optional
+    partition:
+    # optional
+    max_bytes:
+
+  # optional
+  secrets:
+    - name: secret_1
+
+# optional
+transforms:
+  - uses: smartmodule_name
+    with:
+      param_name: param_value
+```
+
+## Connector `apiVersion` configuration
+
+The `apiVersion` is the version of the connector API that the connector uses to parse the configuration file. The current accepted version is `0.1.0`.
+
+## Connector `meta` configuration
+
+The `meta` section contains the metadata for the connector:
+
+* The `name` is the name of the connector. e.g. `my-connector`.
+* The `type` is the type of the connector. e.g. `http-source`, `http-sink`, `mqtt-source`.
+  * See the [connectors] section for the full list of supported connectors.
+* The `version` is the version of the connector. e.g. `0.1.0`.
+* The `topic` is the topic that the connector will connect to. e.g. `my-topic`. The topic will be created automatically if it does not exist.
+* The `secrets` (optional) is a list of secrets that the connector will use. This accepts a list of objects with the key `name`.
+  * See the [secrets] section for more information.
+* The `producer` (optional) is the producer configuration for the connector. Currently, this is only used for `source`/`inbound` connectors. The currently supported configurations are `linger`, `compression` and `batch-size`. All configurations are optional. See the examples for a list of valid values for each configuration.
+* The `consumer` (optional) is the consumer configuration for the connector. Currently, this is only used for `sink`/`outbound` connectors. The currently supported configurations are `id`, `partition`, `max_bytes` and `offset`. All configurations are optional. See the examples for a list of valid values for each configuration.
+
+At minimum, a connector configuration would look like:
+
+```yaml
+apiVersion: 0.1.0
+meta:
+  name: my-connector
+  type: my-connector-type
+  version: x.y.z
+  topic: my-topic
+```
+
+All other fields are optional.
+
+:::info
+The `x.y.z` version should be updated with the version of the latest connector in [the Hub].
+:::
+
+## Connector `transforms` configuration
+
+Connectors support `transforms`. Records can be modified before they are sent to the topic. The `transforms` section is a list of `transform` objects. Each `transform` object has a `uses` and a `with` section.
+
+* `uses` is the reference to the SmartModule used in the transform.
+* `with` is the configuration for the transform.
+
+
+### References
+
+The following references provide additional information:
+
+* [SmartModules] for additional information on transformations
+* [Tutorials] for end-to-end examples
+
+
+[SmartModules]: smartmodules/overview.mdx
+[Tutorials]: fluvio/tutorials/index.md
+[connectors]: connectors/overview.mdx
+[the Hub]: hub/connectors/index.md
diff --git a/versioned_docs/version-0.13.0/connectors/connector-hub.mdx b/versioned_docs/version-0.13.0/connectors/connector-hub.mdx
new file mode 100644
index 00000000..2084e985
--- /dev/null
+++ b/versioned_docs/version-0.13.0/connectors/connector-hub.mdx
@@ -0,0 +1,10 @@
+---
+sidebar_position: 10
+title: Connector Hub (link)
+description: "Official Connectors store in InfinyOn Hub"
+href: hub/connectors/index.md
+---
+
+Connectors are listed in [the Hub]. 
+
+[the Hub]: hub/connectors/index.md
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/connectors/developers/_category_.json b/versioned_docs/version-0.13.0/connectors/developers/_category_.json
new file mode 100644
index 00000000..f209afcc
--- /dev/null
+++ b/versioned_docs/version-0.13.0/connectors/developers/_category_.json
@@ -0,0 +1,5 @@
+{
+  "label": "For Developers",
+  "collapsed": true,
+  "position": 100
+}
diff --git a/versioned_docs/version-0.13.0/connectors/developers/build.mdx b/versioned_docs/version-0.13.0/connectors/developers/build.mdx
new file mode 100644
index 00000000..d7dc1bcb
--- /dev/null
+++ b/versioned_docs/version-0.13.0/connectors/developers/build.mdx
@@ -0,0 +1,52 @@
+---
+sidebar_position: 3
+title: "Build & Test"
+description: "Building and testing a Connector"
+---
+
+This section assumes the `my-connector` project has been [generated].
+
+### Build Connector
+
+Inside the `my-connector` project directory, run `build`:
+
+```bash copy="fl"
+$ cdk build
+...
+Finished `release` profile [optimized] target(s)
+```
+
+The build process generated a binary for your custom connector. We are now ready to test it.
+
+### Test Connector
+
+You need a configuration file to start a connector. The project automatically generated a sample config, `sample-config.yaml`, that we can use as a starting point.
+
+Use `cdk test` to start the instance:
+
+```bash copy="fl"
+$ cdk test --config sample-config.yaml
+    Finished release [optimized] target(s) in 0.16s
+Connector runs with process id: 80380
+Starting my-connector source connector with CustomConfig { foo: "bar" }
+```
+
+Connector output will be redirected to the current terminal output. To stop a connector running in test mode, press Ctrl+C.
+
+#### Test Result
+
+The connector produces `Hello, Fluvio` to the topic `test-my-connector-topic`. Let's check it out:
+
+
+```bash copy="fl"
+$ fluvio consume test-my-connector-topic -B
+Hello, Fluvio - 1
+Hello, Fluvio - 2
+Hello, Fluvio - 3
+Hello, Fluvio - 4
+...
+```
+
+Check out the next section for instructions on how to run the connector in the background.
+
+[generated]: ./generate.mdx
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/connectors/developers/generate.mdx b/versioned_docs/version-0.13.0/connectors/developers/generate.mdx
new file mode 100644
index 00000000..1d21a618
--- /dev/null
+++ b/versioned_docs/version-0.13.0/connectors/developers/generate.mdx
@@ -0,0 +1,112 @@
+---
+sidebar_position: 2
+title: "Generate"
+description: "Generate helps developers build a sample Connector project by answering a few simple questions."
+---
+
+### Generate a Connector
+
+CDK generate helps developers build a sample Connector project by answering a few simple questions.
+
+Use `cdk generate` to create a new connector project:
+
+```shell
+$ cdk generate
+🤷 Project Name: my-connector
+🤷 Please set a group name: acme
+🤷 Which type of Connector would you like [source/sink]? · source
+🤷 Will your Connector be public? · false
+[1/8] Done: .gitignore
+[2/8] Done: Cargo.toml
+[3/8] Done: Connector.toml
+[4/8] Done: README.md
+[5/8] Done: sample-config.yaml
+[6/8] Done: src/config.rs
+[7/8] Done: src/main.rs
+[8/8] Done: src
+```
+
+The generator created a Rust project ready to compile:
+
+```
+$ tree
+.
+├── Cargo.toml
+├── Connector.toml
+├── README.md
+├── sample-config.yaml
+└── src
+    ├── config.rs
+    └── main.rs
+```
+
+This is a simple connector with the code in `src/main.rs`:
+
+```rust title="src/main.rs"
+mod config;
+use config::CustomConfig;
+
+use fluvio::{RecordKey, TopicProducerPool};
+use fluvio_connector_common::{
+    connector,
+    Result
+};
+
+#[connector(source)]
+async fn start(config: CustomConfig, producer: TopicProducerPool) -> Result<()> {
+    println!("Starting my-connector source connector with {config:?}");
+    for i in 1..1000 {
+        let value = format!("Hello, Fluvio - {i}");
+        producer.send(RecordKey::NULL, value).await?;
+        producer.flush().await?;
+        std::thread::sleep(std::time::Duration::from_millis(1000));
+    }
+    Ok(())
+}
+```
+
+Connectors may also define configuration parameters, as shown in `src/config.rs`:
+
+```rust title="src/config.rs"
+use fluvio_connector_common::connector;
+
+#[connector(config)]
+#[derive(Debug)]
+pub(crate) struct CustomConfig {
+    #[allow(dead_code)]
+    foo: String,
+}
+```
+
+The `Connector.toml` file contains the definition of the Connector parameters required to load the file in the Cluster and publish it to the Connector Hub.
+
+```toml title="Connector.toml"
+[package]
+name = "my-connector"
+group = "acme"
+version = "0.1.0"
+apiVersion = "0.1.0"
+fluvio = "0.10.0"
+description = ""
+license = "Apache-2.0"
+visibility = "private"
+
+[direction]
+source = true
+
+[deployment]
+binary = "my-connector"
+
+[custom.properties.foo]
+title = "Foo"
+description = "Foo"
+type = "string"
+```
+
+#### Sections
+
+* `package` is used to build the connector FQDN `acme/my-connector@0.1.0`, and the description to publish to the Hub. The `group` name is equivalent to the package owner in the Hub.
+* `direction` is used to declare the direction data flows through the connector, with respect to the Fluvio cluster. An inbound connector uses `source = true`, and an outbound connector uses `sink = true`.
+* `custom.properties.foo` defines a user configuration key `foo` that can be used in the logic of the connector.
+
+The project is ready to build and test.
diff --git a/versioned_docs/version-0.13.0/connectors/developers/logging.mdx b/versioned_docs/version-0.13.0/connectors/developers/logging.mdx
new file mode 100644
index 00000000..54e10dae
--- /dev/null
+++ b/versioned_docs/version-0.13.0/connectors/developers/logging.mdx
@@ -0,0 +1,53 @@
+---
+sidebar_position: 5
+title: "Logging"
+description: "Viewing Connector logs"
+---
+
+> This section assumes the `my-connector` project has been [generated].
+
+After you [start your connectors], you can view their logs for troubleshooting:
+
+### Log
+
+To display the logs, you'll need the connector name:
+
+
+```bash copy="fl"
+$ cdk deploy log --name my-my-connector-test-connector
+Starting my-connector source connector with CustomConfig { foo: "bar" }
+```
+
+### Change the Log Level
+
+By default, connectors use the `info` logging level. You can change the log level with the `deploy` command's `--log-level` argument:
+
+```bash
+cdk deploy start --config sample-config.yaml --log-level debug
+```
+
+The log levels are:
+- `error`
+- `warn`
+- `info`
+- `debug`
+- `trace`
+
+Check the log again:
+
+```bash copy="fl"
+$ cdk deploy log --name my-my-connector-test-connector
+...
+DEBUG run:send_and_receive{self=MultiplexerSocket 9}: fluvio_socket::multiplexing: sending request api=0 correlation_id=285
+```
+
+Shut down the connector:
+
+```bash copy="fl"
+$ cdk deploy shutdown --name my-my-connector-test-connector
+Shutting down connector: my-my-connector-test-connector
+```
+
+In the next section, we'll take a look at how to use secrets.
+
+[generated]: ./generate.mdx
+[start your connectors]: ./start-shutdown.mdx
diff --git a/versioned_docs/version-0.13.0/connectors/developers/overview.mdx b/versioned_docs/version-0.13.0/connectors/developers/overview.mdx
new file mode 100644
index 00000000..575606d9
--- /dev/null
+++ b/versioned_docs/version-0.13.0/connectors/developers/overview.mdx
@@ -0,0 +1,77 @@
+---
+sidebar_position: 1
+title: "Overview"
+description: "Fluvio Connectors make the process of importing or exporting data simple"
+---
+
+# Overview - Build Your Own Connector
+
+[Fluvio Connector Development Kit (CDK)] is a command-line tool whose primary goal is to help developers build, test, and deploy Connectors, and publish them to the Hub. CDK only supports connectors developed in the Rust programming language. Other languages will be added in future releases.
+
+### Requirements
+
+Install the Rust compiler and Cargo:
+
+```bash copy="fl"
+$ curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+```
+
+For additional information, visit the [Install Rust] guide.
+
+Next, add the musl toolchain:
+
+```bash copy="fl"
+$ rustup target add x86_64-unknown-linux-musl
+```
+
+Some Linux distributions will also require adding a musl compiler package.
+
+
+### Steps
+
+The CDK command line allows developers to build, test, and publish Connectors to the Hub:
+
+1. [Generate a Connector]
+2. [Build and Test]
+3. [Start and Shutdown]
+4. [Logging]
+5. [Secrets]
+6. [Publish to Connector Hub]
+
+
+### References
+
+You may use the following GitHub repositories as inspiration:
+
+#### Template Connectors
+
+Template for bootstrapping a source repository:
+
+* https://github.com/infinyon/connector-hello-source
+
+#### Official Connectors
+
+Source code repositories for the official InfinyOn connectors:
+
+* https://github.com/infinyon/http-source-connector
+* https://github.com/infinyon/http-sink-connector
+* https://github.com/infinyon/kafka-connector
+* https://github.com/infinyon/sql-connector
+* https://github.com/infinyon/mqtt-connector
+* https://github.com/infinyon/graphite-sink-connector
+
+#### Experimental Connectors
+
+These connectors are not guaranteed to work with the latest Fluvio:
+
+* https://github.com/infinyon/labs-redis-sink-connector
+* https://github.com/infinyon/duckdb-connector
+
+[Fluvio Connector Development Kit (CDK)]: ../cdk.mdx
+[Generate a Connector]: ./generate.mdx
+[Build and Test]: ./build.mdx
+[Start and Shutdown]: ./start-shutdown.mdx
+[Logging]: ./logging.mdx
+[Secrets]: ./secrets.mdx
+[Publish to Connector Hub]: ./publish.mdx
+[Install Rust]: https://www.rust-lang.org/tools/install
diff --git a/versioned_docs/version-0.13.0/connectors/developers/publish.mdx b/versioned_docs/version-0.13.0/connectors/developers/publish.mdx
new file mode 100644
index 00000000..99003a80
--- /dev/null
+++ b/versioned_docs/version-0.13.0/connectors/developers/publish.mdx
@@ -0,0 +1,40 @@
+---
+sidebar_position: 7
+title: "Publish to Hub"
+description: "Publish your connector to the Hub and access it from anywhere."
+---
+
+> This section assumes the `my-connector` project has been [generated].
+
+Connector Hub is a public repository for connectors.
+You can publish your connector as `private` to use it on different computers, or `public` to share it with the community.
+
+### Publish Connector to Hub
+
+Use `cdk publish` to publish your connector to the Hub. If run without arguments, it will pack everything needed into a package and push the package to the Hub.
+
+
+```bash copy="fl"
+$ cdk publish
+```
+
+The connector is now available for download from the Hub.
+
+
+### Show Hub Connectors
+
+Run `cdk hub list` to list connectors in the Hub.
+
+
+```bash copy="fl"
+$ cdk hub list
+  CONNECTOR                  Visibility
+  ...
+  acme/my-connector@0.1.0    private
+```
+
+You will see all `public` connectors and your own `private` connectors.
+
+:tada: Congratulations, you've now completed the tutorial. It's time to build your own connectors.
+
+[generated]: ./generate.mdx
+
diff --git a/versioned_docs/version-0.13.0/connectors/developers/secrets.mdx b/versioned_docs/version-0.13.0/connectors/developers/secrets.mdx
new file mode 100644
index 00000000..fc3464d5
--- /dev/null
+++ b/versioned_docs/version-0.13.0/connectors/developers/secrets.mdx
@@ -0,0 +1,64 @@
+---
+sidebar_position: 6
+title: "Secrets"
+description: "Manage sensitive information in your Connector using Secrets."
+---
+
+> This section assumes the `my-connector` project has been [generated].
+
+Connectors often connect to external entities such as `databases`, `message brokers`, or `APIs` that require a confidential authentication key.
+
+Connectors offer this facility through `secrets`.
+
+### Use Secrets
+
+Let's define a file of secrets (one secret per line) in the following format:
+
+```bash
+SECRET_NAME=SECRET_VALUE
+SECRET_NAME_2=SUPER_SECRET_VALUE
+```
+
+Deploy connectors with a `--secrets` flag to pass a file with the secret definitions:
+
+
+```bash copy="fl"
+$ cdk deploy start --config sample-config.yaml --secrets secrets.txt
+```
+
+To indicate that a connector config parameter can contain a secret, use the `SecretString` type. This allows the parameter to receive secrets that are not printable in logs.
+
+```rust
+use fluvio_connector_common::{connector, secret::SecretString};
+
+#[derive(Debug)]
+#[connector(config, name = "myconnector")]
+pub(crate) struct MyConnectorConfig {
+    /// A parameter receiving a secret string
+    pub a_param: SecretString,
+    // -- snip --
+}
+```
+
+This allows a config file to provision secrets to the connector.
+```yaml
+# config-example.yaml
+apiVersion: 0.1.0
+meta:
+  version: 0.1.0
+  name: instancename
+  type: my-connector
+  topic: atopicname
+  secrets:
+    - name: SECRET_NAME
+    - name: SECRET_NAME_2
+myconnector:
+  a_param: "${{ secrets.SECRET_NAME }}_${{ secrets.SECRET_NAME_2 }}"
+
+```
+
+More extensive examples of secrets in use can be found in the [Http Source] connector and its GitHub repo: https://github.com/infinyon/http-source-connector.
+
+In the next section, we'll publish our connector to the Hub.
+
+[generated]: ./generate.mdx
+[Http Source]: hub/connectors/inbound/http.mdx
diff --git a/versioned_docs/version-0.13.0/connectors/developers/start-shutdown.mdx b/versioned_docs/version-0.13.0/connectors/developers/start-shutdown.mdx
new file mode 100644
index 00000000..8bf4229d
--- /dev/null
+++ b/versioned_docs/version-0.13.0/connectors/developers/start-shutdown.mdx
@@ -0,0 +1,56 @@
+---
+sidebar_position: 4
+title: "Start & Shutdown"
+description: "Starting and stopping a Connector"
+---
+
+> This section assumes the `my-connector` project has been [generated].
+
+### Connector start
+
+[Testing] your connector runs the process in the foreground.
+Use `cdk deploy start` to run your connector in the background:
+
+
+```bash copy="fl"
+$ cdk deploy start --config sample-config.yaml
+Log file: /private/tmp/my-connector/my-connector.log
+Connector runs with process id: 88589
+```
+
+### List running connectors
+
+CDK offers a convenience function to list running connectors:
+
+
+```bash copy="fl"
+$ cdk deploy list
+ NAME                            STATUS
+ my-my-connector-test-connector  Running
+```
+
+### Test Result
+
+Consume from the topic to check the result:
+
+```bash copy="fl"
+$ fluvio consume test-my-connector-topic -B
+Hello, Fluvio - 1
+Hello, Fluvio - 2
+Hello, Fluvio - 3
+Hello, Fluvio - 4
+...
+```
+
+### Shutdown Connector
+
+Stop your running connector with `cdk deploy shutdown`:
+
+```bash copy="fl"
+$ cdk deploy shutdown --name my-my-connector-test-connector
+Shutting down connector: my-my-connector-test-connector
+pid: 56421
+```
+
+In the next section, we'll take a look at the logs for troubleshooting.
+
+[generated]: ./generate.mdx
+[Testing]: ./build.mdx
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/connectors/how-to/_category_.json b/versioned_docs/version-0.13.0/connectors/how-to/_category_.json
new file mode 100644
index 00000000..359d0c78
--- /dev/null
+++ b/versioned_docs/version-0.13.0/connectors/how-to/_category_.json
@@ -0,0 +1,5 @@
+{
+  "label": "How To",
+  "collapsed": false,
+  "position": 20
+}
diff --git a/versioned_docs/version-0.13.0/connectors/how-to/run-cloud.mdx b/versioned_docs/version-0.13.0/connectors/how-to/run-cloud.mdx
new file mode 100644
index 00000000..4a2ae7b1
--- /dev/null
+++ b/versioned_docs/version-0.13.0/connectors/how-to/run-cloud.mdx
@@ -0,0 +1,97 @@
+---
+sidebar_position: 20
+title: Run Cloud
+description: "Run a Connector on InfinyOn Cloud"
+---
+
+# Run Connectors on Cloud
+
+Check [the Hub] for a library of connectors that you can download and run. This document will guide you through downloading and running a certified connector in [InfinyOn Cloud].
+
+## Prerequisites
+
+This document assumes that you have provisioned a Cluster in your InfinyOn Cloud account. If you have not done so, please use the [following instructions][InfinyOn Cloud].
+
+
+## 1. Create a Connector Configuration File
+
+Let's create a new directory for the connector configuration file:
+
+```bash copy="fl"
+$ mkdir http-source-cloud; cd http-source-cloud
+```
+
+Create `quotes-config.yaml` and paste the following configuration:
+
+```yaml
+# quotes-config.yaml
+apiVersion: 0.1.0
+meta:
+  version: 0.3.8
+  name: http-quotes
+  type: http-source
+  topic: quotes
+http:
+  endpoint: https://demo-data.infinyon.com/api/quote
+  interval: 2s
+```
+
+The configuration file asks the `http-source` connector to read a new quote from the `demo-data` server every 2 seconds.
+
+## 2. Run the `http-source` Connector
+
+Use the configuration file and create a new connector:
+
+```bash copy="fl"
+$ fluvio cloud connector create -c quotes-config.yaml
+```
+
+Switch the log level with `--log-level debug`. The log level can be set to `trace`, `debug`, `info`, `warn`, or `error`.
+
+To check if the connector is running:
+
+```bash copy="fl"
+$ fluvio cloud connector list
+```
+
+The connector provisions the `quotes` topic and periodically sends a new quote.
+
+
+## 3. Check the Result
+
+Read from the Fluvio topic:
+
+```bash copy="fl"
+$ fluvio consume quotes
+```
+
+Show the logs:
+
+```bash copy="fl"
+$ fluvio cloud connector log http-quotes
+```
+
+
+## 4. Clean-up
+
+The connector will continue to run and consume your credits. Let's shut it down:
+
+```bash copy="fl"
+$ fluvio cloud connector delete http-quotes
+```
+
+We can also delete the topic:
+
+```bash copy="fl"
+$ fluvio topic delete quotes
+```
+
+## References
+
+* [SmartModules] to apply transformations
+* [Tutorials] to see additional examples
+
+[the Hub]: hub/connectors/index.md
+[InfinyOn Cloud]: cloud/quickstart.mdx
+[SmartModules]: smartmodules/overview.mdx
+[Tutorials]: cloud/tutorials/index.md
diff --git a/versioned_docs/version-0.13.0/connectors/how-to/run-local.mdx b/versioned_docs/version-0.13.0/connectors/how-to/run-local.mdx
new file mode 100644
index 00000000..42e86953
--- /dev/null
+++ b/versioned_docs/version-0.13.0/connectors/how-to/run-local.mdx
@@ -0,0 +1,106 @@
+---
+sidebar_position: 10
+title: Run Local
+description: "Run a Connector on your local machine"
+---
+import Versions from '../../_embeds/versions';
+
+# Run Connectors on Your Local Machine
+
+Check [the Hub] for a library of connectors that you can download and run. If the connector you need isn't available, you can [build your own]. This document will guide you through downloading and running a certified connector from the Hub on your local machine.
+
+
+## 1. Download `http-source` Connector from the Hub
+
+Let's create a new directory and download the {Versions.infinyon_http_source} connector from the Hub:
+
+```bash copy="fl"
+$ mkdir http-source; cd http-source
+```
+
+Use [`cdk`] to download the `http-source` connector:
+
+```bash copy="fl"
+$ cdk hub download infinyon/http-source@0.3.8
+```
+
+The file has been downloaded to your local machine.
+
+
+## 2. Create a Connector Configuration File
+
+Save the following configuration file on your machine:
+
+```yaml
+# quotes-config.yaml
+apiVersion: 0.1.0
+meta:
+  version: 0.3.8
+  name: http-quotes
+  type: http-source
+  topic: quotes
+http:
+  endpoint: https://demo-data.infinyon.com/api/quote
+  interval: 2s
+```
+
+The configuration file asks the `http-source` connector to read a new quote from the `demo-data` server every 2 seconds.
+
+## 3. Run the `http-source` Connector
+
+Use the connector's `.ipkg` package file and the configuration file to run the connector:
+
+```bash copy="fl"
+$ cdk deploy start --ipkg infinyon-http-source-0.3.8.ipkg --config ./quotes-config.yaml
+```
+
+Switch the log level with `--log-level debug`. The log level can be set to `trace`, `debug`, `info`, `warn`, or `error`.
+
+To check if the connector is running:
+
+```bash copy="fl"
+$ cdk deploy list
+```
+
+The connector provisions the `quotes` topic and periodically sends a new quote.
+
+## 4. Check the Result
+
+Read from the Fluvio topic:
+
+```bash copy="fl"
+$ fluvio consume quotes
+```
+
+Show the logs:
+
+```bash copy="fl"
+$ cdk deploy log --name http-quotes
+```
+
+
+## 5. Clean-up
+
+The connector is running in the background. Let's shut it down:
+
+```bash copy="fl"
+$ cdk deploy shutdown --name http-quotes
+```
+
+We can also delete the topic:
+
+```bash copy="fl"
+$ fluvio topic delete quotes
+```
+
+## References
+
+* [SmartModules] to apply transformations
+* [Tutorials] to see additional examples
+
+
+[the Hub]: hub/connectors/index.md
+[build your own]: ../developers/overview.mdx
+[`cdk`]: ../cdk.mdx
+[SmartModules]: smartmodules/overview.mdx
+[Tutorials]: fluvio/tutorials/index.md
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/connectors/overview.mdx b/versioned_docs/version-0.13.0/connectors/overview.mdx
new file mode 100644
index 00000000..0d32db47
--- /dev/null
+++ b/versioned_docs/version-0.13.0/connectors/overview.mdx
@@ -0,0 +1,39 @@
+---
+sidebar_position: 1
+title: "Overview"
+description: "Fluvio Connectors make the process of importing or exporting data simple"
+---
+
+# Connectors Overview
+
+Fluvio Connectors make the process of importing or exporting data simple.
+You can import data with an `Inbound` connector and export data with an `Outbound` connector.
+
+Inbound and outbound connectors fundamentally work in the same way. The only difference is the direction your data is streaming with respect to a Fluvio topic.
+
+Connectors can also apply transformations to records as they stream. Visit the [SmartModules] page to learn how to apply a transformation or create a new one.
+
+
+### Next Steps
+
+* [Run connector on local machine]
+* [Run connector in the cloud]
+* [Connector Hub]
+* [Connector Configuration File]
+* [Build a new connector]
+* [Apply SmartModule transformations]
+
+
+
+[SmartModules]: smartmodules/overview.mdx
+[Run connector on local machine]: ./how-to/run-local.mdx
+[Run connector in the cloud]: ./how-to/run-cloud.mdx
+[Connector Hub]: hub/connectors/index.md
+[Connector Configuration File]: ./configuration.mdx
+[Build a new connector]: ./developers/overview.mdx
+[Apply SmartModule transformations]: smartmodules/overview.mdx
+
diff --git a/versioned_docs/version-0.13.0/connectors/troubleshooting.mdx b/versioned_docs/version-0.13.0/connectors/troubleshooting.mdx
new file mode 100644
index 00000000..7d49d534
--- /dev/null
+++ b/versioned_docs/version-0.13.0/connectors/troubleshooting.mdx
@@ -0,0 +1,69 @@
+---
+sidebar_position: 200
+title: "Troubleshooting"
+description: "Connector Troubleshooting"
+---
+
+# Connector Build Troubleshooting
+
+## Multiplatform builds
+
+Connectors on the Hub can be published with multiple associated targets so
+they can run on multiple platforms. This can be accomplished by building with
+`cdk` on the same target the connector is intended for, or by cross-compiling
+for one target from another platform. Compiling for one target while on another
+can be complex, so this troubleshooting section provides target toolchain
+support information for some common platform/target combinations.
+
+## macOS
+
+For local builds, if `cdk build` does not work, explicitly specify
+the target:
+
+```bash
+cdk build --target aarch64-apple-darwin
+```
+
+## Ubuntu or Debian-based Linux Distributions {#ubuntu-debian}
+
+Build prerequisites for `x86_64-unknown-linux-musl` on Ubuntu from an
+`x86_64-unknown-linux-gnu` environment.
+
+System packages:
+```bash
+sudo apt install build-essential musl-tools
+```
+
+To build and test locally, instead of a plain `cdk build`, use the
+following:
+
+```bash
+CARGO_TARGET_X86_64_UNKNOWN_LINUX_MUSL_LINKER=x86_64-linux-musl-gcc cdk build
+```
+
+## Other Rust Cargo cross-platform build toolchains
+
+Connector projects are Rust projects, and different choices exist for
+cross-compilation. A connector binary built by a Rust toolchain can be
+published by `cdk` with the `--no-build` flag. Cross-compilation
+projects for Rust include:
+
+- Cargo cross https://github.com/cross-rs/cross
+- Cargo zigbuild https://github.com/rust-cross/cargo-zigbuild
+
+For a working example, see the [connector-publish github workflow].
+
+## Windows, WSL, WSL2
+
+Fluvio and `cdk` are supported on Windows only through WSL2. WSL2 often installs
+Ubuntu as the default Linux distribution. See the [Ubuntu] section for more
+build troubleshooting.
+
+## InfinyOn Cloud Certified Connectors
+
+InfinyOn Certified connectors are built for the `aarch64-unknown-linux-musl` target.
+
+To build and publish for the cloud, InfinyOn often uses a [connector-publish github workflow].
+
+[Ubuntu]: #ubuntu-debian
+[connector-publish github workflow]: https://github.com/infinyon/fluvio/blob/master/.github/workflows/connector-publish.yml
diff --git a/versioned_docs/version-0.13.0/fluvio/apis/_category_.json b/versioned_docs/version-0.13.0/fluvio/apis/_category_.json
new file mode 100644
index 00000000..5d29816e
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/apis/_category_.json
@@ -0,0 +1,5 @@
+{
+  "label": "Client APIs",
+  "collapsed": true,
+  "position": 80
+}
diff --git a/versioned_docs/version-0.13.0/fluvio/apis/nodejs/_category_.json b/versioned_docs/version-0.13.0/fluvio/apis/nodejs/_category_.json
new file mode 100644
index 00000000..cdd84558
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/apis/nodejs/_category_.json
@@ -0,0 +1,5 @@
+{
+  "label": "NodeJS SDK",
+  "collapsed": true,
+  "position": 40
+}
diff --git a/versioned_docs/version-0.13.0/fluvio/apis/nodejs/example.mdx b/versioned_docs/version-0.13.0/fluvio/apis/nodejs/example.mdx
new file mode 100644
index 00000000..e3c9791e
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/apis/nodejs/example.mdx
@@ -0,0 +1,124 @@
+---
+title: Examples
+sidebar_position: 20
+---
+
+# NodeJS SDK Examples
+
+* This client uses [`node-bindgen`] to wrap the Rust client.
+* It supports most administrator features.
+* Blocking Fluvio calls return promises, allowing the use of async/await.
+* The [`PartitionConsumer.createStream`] call returns an [`asyncIterator`] to allow iterating over the stream in a for-loop.
+
+To see the full docs, visit [our typedoc page].
+
+
+
+
+## Example Workflow
+
+Follow the [installation instructions] to run this example.
+
+```js
+/**
+* This is an example of a basic Fluvio workflow in TypeScript
+*
+* 1. Establish a connection to the Fluvio cluster
+* 2. Create a topic to store data in
+* 3. Create a producer and send some bytes
+* 4. Create a consumer, and stream the data back
+*/
+import Fluvio, { Offset, Record } from "@fluvio/client";
+
+const TOPIC_NAME = "hello-node";
+const PARTITION = 0;
+
+async function createTopic() {
+  try {
+    // Connect to the Fluvio cluster
+    console.log("Connecting client to fluvio");
+    await fluvio.connect();
+
+    // Create admin client
+    const admin = await fluvio.admin();
+
+    // Create topic
+    console.log("Creating topic");
+    await admin.createTopic(TOPIC_NAME);
+  } catch (ex) {
+    console.log("Topic already exists", ex);
+  }
+}
+
+const produce = async () => {
+  // Connect to the Fluvio cluster
+  console.log("Connecting client to fluvio");
+  await fluvio.connect();
+
+  // Create a topic producer
+  const producer = await fluvio.topicProducer(TOPIC_NAME);
+  await producer.send("example-key", "Hello World! - Time is " + Date());
+};
+
+const consume = async () => {
+  try {
+    // Connect to the fluvio cluster referenced in the cli profile.
+    await fluvio.connect();
+
+    // Create partition consumer
+    const consumer = await fluvio.partitionConsumer(TOPIC_NAME, PARTITION);
+
+    console.log("read from the end");
+    await consumer.stream(Offset.FromEnd(), async (record: Record) => {
+      // handle record
+      console.log(`Key=${record.keyString()}, Value=${record.valueString()}`);
+      process.exit(0);
+    });
+  } catch (ex) {
+    console.log("error", ex);
+  }
+};
+
+// Create Fluvio Client Instance
+const fluvio = new Fluvio();
+createTopic();
+produce();
+consume();
+```
+
+### Run
+
+```shell
+$ npx ts-node example.ts
+```
+
+### Expected Output
+```
+Connecting client to fluvio
+Connecting client to fluvio
+Creating topic
+read from the end
+Key=example-key, Value=Hello World! - Time is (...)
+```
+The above code tries to create a topic, produces an entry to the topic, and consumes that entry. The `process.exit(0)` call ends the process after a single record is consumed.
+
+## Links to Docs:
+- [Connect to Fluvio]
+- [Create a Producer]
+- [Send to Topic]
+- [Get a Consumer]
+- [Create a Stream]
+
+
+[`node-bindgen`]: https://github.com/infinyon/node-bindgen
+[our typedoc page]: https://infinyon.github.io/fluvio-client-node/
+[`PartitionConsumer.createStream`]: https://infinyon.github.io/fluvio-client-node/classes/PartitionConsumer.html#createStream
+[`asyncIterator`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/for-await...of
+
+[installation instructions]: fluvio/quickstart.mdx
+
+[Connect to Fluvio]: https://infinyon.github.io/fluvio-client-node/interfaces/FluvioClient.html#connect
+[Create a Producer]: https://infinyon.github.io/fluvio-client-node/interfaces/FluvioClient.html#topicProducer
+[Send to Topic]: https://infinyon.github.io/fluvio-client-node/classes/TopicProducer.html#send
+[Get a Consumer]: https://infinyon.github.io/fluvio-client-node/interfaces/FluvioClient.html#partitionConsumer
+[Create a Stream]: https://infinyon.github.io/fluvio-client-node/classes/PartitionConsumer.html#createStream
diff --git a/versioned_docs/version-0.13.0/fluvio/apis/nodejs/installation.mdx b/versioned_docs/version-0.13.0/fluvio/apis/nodejs/installation.mdx
new file mode 100644
index 00000000..dff6e6b0
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/apis/nodejs/installation.mdx
@@ -0,0 +1,46 @@
+---
+title: Installation
+sidebar_position: 10
+---
+
+## NodeJS SDK Installation Guide
+
+Install [Node.js](https://nodejs.org/en/) (v**16.11.0** or above)
+
+We are using [`npm`](https://nodejs.dev/en/learn/an-introduction-to-the-npm-package-manager/) as the package manager.
+
+## Create a New Node Project for Fluvio Development
+
+Run the following commands to set up your project for development:
+
+```bash
+mkdir fluvio-demo && cd fluvio-demo
+npm init -y
+npm install -D typescript ts-node @types/node
+npm install -S @fluvio/client
+```
+
+Your `package.json` should look similar to the following:
+
+```json
+{
+  "name": "fluvio-demo",
+  "version": "1.0.0",
+  "description": "",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1"
+  },
+  "keywords": [],
+  "author": "",
+  "license": "ISC",
+  "devDependencies": {
+    "@types/node": "^22.3.0",
+    "ts-node": "^10.9.2",
+    "typescript": "^5.5.4"
+  },
+  "dependencies": {
+    "@fluvio/client": "^0.14.9"
+  }
+}
```
diff --git a/versioned_docs/version-0.13.0/fluvio/apis/overview.mdx b/versioned_docs/version-0.13.0/fluvio/apis/overview.mdx
new file mode 100644
index 00000000..02476739
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/apis/overview.mdx
@@ -0,0 +1,121 @@
+---
+sidebar_position: 10
+title: "Overview"
+description: "Walkthrough of Fluvio Software Development Kits"
+---
+
+This page describes the general usage pattern shared by all Fluvio clients. Each
+client differs in the details, but the following rules apply to all of them.
+
+For more details on each available client, see the following:
+
+### Official API Clients
+
+
+### Experimental API Clients
+
+
+## Connect to Fluvio
+
+The first thing you want to do to use a Fluvio client is connect to the Fluvio
+cluster.
+
+## Producer
+
+Once you've got a connection handle, you will want to create a producer for a
+given topic.
+
+The producer can be created with the following configuration options: `max_request_size`, `batch_size`, `compression`, `linger`, and `partitioner`.
+
+These configurations control the behavior of the producer in the following way:
+
+* `max_request_size`: Maximum number of bytes that the producer can send in a single request. If the record is larger than the max request size, the producer drops the record and returns an error. Defaults to 1048576 bytes.
+* `batch_size`: Maximum number of bytes accumulated by the records before sending the batch. If the record is larger than the batch size, the producer will split the records and send them in multiple batches. Defaults to 16384 bytes.
+* `compression`: Compression algorithm used by the producer to compress each batch before sending to the SPU. Supported compression algorithms are `none`, `gzip`, `snappy` and `lz4`.
+* `linger`: The maximum time to wait to accumulate records before sending the batch. Defaults to 100 ms.
+* `partitioner`: Custom class/struct that assigns the partition to each record that needs to be sent. Defaults to the SipHash round-robin partitioner.
+
+### Sending
+
+When sending into a stream, the general `send` will take a `key` and a `value`.
+The `key` is optional. For clients whose language doesn't have an `Option` type, this
+is simply an empty array.
+
+Depending on the client, these can be `string` or an array of `bytes`.
+
+Depending on the producer configuration, a `send` call may not immediately send the record to the SPU. `flush` is used to immediately send all the queued records in the producer batches. Producers should `flush` before terminating to ensure that all records are sent properly.
+
+## Consumer
+
+Similar to [producing](#producer), once you've got a connection, you'll need
+to create a consumer for a given topic.
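+
+To make the producer options and consumer setup above concrete, here is a minimal Rust
+sketch using the Rust client's builder API (the same calls that appear in the Rust SDK
+examples elsewhere in these docs). The topic name `my-topic`, the `anyhow` error handling,
+and the exact re-export paths for `Compression` are assumptions that may vary by crate version:
+
+```rust
+use std::time::Duration;
+use fluvio::{Compression, Fluvio, Offset, RecordKey, TopicProducerConfigBuilder};
+
+async fn produce_then_consume() -> anyhow::Result<()> {
+    // Connect to the cluster referenced by the current profile
+    let fluvio = Fluvio::connect().await?;
+
+    // Producer tuned with the options described above
+    let config = TopicProducerConfigBuilder::default()
+        .batch_size(16_384)                 // max bytes accumulated per batch
+        .linger(Duration::from_millis(100)) // wait up to 100ms to fill a batch
+        .compression(Compression::Gzip)     // compress each batch before sending
+        .build()?;
+    let producer = fluvio.topic_producer_with_config("my-topic", config).await?;
+
+    producer.send(RecordKey::NULL, "hello").await?;
+    producer.flush().await?; // flush before terminating so queued records are sent
+
+    // Consumer for partition 0 of the same topic
+    let consumer = fluvio.partition_consumer("my-topic", 0).await?;
+    let _stream = consumer.stream(Offset::beginning()).await?;
+    Ok(())
+}
+```
+
+Note how the consumer takes an offset when the stream is created; offsets are covered in more detail below.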
+
+### Streams
+
+Once you've got a consumer, you can create a stream given an [offset](#offsets)
+and listen for new items.
+
+Most of our clients support idiomatic ways of iterating over the items in the stream:
+* The rust client stream uses a [`Stream`]
+* The node client stream implements the [`asyncIterator`]
+* The python client stream implements `__next__` making it a [Python Iterator]
+
+This functionality has not been implemented for the Java client yet.
+
+[`Stream`]: https://docs.rs/futures/0.3.15/futures/stream/trait.Stream.html
+[`asyncIterator`]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/for-await...of
+[Python Iterator]: https://wiki.python.org/moin/Iterator
+
+### Offsets
+
+An offset is passed when a stream is created, to request that the stream start
+`N` items from the beginning or end of the stream.
+
+### Records
+
+Each item in a [stream](#streams) is a `Record`.
+
+Our clients differ a little bit on this, but a `Record` is a wrapper around an
+array of bytes with accessor methods for the `key` and `value`.
+
+In the Python, Node, and Java clients, we have to-string convenience methods.
+
+### Timestamps
+
+Fluvio `Records` contain timestamp information. As of Fluvio `0.9.25`, the timestamp of each record is set by the Fluvio Producer at creation time; previously, the timestamp fields were uninitialized. This information is available to the consumer using the respective API call `timestamp()`.
diff --git a/versioned_docs/version-0.13.0/fluvio/apis/python/_category_.json b/versioned_docs/version-0.13.0/fluvio/apis/python/_category_.json
new file mode 100644
index 00000000..2f4fceba
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/apis/python/_category_.json
@@ -0,0 +1,5 @@
+{
+  "label": "Python SDK",
+  "collapsed": true,
+  "position": 30
+}
diff --git a/versioned_docs/version-0.13.0/fluvio/apis/python/example.mdx b/versioned_docs/version-0.13.0/fluvio/apis/python/example.mdx
new file mode 100644
index 00000000..69bea11e
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/apis/python/example.mdx
@@ -0,0 +1,145 @@
+---
+title: Examples
+sidebar_position: 20
+---
+
+# Python SDK Examples
+
+* The Python client [wraps the Rust client](https://www.infinyon.com/blog/2021/03/python-client/).
+* It currently does not support the administrator features that the Rust client does.
+* The [PartitionConsumer.stream](https://infinyon.github.io/fluvio-client-python/fluvio.html#PartitionConsumer.stream) returns an object which implements the [Python iterator convention](https://wiki.python.org/moin/Iterator) to allow for iterating over the stream in a for-loop.
+
+To see the full docs, visit our [pdoc page](https://infinyon.github.io/fluvio-client-python/fluvio.html).
+
+## Examples
+
+### Producer
+
+Create the topic used to produce and consume records:
+
+```bash copy="cmd"
+$ fluvio topic create python-data
+```
+
+Create a file called `python-produce.py`:
+
+```python
+#!/usr/bin/env python
+from datetime import datetime
+from fluvio import Fluvio
+
+TOPIC_NAME = "python-data"
+PARTITION = 0
+
+if __name__ == "__main__":
+    # Connect to cluster
+    fluvio = Fluvio.connect()
+
+    # Produce 10 records to topic
+    producer = fluvio.topic_producer(TOPIC_NAME)
+    for x in range(10):
+        producer.send_string("{}: timestamp: {}".format(x, datetime.now()))
+
+    # Flush the last entry
+    producer.flush()
+```
+
+Let's run the file:
+
+```bash copy="cmd"
+$ python python-produce.py
+```
+
+### Consumer
+
+Create a file called `python-consume.py`:
+
+```python
+#!/usr/bin/env python
+from fluvio import Fluvio, Offset
+
+TOPIC_NAME = "python-data"
+PARTITION = 0
+
+if __name__ == "__main__":
+    # Connect to cluster
+    fluvio = Fluvio.connect()
+
+    # Consume last 10 records from topic
+    consumer = fluvio.partition_consumer(TOPIC_NAME, PARTITION)
+    for idx, record in enumerate( consumer.stream(Offset.from_end(10)) ):
+        print("{}".format(record.value_string()))
+
+        if idx >= 9:
+            break
+```
+
+Let's run the file:
+
+```bash copy="cmd"
+$ python python-consume.py
+```
+
+## Example with a SmartModule
+
+This is a three-part example:
+
+* [Build a SmartModule that converts records to uppercase]
+* [Add a Python script that uses the SmartModule](#python-script-that-uses-a-smartmodule)
+* [Test the Python script](#test-python-script)
+
+### Python Script that uses a SmartModule
+
+Create a file called `smartmodule-consumer.py`:
+
+```python
+#!/usr/bin/env python
+import os
+from datetime import datetime
+from fluvio import Fluvio, Offset, ConsumerConfig
+
+TOPIC_NAME = "hello-python-smartmodule"
+PARTITION = 0
+
+# This is an example of a basic Fluvio workflow in Python
+#
+# 1. Create a topic to store data in via CLI
+# 2. Establish a connection to the Fluvio cluster
+# 3. Create a producer and send some bytes
+# 4. Create a consumer, and stream the data back
+if __name__ == "__main__":
+    # Currently the Python client does not support creating topics
+    # Using the Fluvio CLI
+    os.popen("fluvio topic create {}".format(TOPIC_NAME))
+
+    # Connect to cluster
+    fluvio = Fluvio.connect()
+
+    # Produce to topic
+    producer = fluvio.topic_producer(TOPIC_NAME)
+    producer.send_string("Hello World! - Time is: {}".format(datetime.now()))
+
+    # Consume from topic
+    # We're just going to get the last record
+    consumer = fluvio.partition_consumer(TOPIC_NAME, PARTITION)
+
+    # Create a ConsumerConfig using your "uppercase" smartmodule
+    config = ConsumerConfig()
+    config.smartmodule(name="uppercase")
+
+    for record in consumer.stream_with_config(Offset.from_end(0), config):
+        print("{}".format(record.value_string()))
+        break
+```
+
+### Test Python Script
+
+Ready to test the script:
+
+```shell copy="cmd"
+$ python smartmodule-consumer.py
+HELLO WORLD! - TIME IS: 2024-08-25 21:04:51.172045
+```
+
+
+[Build a SmartModule that converts records to uppercase]: /docs/smartmodules/tutorials/make-uppercase.mdx
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/fluvio/apis/python/installation.mdx b/versioned_docs/version-0.13.0/fluvio/apis/python/installation.mdx
new file mode 100644
index 00000000..5b647db5
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/apis/python/installation.mdx
@@ -0,0 +1,23 @@
+---
+title: Installation
+sidebar_position: 10
+---
+
+## Python SDK Installation Guide
+
+## Minimum Python
+
+The minimum supported Python version is 3.6.
+* https://wiki.python.org/moin/BeginnersGuide/Download
+
+## Install Python's package manager `pip`
+You will need `pip` to install the `fluvio` package from PyPI
+* https://pip.pypa.io/en/stable/installation/
+
+## Install [`fluvio`](https://pypi.org/project/fluvio/) with `pip`
+
+Run the following to install the [`fluvio`](https://pypi.org/project/fluvio/) package:
+
+```shell copy="fl"
+$ pip install fluvio
+```
diff --git a/versioned_docs/version-0.13.0/fluvio/apis/rust/_category_.json b/versioned_docs/version-0.13.0/fluvio/apis/rust/_category_.json
new file mode 100644
index 00000000..7b3123a9
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/apis/rust/_category_.json
@@ -0,0 +1,5 @@
+{
+  "label": "Rust SDK",
+  "collapsed": true,
+  "position": 20
+}
diff --git a/versioned_docs/version-0.13.0/fluvio/apis/rust/example.mdx b/versioned_docs/version-0.13.0/fluvio/apis/rust/example.mdx
new file mode 100644
index 00000000..0d1493af
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/apis/rust/example.mdx
@@ -0,0 +1,167 @@
+---
+title: Examples
+sidebar_position: 20
+---
+
+# Rust SDK Examples
+
+* The Rust client is the core client for all language clients.
+  * New features arrive in the Rust client before any of the other clients.
+* Full support for the [Admin API](https://docs.rs/fluvio/latest/fluvio/struct.FluvioAdmin.html).
+* This client uses [async Rust](https://rust-lang.github.io/async-book/) for all blocking calls.
+
+Refer to the [fluvio docs.rs page] for full detail.
+
+## Example Workflow
+
+Follow the [installation instructions](/) to run this example.
+
+```toml
+[package]
+edition = "2021"
+name = "fluvio-rust-example"
+publish = false
+version = "0.0.0"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+async-std = {version = "1", features = ["attributes"]}
+chrono = "0.4"
+fluvio = "0.23"
+```
+
+```rust
+use async_std::stream::StreamExt;
+use chrono::Local;
+use fluvio::metadata::topic::TopicSpec;
+use fluvio::{consumer::ConsumerConfigExtBuilder, Fluvio, RecordKey};
+
+const TOPIC_NAME: &str = "hello-rust";
+const PARTITION_NUM: u32 = 0;
+const PARTITIONS: u32 = 1;
+const REPLICAS: u32 = 1;
+
+/// This is an example of a basic Fluvio workflow in Rust
+///
+/// 1. Establish a connection to the Fluvio cluster
+/// 2. Create a topic to store data in
+/// 3. Create a producer and send some bytes
+/// 4. Create a consumer, and stream the data back
+#[async_std::main]
+async fn main() {
+    // Connect to Fluvio cluster
+    let fluvio = Fluvio::connect().await.unwrap();
+
+    // Create a topic
+    let admin = fluvio.admin().await;
+    let topic_spec = TopicSpec::new_computed(PARTITIONS, REPLICAS, None);
+    let _topic_create = admin
+        .create(TOPIC_NAME.to_string(), false, topic_spec)
+        .await;
+
+    // Create a record
+    let record = format!("Hello World! - Time is {}", Local::now().to_rfc2822());
+
+    // Produce to a topic
+    let producer = fluvio::producer(TOPIC_NAME).await.unwrap();
+    producer.send(RecordKey::NULL, record).await.unwrap();
+    // Fluvio batches outgoing records by default, so flush producer to ensure all records are sent
+    producer.flush().await.unwrap();
+
+    // Consume last record from topic
+    let fluvio = Fluvio::connect().await.unwrap();
+    let config = ConsumerConfigExtBuilder::default()
+        .topic(TOPIC_NAME)
+        .partition(PARTITION_NUM)
+        .offset_start(fluvio::Offset::from_end(1))
+        .build()
+        .unwrap();
+
+    let mut stream = fluvio.consumer_with_config(config).await.unwrap();
+    if let Some(Ok(record)) = stream.next().await {
+        let string = String::from_utf8_lossy(record.value());
+        println!("{}", string);
+    }
+}
+```
+
+### Run
+
+```shell copy="fl"
+$ cargo run
+```
+
+## Additional Producer options
+
+Alternatively, we can [create a producer with custom configuration].
+
+For example, this is how to configure a Producer with a
+`batch_size` of `500` bytes, a linger of `500ms`, and `Gzip` compression:
+
+```rust
+let config = TopicProducerConfigBuilder::default()
+    .batch_size(500)
+    .linger(std::time::Duration::from_millis(500))
+    .compression(Compression::Gzip)
+    .build().expect("Failed to create topic producer config");
+let producer = fluvio.topic_producer_with_config("my-fluvio-topic", config).await.expect("Failed to create a producer");
+```
+
+[create a producer with custom configuration]: https://docs.rs/fluvio/latest/fluvio/struct.Fluvio.html#method.topic_producer_with_config
+
+## Using a SmartModule with the Rust Consumer
+
+Below is an example of how to use a SmartModule filter with the Rust consumer.
+
+```rust
+use std::io::Read;
+use flate2::bufread::GzEncoder;
+use flate2::Compression;
+use fluvio::{Fluvio, Offset, PartitionConsumer};
+use fluvio::consumer::{
+    SmartModuleInvocation,
+    SmartModuleInvocationWasm,
+    SmartModuleKind,
+    ConsumerConfig
+};
+use async_std::stream::StreamExt;
+
+// Read the compiled SmartModule and gzip-compress it for transport
+let raw_buffer = std::fs::read("/my_projects/example_filter/target/wasm32-unknown-unknown/release/example_filter.wasm").expect("wasm file is missing");
+let mut encoder = GzEncoder::new(raw_buffer.as_slice(), Compression::default());
+let mut buffer = Vec::with_capacity(raw_buffer.len());
+encoder.read_to_end(&mut buffer).expect("failed to compress wasm file");
+
+let mut builder = ConsumerConfig::builder();
+builder.smartmodule(Some(SmartModuleInvocation {
+    wasm: SmartModuleInvocationWasm::AdHoc(buffer),
+    kind: SmartModuleKind::Filter,
+    params: Default::default()
+}));
+let filter_config = builder.build().expect("Failed to create config");
+
+// connect to the cluster
+let fluvio = Fluvio::connect().await.expect("Failed to connect to cluster");
+
+// create partition consumer
+let consumer = fluvio.partition_consumer("my-topic", 0).await.expect("failed to create consumer");
+// stream from beginning
+let mut stream = consumer.stream_with_config(Offset::beginning(), filter_config).await.expect("Failed to create stream");
+
+while let Some(Ok(record)) = stream.next().await {
+    let key = record.key().map(|key| String::from_utf8_lossy(key).to_string());
+    let value = String::from_utf8_lossy(record.value()).to_string();
+    println!("Got filter event: key={:?}, value={}", key, value);
+}
+```
+
+Refer to the [fluvio docs.rs page] for full detail.
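+
+## Producing key/value records
+
+The workflow above sends records with `RecordKey::NULL`. For completeness, here is a small
+sketch of producing key/value records: `send` accepts anything convertible into a key and a
+value, so only the key argument changes. The topic name and `anyhow` error handling are
+assumptions for illustration:
+
+```rust
+async fn produce_key_value() -> anyhow::Result<()> {
+    let producer = fluvio::producer("my-topic").await?;
+
+    // Records with the same key always land in the same partition
+    producer.send("alice", "Alice In Wonderland").await?;
+    producer.send("batman", "Bruce Wayne").await?;
+    producer.flush().await?; // make sure batched records are delivered
+    Ok(())
+}
+```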
+ +[Admin Api]: https://docs.rs/fluvio/latest/fluvio/struct.FluvioAdmin.html +[async rust]: https://rust-lang.github.io/async-book/ +[fluvio docs.rs page]: https://docs.rs/fluvio/ + +## Links to Docs: +* [Connect to Fluvio](https://docs.rs/fluvio/0.12.12/fluvio/struct.Fluvio.html#method.connect) +* [Get a Producer](https://docs.rs/fluvio/latest/fluvio/struct.Fluvio.html#method.topic_producer) +* [Send to Topic](https://docs.rs/fluvio/latest/fluvio/struct.TopicProducer.html#method.send) +* [Get a Consumer](https://docs.rs/fluvio/latest/fluvio/struct.Fluvio.html#method.partition_consumer) +* [Get a Stream](https://docs.rs/fluvio/latest/fluvio/consumer/struct.PartitionConsumer.html#method.stream) diff --git a/versioned_docs/version-0.13.0/fluvio/apis/rust/installation.mdx b/versioned_docs/version-0.13.0/fluvio/apis/rust/installation.mdx new file mode 100644 index 00000000..f1ec9f5a --- /dev/null +++ b/versioned_docs/version-0.13.0/fluvio/apis/rust/installation.mdx @@ -0,0 +1,18 @@ +--- +title: Installation +sidebar_position: 10 +--- + +## Rust SDK Installation Guide + +The only requirement is to [install a basic Rust development environment](https://www.rust-lang.org/tools/install) + +## Add [`fluvio`] crate + +Then add [`fluvio`] under `[dependencies]` to your project's `Cargo.toml` + +```toml +fluvio = "0.23" +``` + +[`fluvio`]: https://crates.io/crates/fluvio diff --git a/versioned_docs/version-0.13.0/fluvio/cli/_category_.json b/versioned_docs/version-0.13.0/fluvio/cli/_category_.json new file mode 100644 index 00000000..30804219 --- /dev/null +++ b/versioned_docs/version-0.13.0/fluvio/cli/_category_.json @@ -0,0 +1,5 @@ +{ + "label": "CLI", + "collapsed": true, + "position": 60 +} diff --git a/versioned_docs/version-0.13.0/fluvio/cli/cdk.mdx b/versioned_docs/version-0.13.0/fluvio/cli/cdk.mdx new file mode 100644 index 00000000..f9f37b89 --- /dev/null +++ b/versioned_docs/version-0.13.0/fluvio/cli/cdk.mdx @@ -0,0 +1,9 @@ +--- +sidebar_position: 20 +title: "cdk (link)" +description: "cdk is a tool for developing Fluvio connectors" +--- + +CDK is defined in [Connectors]. + +[Connectors]: connectors/cdk.mdx \ No newline at end of file diff --git a/versioned_docs/version-0.13.0/fluvio/cli/fluvio/_category_.json b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/_category_.json new file mode 100644 index 00000000..888cff34 --- /dev/null +++ b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/_category_.json @@ -0,0 +1,5 @@ +{ + "position": 10, + "label": "fluvio", + "collapsed": true +} diff --git a/versioned_docs/version-0.13.0/fluvio/cli/fluvio/cloud.mdx b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/cloud.mdx new file mode 100644 index 00000000..a55ab724 --- /dev/null +++ b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/cloud.mdx @@ -0,0 +1,9 @@ +--- +sidebar_position: 90 +title: "cloud" +description: "InfinyOn Cloud functions using the Fluvio CLI" +--- + +`fluvio cloud` CLI commands operate with an InfinyOn cloud account. See their documentation [here]. 
+
+[here]: cloud/cli/overview.mdx
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/fluvio/cli/fluvio/cluster.mdx b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/cluster.mdx
new file mode 100644
index 00000000..b5166a36
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/cluster.mdx
@@ -0,0 +1,132 @@
+---
+sidebar_position: 20
+title: "cluster"
+description: "Managing your Fluvio cluster using the Fluvio CLI"
+---
+
+# The `fluvio cluster` command
+
+The `fluvio cluster` family of commands is used to install and manage your own
+Fluvio cluster. The two primary forms of self-hosted cluster are Kubernetes
+(e.g. via Minikube) and Local clusters, both of which can be automatically set up
+by cluster commands. Please make sure you have followed the
+[getting started guide] and installed all the necessary dependencies before
+attempting to install your own cluster.
+
+
+## `fluvio cluster check`
+
+This command is used to check whether you have all the required dependencies
+installed correctly. If this runs and returns successfully, you should be all
+set to start a Fluvio cluster.
+
+**Example usage**
+
+
+```bash copy="fl"
+$ fluvio cluster check
+Running pre-startup checks...
+     ✅ Kubernetes config is loadable
+     ✅ Supported kubernetes version is installed
+     ✅ Supported helm version is installed
+     ✅ Can create service
+     ✅ Can create customresourcedefinitions
+     ✅ Can create secret
+     ✅ Fluvio system charts are installed
+🎉 All checks passed!
+You may proceed with cluster startup
+next: run `fluvio cluster start`
+```
+
+## `fluvio cluster start`
+
+This command is used to start your own Fluvio cluster, with all the
+machinery needed to receive, process, and serve streaming messages.
+
+There are two main variations of this command. The default variation
+is invoked simply by `fluvio cluster start`. This will run the Fluvio
+cluster locally. The other variation is `fluvio cluster start --k8`, which will start the cluster components in your
+configured Kubernetes context.
+
+### Local
+To start a cluster locally (as processes on your machine):
+
+```bash copy="fl"
+$ fluvio cluster start
+📝 Running pre-flight checks
+     ✅ Supported helm version is installed
+     ✅ Supported kubernetes version is installed
+     ✅ Kubernetes config is loadable
+     ✅ Fixed: Missing Fluvio system charts.
+🖥️  SC Launched
+🤖 SPU group launched (1)
+👤 Profile set
+🎯 Successfully installed Fluvio!
+```
+### Kubernetes
+
+
+To start a cluster on Kubernetes:
+
+```bash copy="fl"
+$ fluvio cluster start --k8
+📝 Running pre-flight checks
+     ✅ Kubernetes config is loadable
+     ✅ Supported helm version is installed
+     ✅ Fixed: Missing Fluvio system charts.
+     ✅ Previous fluvio installation not found
+🛠️  Installing Fluvio
+     ✅ Fluvio app chart has been installed
+🔎 Found SC service addr: 172.19.0.2:30814
+👤 Profile set
+🤖 SPU group launched (1)
+     ✅ All SPUs confirmed
+🎯 Successfully installed Fluvio!
+```
+
+:::note
+For more information on installing Fluvio with Kubernetes, go to our [Kubernetes Install] section
+
+:::
+
+## `fluvio cluster delete`
+
+Deletes a Fluvio cluster and all data associated with it.
+
+:::danger
+  Be careful, this cannot be undone.
+:::
+
+**Example usage**
+
+To uninstall Fluvio from Kubernetes (e.g. Minikube):
+
+
+```bash copy="fl"
+$ fluvio cluster delete
+```
+
+To uninstall Fluvio from your local machine:
+
+
+```bash copy="fl"
+$ fluvio cluster delete --local
+```
+
+## `fluvio cluster spu list`
+
+This command shows details about the active SPUs in your cluster.
+It is mostly useful for checking on the status of individual SPUs
+to see whether they are still online, and which addresses they live at.
+
+**Example usage**
+
+
+```bash copy="fl"
+$ fluvio cluster spu list
+ ID    NAME             STATUS  TYPE      RACK  PUBLIC          PRIVATE
+ 5001  custom-spu-5001  Online  "custom"   -    localhost:9010  localhost:9011
+```
+
+[getting started guide]: fluvio/quickstart.mdx
+[Kubernetes Install]: fluvio/installation/kubernetes.mdx
diff --git a/versioned_docs/version-0.13.0/fluvio/cli/fluvio/consume.mdx b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/consume.mdx
new file mode 100644
index 00000000..33c76f23
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/consume.mdx
@@ -0,0 +1,235 @@
+---
+sidebar_position: 60
+title: "consume"
+description: "Consume data from topics using the Fluvio CLI"
+---
+
+# The `fluvio consume` command
+
+## `fluvio consume`
+
+The `fluvio consume` command is a way to read the contents of records in a Fluvio topic
+from a command-line environment.
+
+A topic's records are stored in one or more partitions; the `consume` command will only read from one of those partitions, defaulting to partition `0`.
+
+The following `fluvio consume` examples build on the [`fluvio produce` examples].
+
+## Examples
+
+### Consume all records
+
+When consuming, we need to specify a starting offset from which to begin reading.
+We can use the `--from-beginning` (`-B`) flag in order to read everything from the very
+beginning. Here we'll also use the `--disable-continuous` (`-d`) flag in order to exit
+after all the records have been read:
+
+
+```bash copy="fl"
+$ fluvio consume my-topic -B -d
+This is my first record ever
+This is my second record ever
+Alice In Wonderland
+Bruce Wayne
+Santa Claus
+```
+
+Notice that all the records are printed by value only: the records with keys have not
+had their keys printed! This is the default behavior of the consumer. To see how to print
+the keys of key/value records, see the next example!
+
+### Consume key/value records
+
+If we want to see both the keys _and_ values of the records in the topic, you can use
+the `--key-value` flag:
+
+
+```bash copy="fl"
+$ fluvio consume my-topic -dB --key-value
+[null] This is my first record ever
+[null] This is my second record ever
+[alice] Alice In Wonderland
+[batman] Bruce Wayne
+[santa] Santa Claus
+```
+
+Records that were not given a key are printed with `[null]`.
+
+### Consume using a SmartModule
+
+Fluvio SmartModules are WASM modules that can edit the contents of a stream
+inline, before the records of that stream are delivered to a consumer. One way to use
+SmartModules is to supply the WASM module with the `--smartmodule-path` flag to
+the `fluvio consume` command.
+
+The simplest SmartModule is the [filter example], which
+filters records from the stream based on whether they contain the letter `a`
+or not. You can find the full example code [in our GitHub repo] and compile
+it to test out yourself.
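+
+For reference, the core of such a filter is only a few lines of Rust. The following is a
+sketch along the lines of the linked example; it assumes the `fluvio-smartmodule` crate
+and that the project is compiled to WebAssembly (e.g. with `smdk build`):
+
+```rust
+use fluvio_smartmodule::{smartmodule, Record, Result};
+
+// Keep only records whose value contains the letter 'a'
+#[smartmodule(filter)]
+pub fn filter(record: &Record) -> Result<bool> {
+    let string = std::str::from_utf8(record.value.as_ref())?;
+    Ok(string.contains('a'))
+}
```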
+ +Once you have compiled your SmartModule Filter and have a `.wasm` file for it, you +can apply it by sending the binary to the cluster when you start your CLI consumer: + + +```bash copy="fl" +$ fluvio consume my-topic -B --smartmodule-path="fluvio_wasm_filter.wasm" +``` + +Alternatively, to avoid sending the SmartModule binary to the cluster with each +`fluvio consume` session, you can have the cluster store it for you: + + +```bash copy="fl" +$ fluvio smartmodule create --wasm-file="fluvio_wasm_filter.wasm" my_filter +``` + +Then you can apply the SmartModule by name: + + +```bash copy="fl" +$ fluvio consume my-topic -B --smartmodule="my_filter" +``` + +### Consume from a topic with multiple partitions + +As of today, the Fluvio CLI Consumer can only consume records from a single +partition at a time. When running `fluvio consume topic-name`, the CLI will +read records from partition `0` by default. Let's look at how we can read +records from the different partitions in a topic by using the `--partition (-p)` flag. + +Start out by creating a new topic with multiple partitions using [`fluvio topic create`]. + + + + +```bash copy="fl" +$ fluvio topic create consume-multi -p 3 +``` + +Let's create a text file with some records we would like to send. Each line of the +text file will be treated as one record. + +```bash +# Put the following records into a text file using your favorite editor +$ cat records.txt +one +two +three +four +five +six +seven +eight +nine +``` + +Then, produce the test data to the topic. + + +```bash copy="fl" +$ fluvio produce "consume-multi" -f records.txt +``` + +After producing some data, let's take a look at how the records got distributed +among our partitions using [`fluvio partition list`]. + +```bash copy="fl" +$ fluvio partition list + TOPIC PARTITION LEADER REPLICAS RESOLUTION HW LEO LSR FOLLOWER OFFSETS + consume-multi 0 5001 [] Online 3 3 0 [] + consume-multi 1 5001 [] Online 3 3 0 [] + consume-multi 2 5001 [] Online 3 3 0 [] +``` + +We can see by the high watermark (HW) and log-end-offset (LEO) that 3 records were +sent to each partition. Let's look at how to consume from each partition. + +To consume from a specific partition, use the `--partition (-p)` flag on `fluvio consume`. + + +```bash copy="fl" +$ fluvio consume "consume-multi" -B --partition 0 +one +four +seven +``` + +To consume from partition 1: + + +```bash +$ fluvio consume "consume-multi" -B --partition 1 +two +five +eight +``` + +And from partition 2: + + +```bash copy="fl" +$ fluvio consume "consume-multi" -B --partition 2 +three +six +nine +``` + +### Consume from all partitions + +At times, it is useful to see all records from all partitions from a single consumer. +Using the example above: + + +```bash copy="fl" +$ fluvio partition list + TOPIC PARTITION LEADER REPLICAS RESOLUTION HW LEO LSR FOLLOWER OFFSETS + consume-multi 0 5001 [] Online 3 3 0 [] + consume-multi 1 5001 [] Online 3 3 0 [] + consume-multi 2 5001 [] Online 3 3 0 [] +``` + +Each partition has 3 records. Now let's consume from all partitions: + + +```bash copy="fl" +$ fluvio consume "consume-multi" -B -A +one +four +seven +two +three +five +six +eight +nine +``` + +:::tip + There is no order guarantee between partitions. +::: + +### Print consumed records with custom formatting + +Sometimes, the default Consumer printout might not work for your needs. As of Fluvio `0.9.6` +you can now use the `--format` string to describe how the Consumer should print your records! 
+
+The format string will replace placeholders such as `{{key}}`, `{{value}}`, `{{partition}}` (added in Fluvio `0.9.9`), `{{offset}}` and `{{time}}` (added in Fluvio `0.9.25`)
+with the actual contents for each record. One possible use for this is formatting each record
+as a CSV row:
+
+
+```bash copy="fl"
+$ fluvio consume my-topic -B --format="{{time}},{{partition}},{{offset}},{{key}},{{value}}"
+2022-05-04T15:35:49.244Z,0,0,null,This is my first record ever
+2022-05-04T15:35:49.244Z,0,1,null,This is my second record ever
+2022-05-04T15:52:19.963Z,0,2,alice,Alice In Wonderland
+2022-05-04T15:52:28.875Z,0,3,batman,Bruce Wayne
+2022-05-04T15:53:37.099Z,0,4,santa,Santa Claus
+```
+
+
+[`fluvio partition list`]: fluvio/cli/fluvio/partition.mdx#fluvio-partition-list
+[`fluvio produce` examples]: fluvio/cli/fluvio/produce.mdx
+[`fluvio topic create`]: fluvio/cli/fluvio/topic.mdx#fluvio-topic-create
+[filter example]: smartmodules/features/operators/filter.mdx
+[in our GitHub repo]: https://github.com/infinyon/fluvio/tree/master/smartmodule/examples/filter_json
diff --git a/versioned_docs/version-0.13.0/fluvio/cli/fluvio/hub/_category_.json b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/hub/_category_.json
new file mode 100644
index 00000000..b03a45b9
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/hub/_category_.json
@@ -0,0 +1,5 @@
+{
+  "position": 100,
+  "label": "hub",
+  "collapsible": true
+}
diff --git a/versioned_docs/version-0.13.0/fluvio/cli/fluvio/hub/download.mdx b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/hub/download.mdx
new file mode 100644
index 00000000..d0446738
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/hub/download.mdx
@@ -0,0 +1,47 @@
+---
+sidebar_position: 30
+title: "download"
+description: "Download the SmartModule from the Hub to the local or to Cloud Cluster"
+---
+
+# The `fluvio hub download` command
+
+The download operation sends a copy of the SmartModule from the Hub to the local or Cloud cluster, as defined by [`the profile`] in the Fluvio Client.
+
+After downloading, all SmartModules, regardless of their source, are treated the same.
+
+#### Download with Fluvio CLI
+
+Downloading a SmartModule from the Hub is as follows:
+
+```bash copy="fl"
+$ fluvio hub download infinyon/json-sql@0.1.0
+
+trying connection to fluvio router.infinyon.cloud:9003
+downloading infinyon/json-sql@0.1.0 to infinyon-json-sql-0.1.0.ipkg
+... downloading complete
+... checking package
+... cluster smartmodule install complete
+```
+
+#### Download with InfinyOn Cloud
+
+In [InfinyOn Cloud], the Hub collapses multiple versions of the same SmartModule into one object. For example:
+
+```
+ infinyon/regex-filter
+    -> `infinyon/regex-filter@0.1.0`
+    -> `infinyon/regex-filter@0.1.2`
+    -> `infinyon/regex-filter@0.2.1`
+  ...
+```
+
+1. Click Hub from the top menu.
+   All Public and Public-Owned SmartModules are displayed.
+
+2. Choose the SmartModule you want to download, pick a version, and click Download.
+   The SmartModule is downloaded to your cluster and the state of the button changes.
+
+[`the profile`]: fluvio/cli/fluvio/profile.mdx
+[InfinyOn Cloud]: https://infinyon.cloud/
+
diff --git a/versioned_docs/version-0.13.0/fluvio/cli/fluvio/hub/list.mdx b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/hub/list.mdx
new file mode 100644
index 00000000..38c7b64f
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/hub/list.mdx
@@ -0,0 +1,30 @@
+---
+sidebar_position: 20
+title: "list"
+description: "Retrieves all SmartModules available for download in the Hub for a specific user"
+---
+
+# The `fluvio hub list` command
+
+The list operation retrieves all SmartModules available for download in the SmartModule Hub for a specific user.
+
+By default, users can retrieve all public SmartModules and all private SmartModules that match their group assignment.
+
+#### List with Fluvio CLI
+
+Listing Hub SmartModules from the CLI is straightforward:
+
+```bash copy="fl"
+$ fluvio hub list
+
+  SMARTMODULE                  Visibility
+  infinyon/jolt@0.1.0          public
+  infinyon/json-sql@0.1.0      public
+  infinyon/regex-filter@0.1.0  public
+  ...
+```
diff --git a/versioned_docs/version-0.13.0/fluvio/cli/fluvio/partition.mdx b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/partition.mdx
new file mode 100644
index 00000000..5c7c0933
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/partition.mdx
@@ -0,0 +1,30 @@
+---
+sidebar_position: 40
+title: "partition"
+description: "Managing Partitions in Fluvio CLI"
+---
+
+# The `fluvio partition` command
+
+Commands for partition management.
+
+## `fluvio partition list`
+
+Prints basic information about each partition in the cluster, such as
+which topic it belongs to, which SPU is leading the partition, and the
+various offsets the partition keeps track of.
+
+**Example usage**
+
+
+```bash copy="fl"
+$ fluvio partition list
+  TOPIC     PARTITION  LEADER  REPLICAS  RESOLUTION  SIZE  HW  LEO  LRS  FOLLOWER OFFSETS
+  greeting  0          0       []        Online      86 B  1   1    0    []
+```
+
+
+More information about the columns HW, LEO, and LRS can be found in the details regarding the [Synchronization Algorithm].
+
+
+[Synchronization Algorithm]: fluvio/concepts/architecture/replica-election.mdx#synchronization-algorithm
diff --git a/versioned_docs/version-0.13.0/fluvio/cli/fluvio/produce.mdx b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/produce.mdx
new file mode 100644
index 00000000..ed54dee6
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/produce.mdx
@@ -0,0 +1,296 @@
+---
+sidebar_position: 50
+title: "produce"
+description: "Produce data on topics using the Fluvio CLI"
+---
+
+# The `fluvio produce` command
+
+The `fluvio produce` command is a way to send records to the leader of a partition.
+Produce records by specifying the destination Topic.
+
+## Examples
+
+### Produce records from stdin
+
+The quickest way to send a record using the producer is to just type your record
+into standard input:
+
+```bash
+$ fluvio produce my-topic
+> This is my first record ever
+Ok!
+> This is my second record ever
+Ok!
+> ^C
+```
+
+:::tip
+  In order to stop the producer, we need to press `Ctrl-C` (shown above as `^C`)
+:::
+
+As the message says, each line that you type will be sent as a new record to the topic.
+
+The `Ok!` was printed by the producer after each record, to let us know the record
+was sent successfully.
+
+### Produce key/value records from stdin
+
+Fluvio supports key/value records out-of-the-box. In a key/value record, the key is used
+to decide which partition the record is sent to. Let's try sending some simple key/value records:
+
+
+```bash copy="fl"
+$ fluvio produce my-topic --key-separator=":"
+> alice:Alice In Wonderland
+Ok!
+> batman:Bruce Wayne
+Ok!
+> ^C
+```
+
+So our records are being sent, but how do we know that the producer recognized each key properly?
+We can use the `--verbose` (`-v`) flag to tell the producer to print back the keys and values it
+recognizes. That way, we can be confident that our records are being sent the way we want them to be.
+
+
+```bash copy="fl"
+$ fluvio produce my-topic -v --key-separator=":"
+> santa:Santa Claus
+[santa] Santa Claus
+Ok!
+> ^C
+```
+
+The producer splits the key from the value and prints it in a `[key] value` format.
+
+### Produce using a SmartModule
+
+Fluvio's SmartModules can be applied to the producer to edit the contents of a stream
+after the records are sent but before they are committed. One way to supply a WASM
+SmartModule to the `fluvio produce` command is with the `--smartmodule-path` option.
+
+Below is an example of how to apply a [Map] type SmartModule to transform each record in
+a stream. [This particular SmartModule] can be used to capitalize every letter in a
+string.
+
+To use the SmartModule, compile it and provide the `.wasm` file to the producer:
+
+
+```bash copy="fl"
+$ fluvio produce my-topic --smartmodule-path="fluvio_smartmodule_map.wasm"
+```
+
+To avoid sending the SmartModule binary to the cluster with every producer session, you
+can ask the cluster to store it for you:
+
+
+```bash copy="fl"
+$ fluvio smartmodule create --wasm-file="fluvio_smartmodule_map.wasm" my_map
+```
+
+Then just use the name you provided to apply it:
+
+
+```bash copy="fl"
+$ fluvio produce my-topic --smartmodule="my_map"
+```
+
+### Produce key/value records to multiple partitions
+
+When producing to a topic with multiple partitions, the producer will send
+all records with the same key to the same partition. Let's test this out by making
+a multi-partition topic, then sending some key/value records.
+
+First, we'll use [`fluvio topic create`] to create a topic called `multi-keys` with 5 partitions:
+
+```bash copy="fl"
+$ fluvio topic create multi-keys -p 5
+```
+
+Next, let's create a text file with the records we want to send:
+
+```bash
+# Put the following records into a text file using your favorite editor
+$ cat records.txt
+rafael:create account
+rafael:validate account
+samuel:create account
+tabitha:create account
+rafael:add item 1234 to cart
+tabitha:validate account
+samuel:validate account
+rafael:add item 2345 to cart
+tabitha:add item 9876 to cart
+rafael:complete purchase
+```
+
+Then, we'll send the key/value records from the file, using the `--key-separator` flag to separate
+our keys from our values. In this example, the keys are unique usernames.
+
+
+
+```bash copy="fl"
+$ fluvio produce multi-keys --key-separator=":" -f records.txt
+```
+
+Looking at this sample input, we can see that `rafael` generated 5 events, `samuel`
+generated 2 events, and `tabitha` generated 3 events. When we look at the partitions,
+we should see the records distributed in groups of 5, 2, and 3.
We can use the
+[`fluvio partition list`] command to view the distribution of records in our partitions:
+
+```bash copy="fl"
+$ fluvio partition list
+  TOPIC       PARTITION  LEADER  REPLICAS  RESOLUTION  SIZE   HW  LEO  LRS  FOLLOWER OFFSETS
+  multi-keys  0          0       []        Online      0 B    0   0    0    []
+  multi-keys  1          0       []        Online      0 B    0   0    0    []
+  multi-keys  2          0       []        Online      157 B  3   3    0    []
+  multi-keys  3          0       []        Online      220 B  5   5    0    []
+  multi-keys  4          0       []        Online      119 B  2   2    0    []
+```
+
+By looking at the high watermark (HW) and log-end-offset (LEO) of our partitions,
+we can see how many records have landed in each partition. As we expected, they
+were distributed in groups of 5, 3, and 2. Let's dig a little further. We know that
+`rafael` was the key used by the group of 5 records, so we should be able to see those
+records by using [`fluvio consume`] to consume from partition 3.
+
+```bash
+$ fluvio consume multi-keys -B -p3 --key-value
+[rafael] create account
+[rafael] validate account
+[rafael] add item 1234 to cart
+[rafael] add item 2345 to cart
+[rafael] complete purchase
+```
+
+### Producing to multiple partitions using Round-Robin
+
+When we produce to a topic with multiple partitions, records that have no key
+are assigned to partitions in a round-robin fashion. This ensures an even load
+distribution among the partitions.
+
+To see this in action, let's create a topic with multiple partitions using
+[`fluvio topic create`].
+
+
+```bash copy="fl"
+$ fluvio topic create multi-no-keys -p 5
+```
+
+Let's produce some data to our topic. We'll use the same data from [Example 3],
+but this time we won't tell the Producer to interpret our input as key-value records
+(we'll do this by omitting the `--key-separator` flag).
+
+```bash
+# Put the following records into a text file using your favorite editor
+$ cat records.txt
+rafael:create account
+rafael:validate account
+samuel:create account
+tabitha:create account
+rafael:add item 1234 to cart
+tabitha:validate account
+samuel:validate account
+rafael:add item 2345 to cart
+tabitha:add item 9876 to cart
+rafael:complete purchase
+```
+
+Next, we'll produce the records into the `multi-no-keys` stream.
+
+
+
+```bash copy="fl"
+$ fluvio produce multi-no-keys -f records.txt
+```
+
+Since records with no keys use round-robin partitioning, we should expect to see
+the records be evenly distributed among the partitions. This differs from Example 3
+in that the records are not grouped by any kind of key. Let's take a look at our
+partitions using [`fluvio partition list`].
+
+
+```bash copy="fl"
+$ fluvio partition list
+  TOPIC          PARTITION  LEADER  REPLICAS  RESOLUTION  SIZE   HW  LEO  LRS  FOLLOWER OFFSETS
+  multi-no-keys  0          0       []        Online      120 B  2   2    0    []
+  multi-no-keys  1          0       []        Online      121 B  2   2    0    []
+  multi-no-keys  2          0       []        Online      124 B  2   2    0    []
+  multi-no-keys  3          0       []        Online      126 B  2   2    0    []
+  multi-no-keys  4          0       []        Online      127 B  2   2    0    []
+```
+
+Notice how the high watermark (HW) and log-end-offset (LEO) tell us that there are
+exactly 2 records in each partition. Our ten records have been evenly distributed!
+
+### Producing using a compression algorithm (GZIP)
+
+Fluvio supports different types of compression algorithms for sending records.
+Compression, in general, improves throughput in exchange for some CPU cost to compress/decompress the data.
+
+Let's try using the `gzip` algorithm in the CLI.
+
+First, we'll use [`fluvio topic create`] to create a topic called `compressed` and another topic called `uncompressed`:
+
+```bash copy="fl"
+$ fluvio topic create compressed
+```
+
+```bash copy="fl"
+$ fluvio topic create uncompressed
+```
+
+Next, let's create a text file called `records.txt` with the following contents:
+
+
+```bash
+{"ts":"2020-06-18T10:44:12","started":{"pid":45678}}
+{"ts":"2020-06-18T10:44:13","logged_in":{"username":"foo"},"connection":{"addr":"1.2.3.4","port":5678}}
+{"ts":"2020-06-18T10:44:15","registered":{"username":"bar","email":"bar@example.com"},"connection":{"addr":"2.3.4.5","port":6789}}
+{"ts":"2020-06-18T10:44:16","logged_out":{"username":"foo"},"connection":{"addr":"1.2.3.4","port":5678}}
+{"ts":"2020-06-18T10:49:29","logged_in":{"username":"foo"},"connection":{"addr":"1.2.3.4","port":5678}}
+{"ts":"2020-06-18T10:50:13","logged_in":{"username":"bar"},"connection":{"addr":"2.3.4.5","port":6789}}
+{"ts":"2020-06-18T10:51:13","logged_out":{"username":"bar"},"connection":{"addr":"2.3.4.5","port":6789}}
+```
+
+Next, we'll produce the records from that file into the `compressed` stream using the `--compression gzip` option.
+
+
+
+```bash copy="fl"
+$ fluvio produce compressed -f records.txt --compression gzip
+```
+
+Let's also produce the same records to the `uncompressed` stream using no compression.
+
+
+
+```bash copy="fl"
+$ fluvio produce uncompressed -f records.txt # when no --compression flag is passed, `none` is used as the compression algorithm
+```
+
+Since records are compressed in the producer before they are sent to the SPU, their disk usage on the SPU should be lower than without compression. Let's take a look at the disk usage of the partitions using [`fluvio partition list`].
+
+
+```bash copy="fl"
+$ fluvio partition list
+  TOPIC         PARTITION  LEADER  REPLICAS  RESOLUTION  SIZE   HW  LEO  LRS  FOLLOWER OFFSETS
+  compressed    0          0       []        Online      328 B  7   7    0    []
+  uncompressed  0          0       []        Online      821 B  7   7    0    []
+```
+
+Notice how the SIZE field tells us that the `compressed` topic is using less disk space than the `uncompressed` topic for the same amount of records.
+
+Also note that [`consuming`] from topics works the same way for both compressed and uncompressed data.
+
+
+[`fluvio topic create`]: fluvio/cli/fluvio/topic.mdx#fluvio-topic-create
+[`fluvio partition list`]: fluvio/cli/fluvio/partition.mdx#fluvio-partition-list
+[`fluvio consume`]: fluvio/cli/fluvio/consume.mdx#fluvio-consume
+
+[Map]: smartmodules/features/operators/map.mdx
+[This particular SmartModule]: https://github.com/infinyon/fluvio/tree/fdcfce51067a44c06a91bc8e4aab518f0a193145/smartmodule/examples/map
+[Example 3]: fluvio/cli/fluvio/produce.mdx#produce-keyvalue-records-to-multiple-partitions
+[`consuming`]: fluvio/cli/fluvio/consume.mdx
diff --git a/versioned_docs/version-0.13.0/fluvio/cli/fluvio/profile.mdx b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/profile.mdx
new file mode 100644
index 00000000..3829cf29
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/profile.mdx
@@ -0,0 +1,148 @@
+---
+sidebar_position: 70
+title: "profile"
+description: "Manage Cluster Connection Details using Fluvio CLI"
+---
+
+# The `fluvio profile` command
+
+A Fluvio profile contains a reference to a cluster's connection details.
+
+Profile details are stored in the Fluvio configuration file at `~/.fluvio/config`.
+
+### `fluvio profile current`
+
+Prints out the name of the active Fluvio profile.
+
+```bash
+Print the name of the current context
+
+Usage: fluvio profile current
+
+Options:
+  -h, --help  Print help
+```
+
+**Example usage**
+
+
+```bash copy="fl"
+$ fluvio profile current
+local
+```
+
+### `fluvio profile delete`
+
+Deletes the specified Fluvio cluster connection profile from your Fluvio configuration (`~/.fluvio/config`).
+
+This will not delete a cluster. See [`fluvio cluster delete`] for more info.
+
+### `fluvio profile delete-cluster`
+
+Deletes only the cluster connection information from your profile.
+
+This will not delete a cluster. See [`fluvio cluster delete`] for more info.
+
+**Example usage**
+
+
+```bash copy="fl"
+$ fluvio profile delete-cluster local
+Cluster local deleted
+```
+
+### `fluvio profile list`
+
+Prints a table of your profiles, including the address of the associated
+cluster and which profile is active.
+
+**Example usage**
+
+
+```bash copy="fl"
+$ fluvio profile list
+    PROFILE    CLUSTER   ADDRESS            TLS
+ *  minikube   minikube  10.99.16.213:9003  Disabled
+    local      local     localhost:9003     Disabled
+```
+
+### `fluvio profile rename`
+
+```bash
+Rename a profile
+
+Usage: fluvio profile rename <FROM> <TO>
+
+Arguments:
+  <FROM>  The name of the profile to rename
+  <TO>    The new name to give the profile
+
+Options:
+  -h, --help  Print help
+```
+
+### `fluvio profile switch`
+
+```bash
+Switch to the named profile
+
+Usage: fluvio profile switch <PROFILE_NAME>
+
+Arguments:
+  <PROFILE_NAME>
+
+Options:
+  -h, --help  Print help
+```
+
+### `fluvio profile sync`
+
+```bash
+Sync a profile from a cluster
+
+Usage: fluvio profile sync <COMMAND>
+
+Commands:
+  k8     Sync a profile from a Kubernetes cluster
+  local  Sync a profile from a local cluster
+
+Options:
+  -h, --help  Print help
+```
+
+### `fluvio profile export`
+
+```bash
+Export a profile for use in other applications.
+
+Usage: fluvio profile export [OPTIONS] [PROFILE_NAME]
+
+Arguments:
+  [PROFILE_NAME]
+
+Options:
+  -O, --output <OUTPUT>  [default: json] [possible values: toml, yaml, json]
+  -h, --help             Print help
+```
+
+Exporting with `-O toml` produces a Fluvio config in TOML format, which can be used by setting the environment variable `FLV_PROFILE_PATH`
+to the exported file for configuration in environments like CI workflows or Docker containers.
+
+### `fluvio profile add`
+
+```bash
+Manually add a profile (advanced)
+
+Usage: fluvio profile add <PROFILE> <CLUSTER_ADDRESS> [INSTALLATION_TYPE]
+
+Arguments:
+  <PROFILE>            Name of profile to add
+  <CLUSTER_ADDRESS>    Address of cluster, e.g. 127.0.0.1:9003
+  [INSTALLATION_TYPE]  Installation type of cluster, e.g. local, local-k8, k8
+
+Options:
+  -h, --help  Print help
+```
+
+
+[`fluvio cluster delete`]: fluvio/cli/fluvio/cluster.mdx#fluvio-cluster-delete
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/fluvio/cli/fluvio/table-format.mdx b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/table-format.mdx
new file mode 100644
index 00000000..dac5b0c8
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/table-format.mdx
@@ -0,0 +1,414 @@
+---
+sidebar_position: 110
+title: "table-format"
+description: "Consumer Table Format"
+---
+
+# The `fluvio table-format` command
+
+Table Format is used to customize the behavior of the Fluvio consumer output type [`full-table`].
+
+With `table-format`, you can control the column labels and ordering, and choose which keys are primary for displaying your live event data as row updates.
+
+## `fluvio table-format`
+
+The `fluvio table-format` command provides subcommands such as `fluvio table-format create` and `fluvio table-format list` for managing your table formats.
+
+This is the schema for the Table Format yaml config used by `fluvio table-format create`.
+
+You only need to give your Table Format a name and an input format (currently only JSON is supported).
+
+### TableFormat Config schema
+
+This is a definition of the TableFormat config schema. Below are the descriptions of each field of the config file.
+
+Check out the [examples](#examples) section below to see a few different config files and their resulting table views.
+
+```yaml
+type: object
+required: ["name"]
+properties:
+  name:
+    type: string
+    minimum: 1
+    maximum: 100
+  inputFormat:
+    type: string
+    enum:
+      - JSON
+  columns:
+    type: array
+    items:
+      type: object
+      properties:
+        headerLabel:
+          type: string
+        keyPath:
+          type: string
+        primaryKey:
+          type: boolean
+```
+
+#### Field descriptions
+##### name
+Required
+
+This is the name of your Table Format. You'll see this name when you run `fluvio table-format list`, and you'll use this name with `fluvio consume topic-name --table-format <table-format-name>`
+
+##### inputFormat
+Required
+
+The only supported option for this field is `"JSON"`
+
+##### columns
+optional array - The default column display will be the top-level keys (ordered alphabetically).
+
+Each element references a key from the input JSON object.
+
+The ordering of the elements is important, as it determines the order in which columns are rendered.
+
+##### keyPath
+This is the only required column field. This should be a top-level key. If the key path doesn't exist, the column will print with no data.
+
+##### headerLabel
+optional - defaults to the key name. Overrides the label of the column.
+
+##### primaryKey
+optional - default false. If set to true, rendering updates to the table will compare the values of primary keys to define a set. When new data matches an existing set, its row will be updated. Otherwise a new row is appended to the table.
+
+#### Examples
+
+For the following examples, we'll start off with our topic data arriving in this order.
+
+```json
+{"key1":"a","key2":"1","key3":"Alice","id":123}
+{"key1":"b","key2":"2","key3":"Bob","id":456}
+{"key1":"c","key2":"3","key3":"Carol","id":789}
+[{"key1":"x","key2":"10","key3":"Alice","id":123},{"key1":"y","key2":"20","key3":"Bob","id":456},{"key1":"c","key2":"30","key3":"Carol","id":789}]
+```
+
+The expected shape of the data is either:
+* a JSON object
+* a JSON array of objects
+
+##### Example 0
+
+**No table-format**
+
+Using the [`full-table`] output without a table-format prints each key into a column in alphabetical order from left to right.
+ +```shell copy="fl" +$ fluvio consume event-data -B --output full-table +``` + +Output: + +``` +┌('c' to clear table | 'q' or ESC to exit) | Items: 6─────────────────┐ +│id key1 key2 key3 │ +│123 a 1 Alice │ +│456 b 2 Bob │ +│789 c 3 Carol │ +│123 x 10 Alice │ +│456 y 20 Bob │ +│789 c 30 Carol │ +└─────────────────────────────────────────────────────────────────────┘ +``` + + +##### Example 1 + +**Display a subset of data** + +In this example, we only want to display data for only 2 of the keys. The ordering of the columns will be `key1` first, then `key2`. + +Config: + +```yaml +# exampleformat1.yaml +name: "exampleformat1" +inputFormat: "JSON" +columns: + - keyPath: "key1" + - keyPath: "key2" +``` + +Create the `table-format`: + +```shell copy="fl" +$ fluvio table-format create --config exampleformat1.yaml +``` + +Display your table: + +```shell copy="fl" +$ fluvio consume event-data -B --output full-table --table-format exampleformat1 +``` + +Output: + +``` +┌('c' to clear table | 'q' or ESC to exit) | Items: 6─────────────────┐ +│key1 key2 │ +│a 1 │ +│b 2 │ +│c 3 │ +│x 10 │ +│y 20 │ +│c 30 │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +##### Example 2 + +**Reorder columns** + +In this example, we rearrange the order so that the columns will be ordered: `id`, `key3`, `key1`, `key2` + +Config: + +```yaml +# exampleformat2.yaml +name: "exampleformat2" +inputFormat: "JSON" +columns: + - keyPath: "id" + - keyPath: "key3" + - keyPath: "key1" + - keyPath: "key2" +``` + +Create the `table-format`: + +```shell copy="fl" +$ fluvio table-format create --config exampleformat2.yaml +``` + +Display your table: + +```shell copy="fl" +$ fluvio consume event-data -B --output full-table --table-format exampleformat2 +``` + +Output: + +``` +┌('c' to clear table | 'q' or ESC to exit) | Items: 6─────────────────┐ +│id key3 key1 key2 │ +│123 Alice a 1 │ +│456 Bob b 2 │ +│789 Carol c 3 │ +│123 Alice x 10 │ +│456 Bob y 20 │ +│789 Carol c 30 │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +##### Example 3 + +**Rename columns** + +In this example, we're rearranging the order of the columns, and changing the column header to something more meaningful for our data. + +Config: + +```yaml +# exampleformat3.yaml +name: "exampleformat3" +inputFormat: "JSON" +columns: + - keyPath: "id" + headerLabel: "ID" + - keyPath: "key3" + headerLabel: "Name" + - keyPath: "key2" + headerLabel: "Number" + - keyPath: "key1" + headerLabel: "Letter" +``` + +Create the `table-format`: + +```shell copy="fl" +$ fluvio table-format create --config exampleformat3.yaml +``` + +Display your table: + +```shell copy="fl" +$ fluvio consume event-data -B --output full-table --table-format exampleformat3 +``` + +Output: + +``` +┌('c' to clear table | 'q' or ESC to exit) | Items: 6─────────────────┐ +│ID Name Number Letter │ +│123 Alice 1 a │ +│456 Bob 2 b │ +│789 Carol 3 c │ +│123 Alice 10 x │ +│456 Bob 20 y │ +│789 Carol 30 c │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +##### Example 4: Choose primary key for row-updates + +For event-sourced data, it may be beneficial to display the most up-to-date data by updating the row with current values. To do this, we select a primary key within the data. + +When new data arrives, if the values at the primary key match, we replace the row with the more recent data. 
+
+Config:
+
+```yaml
+# exampleformat4.yaml
+name: "exampleformat4"
+inputFormat: "JSON"
+columns:
+  - keyPath: "id"
+    headerLabel: "ID"
+    primaryKey: true
+  - keyPath: "key3"
+    headerLabel: "Name"
+  - keyPath: "key2"
+  - keyPath: "key1"
+```
+
+Command:
+
+```shell copy="fl"
+$ fluvio table-format create --config exampleformat4.yaml
+```
+
+Display your table:
+
+```shell copy="fl"
+$ fluvio consume event-data -B --output full-table --table-format exampleformat4
+```
+
+Output:
+
+```
+┌('c' to clear table | 'q' or ESC to exit) | Items: 3─────────────────┐
+│ID   Name   key2   key1                                              │
+│123  Alice  10     x                                                 │
+│456  Bob    20     y                                                 │
+│789  Carol  30     c                                                 │
+└─────────────────────────────────────────────────────────────────────┘
+```
+
+## Consumer output table formatting
+
+This document covers two of the CLI Consumer's output types:
+* [`--output table`](#table), which is a simple formatted table
+* [`--output full-table`](#full-table), which is a text-based user interface, with features such as live row-based updates and column customization via [`table-format`]
+
+
+To demonstrate the table output, we're going to use the following input
+
+Example initial topic input
+
+```json
+{"key1":"a","key2":"1","key3":"Alice","id":123}
+{"key1":"b","key2":"2","key3":"Bob","id":456}
+{"key1":"c","key2":"3","key3":"Carol","id":789}
+[{"key1":"x","key2":"10","key3":"Alice","id":123},{"key1":"y","key2":"20","key3":"Bob","id":456},{"key1":"c","key2":"30","key3":"Carol","id":789}]
+```
+
+The expected shape of the data is either:
+* a JSON object
+* a JSON array of objects
+
+## table
+
+By default the top-level object keys will be used as the column names, sorted in alphabetical order. For more customizability, please use the [`full-table`] output.
+
+Example command:
+
+```shell copy="fl"
+$ fluvio consume example-topic --output table -B
+```
+
+Example output:
+```
+ id  | key1 | key2 | key3
+ 123 | a    | 1    | Alice
+ 456 | b    | 2    | Bob
+ 789 | c    | 3    | Carol
+ 123 | x    | 10   | Alice
+ 456 | y    | 20   | Bob
+ 789 | c    | 30   | Carol
+```
+
+
+## full-table
+
+By default the top-level object keys will be used as the column names, sorted in alphabetical order.
+
+Example command:
+
+```shell copy="fl"
+$ fluvio consume example-topic --output full-table -B
+```
+
+Example output:
+```
+┌('c' to clear table | 'q' or ESC to exit) | Items: 6─────────────────┐
+│id   key1   key2   key3                                              │
+│123  a      1      Alice                                             │
+│456  b      2      Bob                                                │
+│789  c      3      Carol                                             │
+│123  x      10     Alice                                             │
+│456  y      20     Bob                                               │
+│789  c      30     Carol                                             │
+└─────────────────────────────────────────────────────────────────────┘
+```
+
+You can scroll with
+* `up`/`down` arrow keys or the mouse scroll wheel to move one row at a time
+* `Page up`/`Page down` to move 5 rows up/down at a time
+* `Home` to move to the top of the table
+* `End` to move to the bottom of the table
+* `c` to clear the table state
+* `q` or `ESC` to exit the table
+
+### Customize the `full-table` table
+You may have json data that isn't most effectively displayed with the keys ordered alphabetically. Or your data is event sourced, and you only want to see the most recent data organized by one or more primary keys.
+
+In that case, to customize the `full-table` output, you can provide the name of your `table-format`:
+
+`fluvio consume <topic-name> --output full-table --table-format <table-format-name>`
diff --git a/versioned_docs/version-0.13.0/fluvio/cli/fluvio/topic.mdx b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/topic.mdx
new file mode 100644
index 00000000..e5f68335
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/topic.mdx
@@ -0,0 +1,185 @@
+---
+sidebar_position: 10
+title: "topic"
+description: "Managing Topics in Fluvio CLI"
+---
+
+# The `fluvio topic` command
+
+The `fluvio topic` subcommands are used to create and delete topics, as
+well as to view basic information about existing topics.
+
+## `fluvio topic create`
+
+This command is used to create new Fluvio topics.
+
+**Example usage**
+
+
+```bash copy="fl"
+$ fluvio topic create greeting
+topic "greeting" created
+```
+
+### Retention
+
+Retention is a policy for how data is cleaned up from a topic.
+
+* For a time-based policy, use `--retention-time`
+* For a segment-size based policy, use `--segment-size`
+
+Check [the docs for more info about data retention].
+
+**Example usage**
+
+In this example, the last segment of 500k will be deleted after 30 days.
+
+
+```bash copy="fl"
+$ fluvio topic create my-topic --retention-time '30 days' --segment-size 500000
+topic "my-topic" created
+```
+
+
+### Compression
+
+This configuration sets compression at the topic level. When set, producers are forced to use a compression algorithm that matches the topic configuration. The SPU will reject any Produce request
+that does not match the topic configuration.
+
+If `--compression-type any` is used, the SPU will accept any compression algorithm.
+
+Possible values:
+* `any` (default)
+* `none`
+* `gzip`
+* `lz4`
+* `snappy`
+* `zstd`
+
+**Example usage**
+
+
+```bash copy="fl"
+$ fluvio topic create my-topic --compression-type gzip
+topic "my-topic" created
+```
+
+In this example, the topic `my-topic` will be created with compression type `gzip`.
+
+### Replica assignment
+
+By default, Fluvio will automatically assign replicas to SPUs. However, you can manually assign replicas to SPUs by using the `--replica-assignment` flag.
+
+Please refer to the [Replica Assignment] section for details of replica assignment.
+
+Note that in order for replica assignment to work, you need to have at least 2 SPUs in your cluster.
+
+**Example usage**
+
+In this example, we assign the first replica to SPU 0 and the second replica to SPU 1.
+First, we create the replica assignment file `replica.json`.
+```json
+[
+  {
+    "id": 0,
+    "replicas": [
+      0,
+      1
+    ]
+  }
+]
+```
+The `replicas` field corresponds to the SPU IDs. You can get SPU IDs by running `fluvio cluster spu list`.
+
+Then we create the topic with the replica assignment file.
+```bash
+$ fluvio topic create my-topic --replica-assignment replica.json
+topic "my-topic" created
+```
+
+Use the partition commands to show that the topic has been created with the replica assignment.
+
+```bash
+
+ $ fluvio partition list
+  TOPIC     PARTITION  LEADER  REPLICAS  RESOLUTION  SIZE  HW  LEO  LRS  FOLLOWER OFFSETS
+  my-topic  0          0       [1]       Online      0 B   0   0    0   0 [ReplicaStatus { spu: 1, hw: -1, leo: -1 }]
+
+```
+
+## `fluvio topic list`
+
+This command shows you all the existing topics in your cluster, as well as some basic
+information about them, including how many partitions a topic has and how many times it
+is replicated.
+
+**Example usage**
+
+
+```bash copy="fl"
+$ fluvio topic list
+  NAME      TYPE      PARTITIONS  REPLICAS  IGNORE-RACK  STATUS                   REASON
+  greeting  computed  1           1                      resolution::provisioned
+```
+
+---
+
+## `fluvio topic describe`
+
+This command prints more detailed information about a specific topic.
+
+**Example usage**
+
+
+```bash copy="fl"
+$ fluvio topic describe greeting
+ Name                    :  greeting
+ Type                    :  computed
+ Partition Count         :  1
+ Replication Factor      :  1
+ Ignore Rack Assignment  :  false
+ Status                  :  provisioned
+ Reason                  :
+ -----------------
+```
+
+---
+
+## `fluvio topic delete`
+
+This command deletes an existing Fluvio topic and all data associated with it.
+This data cannot be recovered, so use this command with care.
+
+**Example usage**
+
+
+```bash copy="fl"
+$ fluvio topic delete greeting
+topic "greeting" deleted
+```
+
+## `fluvio topic add-partition`
+
+This command adds a partition to an existing topic.
+It will return the partition number and the SPU id where the partition is located.
+
+**Example usage**
+
+
+```bash copy="fl"
+$ fluvio topic add-partition greeting
+  PARTITION  SPU
+  1          5001
+```
+
+You can also specify the number of new partitions to add using the `-c` or `--count` flag.
+
+```bash copy="fl"
+$ fluvio topic add-partition greeting -c 3
+  PARTITION  SPU
+  2          5002
+  3          5003
+  4          5001
+```
+
+[the docs for more info about data retention]: fluvio/concepts/operations/data-retention.mdx
+[Replica Assignment]: fluvio/concepts/architecture/replica-assignment.mdx
diff --git a/versioned_docs/version-0.13.0/fluvio/cli/fluvio/version.mdx b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/version.mdx
new file mode 100644
index 00000000..12698d89
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/cli/fluvio/version.mdx
@@ -0,0 +1,21 @@
+---
+sidebar_position: 80
+title: "version"
+description: "Print version information for the Fluvio CLI and platform"
+---
+
+# The `fluvio version` command
+
+The `fluvio version` command prints out build information about the
+`fluvio` binary and any plugin extensions.
+
+```bash copy="fl"
+$ fluvio version
+ Fluvio CLI                      : 0.11.8
+ Fluvio CLI Arch                 : aarch64-apple-darwin
+ Fluvio CLI SHA256               : a3e1fe52453e9edb8a6da5895614bb02064d5ec91032713904add50a8686c11e
+ Fluvio Channel Frontend SHA256  : a3e1fe52453e9edb8a6da5895614bb02064d5ec91032713904add50a8686c11e
+ Fluvio Platform                 : 0.11.8-dev (dawn-wood-08d4a618dc0d77fbcd676353de982ae8)
+ Git Commit                      : 919aa365b25a6193efb55961d5e84dc2ee804100
+ OS Details                      : Darwin 14.3.1 (kernel 23.3.0)
+```
diff --git a/versioned_docs/version-0.13.0/fluvio/cli/overview.mdx b/versioned_docs/version-0.13.0/fluvio/cli/overview.mdx
new file mode 100644
index 00000000..405fffbf
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/cli/overview.mdx
@@ -0,0 +1,56 @@
+---
+sidebar_position: 1
+title: "Overview"
+description: "Fluvio CLI is an all-in-one tool for installing and managing Fluvio, as well as for producing and consuming data on the command-line"
+---
+
+# The `fluvio` CLI
+
+The Fluvio CLI is an all-in-one tool for managing Fluvio, as well as for producing and consuming data on the command-line.
+
+:::tip
+  If Fluvio is not installed yet, you can install it by following the
+  instructions in the [quickstart].
+:::
+
+## Commands to Know
+
+Let's have a look at some common commands you'll want to know when starting
+out with Fluvio.
+
+#### Setting up a cluster
+
+- [Create your own Fluvio cluster with `fluvio cluster start`]
+
+#### Sending and Receiving data from Fluvio
+
+- [Create a topic with `fluvio topic create`]
+- [Produce data to a topic with `fluvio produce`]
+- [Consume data from a topic with `fluvio consume`]
+
+#### Enriching data with SmartModules
+
+- [Overview for SmartModules]
+- [Write a custom filtering SmartModule]
+- [Consume enriched data using SmartModules]
+
+#### Viewing the status of the cluster
+
+- [See all of your topics with `fluvio topic list`]
+- [See your partitions and data replication with `fluvio partition list`]
+- [See the status of the SPUs in your cluster with `fluvio cluster spu list`]
+
+[quickstart]: fluvio/quickstart.mdx
+
+[Create your own Fluvio cluster with `fluvio cluster start`]: fluvio/cli/fluvio/cluster.mdx
+
+[Create a topic with `fluvio topic create`]: fluvio/cli/fluvio/topic.mdx
+[Produce data to a topic with `fluvio produce`]: fluvio/cli/fluvio/produce.mdx
+[Consume data from a topic with `fluvio consume`]: fluvio/cli/fluvio/consume.mdx
+
+[Overview for SmartModules]: smartmodules/overview.mdx
+[Write a custom filtering SmartModule]: smartmodules/features/operators/filter.mdx
+[Consume enriched data using SmartModules]: fluvio/cli/fluvio/consume.mdx#consume-using-a-smartmodule
+
+[See all of your topics with `fluvio topic list`]: fluvio/cli/fluvio/topic.mdx#fluvio-topic-list
+[See your partitions and data replication with `fluvio partition list`]: fluvio/cli/fluvio/partition.mdx#fluvio-partition-list
+[See the status of the SPUs in your cluster with `fluvio cluster spu list`]: fluvio/cli/fluvio/cluster.mdx#fluvio-cluster-spu-list
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/fluvio/cli/smdk.mdx b/versioned_docs/version-0.13.0/fluvio/cli/smdk.mdx
new file mode 100644
index 00000000..2d4d04d2
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/cli/smdk.mdx
@@ -0,0 +1,9 @@
+---
+sidebar_position: 30
+title: "smdk (link)"
+description: "smdk is a tool for developing Fluvio SmartModules"
+---
+
+SMDK is documented in [SmartModules].
+
+[SmartModules]: smartmodules/smdk.mdx
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/_category_.json b/versioned_docs/version-0.13.0/fluvio/concepts/_category_.json
new file mode 100644
index 00000000..a74083c7
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/_category_.json
@@ -0,0 +1,5 @@
+{
+  "label": "Concepts",
+  "collapsed": true,
+  "position": 50
+}
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/advanced/_category_.json b/versioned_docs/version-0.13.0/fluvio/concepts/advanced/_category_.json
new file mode 100644
index 00000000..28a01ae1
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/advanced/_category_.json
@@ -0,0 +1,5 @@
+{
+  "label": "Advanced",
+  "collapsed": true,
+  "position": 3
+}
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/advanced/cluster-defaults.mdx b/versioned_docs/version-0.13.0/fluvio/concepts/advanced/cluster-defaults.mdx
new file mode 100644
index 00000000..fe94ffc4
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/advanced/cluster-defaults.mdx
@@ -0,0 +1,110 @@
+---
+sidebar_position: 5
+title: "Configuration Defaults"
+---
+
+## Overview
+
+When a Fluvio Cluster is created without extra params, either in Kubernetes
+or locally, certain default configurations and limitations are assumed.
+
+This document outlines the default configurations for a Fluvio Cluster.
+
+## Default Configurations
+
+### Cluster
+
+| Resource | Default Value |
+|-|-|
+| SPUs | `1` |
+
+### SPU Group
+
+| Resource | Default Value |
+|-|-|
+| Number of replicas | `1` |
+| Minimum ID in Group | `0` |
+| Store Size | `10 GB` |
+
+### SPUs
+
+| Resource | Default Value |
+|-|-|
+| SPU ID | `0` |
+| SPU Type | Managed |
+| Public Endpoint's IP Address | `localhost:30004` |
+| Public Endpoint Encryption | `PLAINTEXT` |
+| Private Endpoint's IP Address | `fluvio-spg-main.default.svc.cluster.local:9006` |
+| Private Endpoint Encryption | `PLAINTEXT` |
+
+:::tip
+  By default, SPU communication has no encryption enabled.
+  It is recommended to enable encryption for production clusters.
+:::
+
+### Topic
+
+| Resource | Default Value |
+|-|-|
+| Compression Type | Not specified |
+| Partitions | `1` |
+| Replicas | `1` |
+| Segment Size | `1 GB` |
+
+### Partition
+
+| Resource | Default Value |
+|-|-|
+| Replicas | `0` |
+
+## Overriding Defaults
+
+### Cluster
+
+#### SPUs
+
+**Specify the number of SPUs**
+
+```bash copy="fl"
+$ fluvio cluster start --spu 3
+```
+
+### SPU Group
+
+#### Storage Size
+
+Storage size can be specified when starting a Fluvio Cluster.
+
+```bash copy="fl"
+$ fluvio cluster start --spu-storage-size '50 GB'
+```
+
+### Topic
+
+#### Segment Size
+
+A topic's segment size can be specified when creating a topic.
+
+**Specify a segment size using units**
+
+```bash copy="fl"
+$ fluvio topic create foo --segment-size '1 GB'
+```
+
+**Specify a segment size using bytes (50 MB)**
+
+```bash copy="fl"
+$ fluvio topic create foo --segment-size '50000000'
+```
+
+### Partition
+
+#### Replicas
+
+A partition's replicas can be specified when creating a topic.
+
+**Specify the number of replicas**
+
+```bash copy="fl"
+$ fluvio topic create foo --replicas 3
+```
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/advanced/crds.mdx b/versioned_docs/version-0.13.0/fluvio/concepts/advanced/crds.mdx
new file mode 100644
index 00000000..d5fe7a68
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/advanced/crds.mdx
@@ -0,0 +1,76 @@
+---
+sidebar_position: 2
+title: "CRDs"
+---
+
+# Custom Resource Definitions
+
+The fluvio-sys chart installs the CRDs. This chart can be found in the fluvio repo at [fluvio/k8-util/helm/fluvio-sys](https://github.com/infinyon/fluvio/tree/master/k8-util/helm/fluvio-sys).
+
+
+## SPU Group
+##### [Spu Group]
+
+| Field | Type | Description |
+|-|-|-|
+| spec.replicas | integer | Number of replicas |
+| spec.minId | integer | Minimum ID in group |
+| spec.template.spec.rack | string | Rack name |
+| spec.template.spec.publicEndpoint.port | integer | Port of public endpoint |
+| spec.template.spec.publicEndpoint.encryption | string | PLAINTEXT or SSL |
+| spec.template.spec.privateEndpoint.port | integer | Port of private endpoint |
+| spec.template.spec.privateEndpoint.encryption | string | PLAINTEXT or SSL |
+| spec.template.spec.replication.inSyncReplicaMin | integer | Minimum number of in-sync replicas |
+| spec.template.spec.storage.logDir | string | Path to data directory |
+| spec.template.spec.storage.size | string | Storage size |
+| spec.template.spec.env[].name | string | Environment variable name |
+| spec.template.spec.env[].value | string | Environment variable value |
+
+## SPU
+##### [SPU]
+
+| Field | Type | Description |
+|-|-|-|
+| spec.spuId | string | ID of SPU |
+| spec.spuType | string | Custom or Managed |
+| spec.rack | string | Rack name |
+| spec.publicEndpoint.ingress[].ip | string | IP address of public endpoint |
+| spec.publicEndpoint.ingress[].hostname | string | Hostname of public endpoint |
+| spec.publicEndpoint.port | integer | Port of public endpoint |
+| spec.publicEndpoint.encryption | string | PLAINTEXT or SSL |
+| spec.privateEndpoint.host | string | Hostname of private endpoint |
+| spec.privateEndpoint.port | integer | Port of private endpoint |
+| spec.privateEndpoint.encryption | string | PLAINTEXT or SSL |
+| status.resolution | string | SPU status |
+
+## Topic
+##### [Topic]
+
+| Field | Type | Description |
+|-|-|-|
+| spec.type | string | Type of topic |
+| spec.partitions | integer | Partitions
count |
+| spec.replicationFactor | integer | Replication count |
+| spec.ignoreRackAssignment | boolean | Ignore rack assignment |
+| spec.customReplicaAssignment[].partition.id | integer | Partition ID |
+| spec.customReplicaAssignment[].partition.replicas | integer | Number of replicas |
+| status.resolution | string | Topic status |
+
+## Partition
+##### [Partition]
+
+| Field | Type | Description |
+|-|-|-|
+| spec.leader | integer | Leader SPU ID |
+| spec.replicas[].items | integer | Followers |
+| status.resolution | string | Partition status |
+| status.isBeingDeleted | boolean | Being deleted |
+| status.lsr | integer | Live Replicas |
+| status.leader.hw | integer | Leader High Watermark |
+| status.leader.leo | integer | Leader End Offset |
+| status.replicas | integer | Follower Offsets |
+
+[Spu Group]: fluvio/concepts/architecture/sc.mdx#spu-groups
+[SPU]: fluvio/concepts/architecture/sc.mdx#spus
+[Topic]: fluvio/concepts/architecture/sc.mdx#topics
+[Partition]: fluvio/concepts/architecture/sc.mdx#partitions
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/advanced/network.mdx b/versioned_docs/version-0.13.0/fluvio/concepts/advanced/network.mdx
new file mode 100644
index 00000000..962a7283
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/advanced/network.mdx
@@ -0,0 +1,45 @@
+---
+sidebar_position: 3
+title: "Fluvio Network Access"
+---
+
+# Fluvio Network Access
+
+By default, Fluvio helm charts set up services exposed via NodePorts.
+
+Port numbers do not change based on whether TLS is enabled; the same ports are used regardless. Additional TLS ports may be used.
+
+Service behavior can be configured at installation by overriding values in the helm charts.
+
+### Service Endpoint Details
+
+Run `kubectl describe svc fluvio-sc-public` to view address and port information for accessing the SC public endpoint.
+The configuration may vary by networking configuration as well as the type of Kubernetes cluster installed.
+
+In this case, the access point is the ingress IP address on the port described by the NodePort: `127.0.0.1:30003`
+
+```shell
+$ kubectl describe svc fluvio-sc-public
+Name:                     fluvio-sc-public
+Namespace:                default
+Labels:                   app.kubernetes.io/managed-by=Helm
+Annotations:              fluvio.io/ingress-address: 127.0.0.1
+                          meta.helm.sh/release-name: fluvio-app
+                          meta.helm.sh/release-namespace: first
+Selector:                 app=fluvio-sc
+Type:                     NodePort
+IP Family Policy:         SingleStack
+IP Families:              IPv4
+IP:                       10.43.5.147
+IPs:                      10.43.5.147
+Port:                     9003/TCP
+TargetPort:               9003/TCP
+NodePort:                 30003/TCP
+Endpoints:                10.42.0.9:9003
+Session Affinity:         None
+External Traffic Policy:  Cluster
+Events:
+```
+
+
+SPU service names follow the pattern `fluvio-spu-<group-name>-<index>`; for example, the first SPU of the "main" group is named `fluvio-spu-main-0`.
It can be viewed with `kubectl describe svc fluvio-spu-main-0`.
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/architecture/_category_.json b/versioned_docs/version-0.13.0/fluvio/concepts/architecture/_category_.json
new file mode 100644
index 00000000..b9f24aec
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/architecture/_category_.json
@@ -0,0 +1,5 @@
+{
+  "label": "Architecture",
+  "collapsed": true,
+  "position": 1
+}
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/architecture/auth.mdx b/versioned_docs/version-0.13.0/fluvio/concepts/architecture/auth.mdx
new file mode 100644
index 00000000..4f200d4c
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/architecture/auth.mdx
@@ -0,0 +1,128 @@
+---
+sidebar_position: 5
+title: "Authentication and Authorization"
+---
+
+When the user logs in, the client downloads a profile and extracts the user identification from the client certificate issued by the server. The authorization is driven by simple RBAC (role-based access control), which can be extended to other, more advanced authorization schemes in the future.
+
+# Cluster setup TLS
+
+A Fluvio cluster must have TLS enabled for the authorization mechanism to kick in. With TLS enabled, both the SC and the SPU will ensure all subsequent traffic to and from the client is encrypted via mTLS. As part of the TLS configuration, the cluster administrator must also define an authorization policy file (described below).
+
+To create a Fluvio cluster with TLS enabled, you must provide the following certs and keys:
+- CA cert
+- Client cert and key
+- Server cert and key
+- Authorization policy file
+
+For development and testing, you can use self-signed certs and keys. For production, you should use certs and keys signed by a trusted CA. It is important to note that the client cert and key must be signed by the same CA as the server cert and key. Otherwise, the client will not be able to connect to the server. In addition, the system admin must specify the domain name (DNS) that the cluster is bound to. This is required for TLS to work properly.
+
+## Self signed certs
+
+For self-signed certs, you can use the sample cert templates in [fluvio](https://github.com/infinyon/fluvio/tree/master/tls).
+
+## Authorization policy file
+
+Fluvio currently implements a simple type-level permission model. The authorization policy file is a JSON file that contains a list of permission entries for each role. Each entry contains a type and a list of permissions. The type can be one of the following object types:
+- CustomSpu
+- Spu
+- Topic
+- Partition
+- SpuGroup
+- SmartModule
+- TableFormat
+
+Each object type can have one or more of the following permissions:
+- All
+- Read
+- Write
+
+The `All` permission means the user has all permissions for the object type. For example, the following is a policy file. It contains two roles: `Root` and `TopicUser`. The `Root` role has all permissions for all object types. The `TopicUser` role only has read permission for the `Topic` object type.
+
+```json
+{
+  "Root": {
+    "CustomSpu": [
+      "All"
+    ],
+    "Partition": [
+      "All"
+    ],
+    "Spu": [
+      "All"
+    ],
+    "Topic": [
+      "All"
+    ],
+    "SpuGroup": [
+      "All"
+    ],
+    "ManagedConnector": [
+      "All"
+    ],
+    "SmartModule": [
+      "All"
+    ],
+    "TableFormat": [
+      "All"
+    ]
+  },
+  "TopicUser": {
+    "Topic": [
+      "Read"
+    ]
+  }
+
+}
+```
+
+Note that in order to set up and operate a cluster as an administrator, the "Root" role with full access to all objects is required.
+
+## Authorization Scope file
+
+The scope file contains the mapping of users to roles. For example, here is a sample scope file that maps users `user1` and `user2` to the `TopicUser` role. The `Root` role is mapped to user `root`.
+
+```json
+{
+  "Root": [
+    "root"
+  ],
+  "TopicUser": [
+    "user1",
+    "user2"
+  ]
+}
+```
+
+The user id comes from the X509 client certs. It is mapped to `CN`. For example, this is a simple command to generate a client cert with `CN` set to `user1`.
+
+```bash
+openssl req -new -key <client.key> -out <client.csr> -subj "/C=US/ST=CA/O=MyOrg, Inc./CN=user1"
+```
+
+The client cert with `CN` set to `user1` can be used to connect to the Fluvio cluster. The Fluvio cluster will map `user1` to the `TopicUser` role.
+
+## Provisioning Fluvio cluster with TLS
+
+Once all the certs and keys are ready, you can provision a Fluvio cluster with TLS enabled. Note that client certs must have user "root" (default policy mapping) in order to provision the cluster. Here is the syntax to provision a Fluvio cluster with TLS enabled.
+
+```bash
+fluvio cluster start --tls --domain <domain> --ca-cert <ca-cert> --server-cert <server-cert> --server-key <server-key> --client-cert <client-cert> --client-key <client-key> --authorization-policy <policy-file> --authorization-scope <scope-file>
+```
+
+The domain must match the DNS suffix in order for TLS to work properly. For example, if you are using self-signed certs, you can use `local` or `localhost` as the domain. If you are using certs signed by a trusted CA, you can use the domain name that is registered with the CA.
+
+# Connecting to Fluvio cluster with TLS
+
+In order to connect to a Fluvio cluster with TLS enabled, you must provide client certs and keys. They must be signed by the same CA as the server cert and key. In addition, the cert must have a CN (user) that is in the authorization scope file. The client TLS certs and key can be provided either in the fluvio profile file or in env variables. Here is a section of a fluvio profile file that contains client certs and keys. Currently, TLS certs must be manually copied to the config file.
+
+
+```toml
+[cluster.mycluster.tls.certs]
+domain = "fluvio-cluster.mydomain.com"
+key = """
+
+-----BEGIN RSA PRIVATE KEY-----
+MIIJKQIBAAKCAgEAtLx58BuGSbwWW6AlNb38965IWQv+IJZGPBzMs9GExohuHOai
+...
+```
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/architecture/client.mdx b/versioned_docs/version-0.13.0/fluvio/concepts/architecture/client.mdx
new file mode 100644
index 00000000..769f985e
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/architecture/client.mdx
@@ -0,0 +1,173 @@
+---
+sidebar_position: 7
+title: "Client Library"
+---
+
+A Fluvio client communicates with a Fluvio cluster to manage streams and to emit or receive events. The client uses a purpose-built communication protocol that is optimized for maximum performance, scalability, and low latency. Websocket support is currently in the works, and future versions will provide adaptors to other protocols, such as HTTP, gRPC, protobuf, and more.
+
+Fluvio Client
+
+All communication between the clients and the servers is encrypted with TLS for maximum privacy and security.
+
+## Native Language Bindings
+
+The Fluvio client library is written in Rust and can be natively embedded into other programming languages.
+
+Check out our [APIs page] for more information.
+
+### Fluvio CLI
+
+The Fluvio CLI is an application written on top of the client library. The CLI can manage a Fluvio cluster as well as produce and consume data using the terminal.
+
+For additional information, check out:
+
+* [Fluvio CLI]
+
+#### Future Work
+
+Future versions of Fluvio will provide additional programming language bindings, such as:
+
+* Objective-C
+* C/C++
+
+
+## Client API
+
+The Fluvio client library has three core APIs: Producer, Consumer, and Admin.
+
+
+### Producer API
+
+The **Producer API** is responsible for sending records to data streams. A data record is a `key/value` object, where `key` is an optional field. `Key` and `value` fields can be of arbitrary format.
+
+For example, a producer with a `key` mapped to countries would use the following API:
+
+```rust
+let timber_resources: HashMap<&str, i32> =
+    [("Norway", 100), ("Denmark", 50), ("Iceland", 10)]
+    .into_iter().collect();
+
+// Send each entry as a key/value record
+for (country, amount) in timber_resources {
+    producer.send(country, amount.to_string()).await?;
+}
+```
+
+#### Producer Behavior
+
+Producers can send records _one at a time_ or in _batches_. The producer API is multi-threaded, which enables applications to stream data in parallel.
+
+
+### Consumer API
+
+The **Consumer API** is responsible for receiving records from data streams. Records can be retrieved _one at a time_ or _continuously_ from any position in the data stream.
+
+For example, a consumer reading `key/value` records (_one at a time_) from offset 100 would use the following API:
+
+```rust
+let records = consumer.fetch(100).await;
+
+for record in records {
+    for (k, v) in record {
+        println!("{}, {}", k, v);
+    }
+}
+```
+
+Records are transmitted in _binary format_ and it is up to the application developer to provide a conversion into their custom type.
+
+
+### Admin API
+
+The **Admin API** is the management interface for the Fluvio cluster. The API can perform the following operations:
+
+* _create/delete/update_ objects, such as: `create topic`.
+* inspect _configuration_, such as: `list spus`.
+* inspect _status_, such as: partition status - `Online`, `Offline`, or `LeaderOffline`.
+
+
+#### Configuration Objects
+
+Configuration object models follow a similar paradigm. Each object has the following components:
+
+* **Name** - unique identifier of the object
+* **Spec** - the configuration specification (aka. desired state)
+* **Status** - the actual provisioning status (aka. actual state)
+* **Owner** - provides a parent/child relationship (for resource removal)
+
+A Fluvio administrator configures the object _Spec_, and the cluster updates the object _Status_ to match. The _Status_ is a read-only element from the administrator's perspective.
+
+Fluvio has the following configuration objects:
+
+* **SPUs** - streaming processing unit (custom or managed)
+* **SPGs** - groups of managed SPUs
+* **topics** - data streaming configuration element
+* **partitions** - provisioned data streaming element of a topic
+   * partitions are children of topics
+
+Each configuration object goes through its own lifecycle. Object status tracks the state as it progresses through various lifecycle stages.
+
+:::note
+  Some configuration objects, such as **Partition** and **Managed SPU**, are managed objects that are created as part of the parent's lifecycle, and they `cannot be directly modified` by the operator.
+:::
+
+For detailed schema definitions and object life cycles, check out the [Architecture Overview].
+
+#### Object Outputs
+
+Each configuration object can be converted to different data formats, such as _json_ or _yaml_. Additional data formats are available and can be exposed if required.
+
+Configuration objects may be fetched using filters such as `object name`.
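+
+As a rough sketch of how this looks from the Rust client (exact module paths and method names may differ slightly between client versions), an application can connect, obtain an Admin API handle, and list all topics with their statuses:
+
+```rust
+use fluvio::Fluvio;
+use fluvio::metadata::topic::TopicSpec;
+
+async fn list_topics() -> anyhow::Result<()> {
+    // Connect using the active profile, then obtain an Admin API handle
+    let fluvio = Fluvio::connect().await?;
+    let admin = fluvio.admin().await;
+
+    // Fetch every topic object: each entry carries a name, a spec, and a status
+    for topic in admin.all::<TopicSpec>().await? {
+        println!("{} => {:?}", topic.name, topic.status);
+    }
+    Ok(())
+}
+```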
+
+
+#### Consumer Behavior
+
+Consumers are also multi-threaded, which allows each consumer to read records from multiple data streams simultaneously. Each connection can specify different retrieval properties:
+
+* **Consistency Model** - retrieve records based on their committed state across _replicas_:
+   * COMMITTED: fetch only the records that have been replicated `n` times (where `n` is defined by _min-live-replicas_)
+   * UNCOMMITTED: fetch records that have been stored by the _replica leader_. When using the UNCOMMITTED read strategy, it is possible to lose records that have already been seen by the consumers. Hence, it should only be used when sporadic message loss is acceptable.
+* **Max Bytes** - the maximum number of bytes sent in a single message.
+   * When a consumer fetches multiple records, the **SPU** batches the result into buffers up to the maximum number of bytes.
+   * The default batch size is **1 MB**.
+
+
+### Fault Tolerance
+
+The Fluvio client can survive SPU failures. All data streams are replicated across multiple SPUs to prevent data loss.
+
+When a data stream is created, one of the SPUs is elected as leader and the others become followers. Fluvio clients look up the SPU leaders to produce or consume records.
+
+Prod/Cons Failover
+
+If the SPU leader becomes unreachable, an election is triggered and one of the SPU followers becomes the leader. The client detects the SPU leader failure and **automatically switches over** to the new leader.
+
+Producer/Consumer Failover
+
+For additional information on the election algorithm, check out [Election Design].
+
+## Client Profiles
+
+The client library utilizes profiles to hide the complexity associated with the connection configuration. Furthermore, profiles allow the client library to manage multiple Fluvio clusters from the same client instance. Simply switch the profile, and all subsequent operations are applied to a different cluster.
+
+For additional information on profile management, check out the [Fluvio Profiles] section.
+
+## Client Workflow
+
+All client operations follow a similar pattern.
+
+1. Create a profile (one-time operation).
+2. Connect to the cluster using the profile created above.
+3. Use the **Admin API** to configure or retrieve objects (optional).
+4. Produce or Consume records:
+    * Use the **Producer API** to send records to a data stream.
+    * Use the **Consumer API** to retrieve records from a data stream.
+
+The Fluvio client library is multi-threaded: it can simultaneously connect to _multiple clusters_ and concurrently _produce and consume_ one or more data streams.
+
+[Fluvio CLI]: fluvio/cli/overview.mdx
+[Architecture Overview]: fluvio/concepts/architecture/overview.mdx
+[Apis Page]: fluvio/apis/overview.mdx
+[Election Design]: fluvio/concepts/architecture/replica-election.mdx
+[Fluvio Profiles]: fluvio/cli/fluvio/profile.mdx
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/architecture/overview.mdx b/versioned_docs/version-0.13.0/fluvio/concepts/architecture/overview.mdx
new file mode 100644
index 00000000..bb3616db
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/architecture/overview.mdx
@@ -0,0 +1,141 @@
+---
+sidebar_position: 1
+title: "Overview"
+---
+
+**Fluvio Data Streaming** is a modern Cloud Native software stack designed for **high speed, real-time data** processing. Fluvio is _fast_, _scalable_, _self-healing_, _pluggable_, and _user-friendly_.
+
+#### Built in Rust
+
+Fluvio is **built in Rust**, a systems programming language with **higher performance** than Java and **better code safety** than C/C++. Rust has a powerful multi-threaded asynchronous engine that runs natively on multi-core and low-powered embedded systems. Zero-cost abstractions and **no garbage collection** make this language ideal for low network latency and high IO throughput systems.
+
+This choice of programming language makes Fluvio a low memory, high performance product that **compiles natively** on many software distributions such as MacOS, Linux, Windows, and small footprint embedded systems such as Raspberry Pi.
+
+#### Cloud Native by Design
+
+Fluvio is a **Cloud Native** platform designed to work with any infrastructure type, from bare-bones hardware to containerized platforms. As a **Cloud Native** first product, Fluvio is natively integrated with **Kubernetes**. Any infrastructure running **Kubernetes** can install the **Fluvio Helm Chart** and get up and running in a matter of minutes. For additional details, check out the [Kubernetes install] section.
+
+## High Level Architecture
+
+Fluvio's architecture centers around **real-time streaming**, and the platform can **scale horizontally** to accommodate large volumes of data.
+
+Architecture Components - SC/SPUs
+
+A **Streaming Controller (SC)** manages the **SPU life cycle** and optimizes the distribution of data streams across the cluster. The **Streaming Processing Units (SPUs)** are responsible for data streaming.
+
+SCs and SPUs are **independent**, **loosely coupled** services. Each service can be **restarted**, **upgraded**, or **scaled** independently without impacting traffic.
+
+
+## Streaming Controller (SC)
+
+Fluvio is designed to address a variety of **deployment scenarios**, from public clouds to private data centers, edge networks, and IOT devices. The **SC** maintains the **topology map** of the **SPUs** and serves as the first point of contact for producers and consumers.
+
+DC, Cloud, Edge, IoT
+
+The **SC** handles the **topology map** dynamically to simplify complex tasks such as increasing capacity, adding new infrastructure, or attaching new geo-locations.
+
+For a deep dive into the SC design, check out the [SC Architecture] section.
+
+## Streaming Processing Unit (SPU)
+
+**Streaming Processing Units (SPUs)** are responsible for all data streaming related matters. Each SPU **receives** data from producers, **sends** data to consumers, and **saves** copies of the data to local storage.
+
+SPU produce/consume & replication
+
+SPUs are also responsible for **data replication**. Data streams that are created with a __replication factor__ of 2 or more are managed by __a cluster__ of SPUs. One SPU is elected as leader and all others are followers. The leader receives the data from producers and forwards a copy to followers. Followers save a copy in their local storage. If the leader goes offline, one of the followers takes over as leader. For additional information, check out [Replica Election].
+
+Each SPU performs **leader** and **follower** duties **on multiple data streams** in parallel. For optimal performance, Fluvio utilizes all available **CPU cores**.
+
+For a deep dive into the SPU design, check out the [SPU Architecture] section.
+
+## Topic/Partitions
+
+A Topic in a streaming platform is like a Table in a database. Suppose you're building an online chat service.
+You may have a "Chatroom" topic that streams events such as "Sent Message" and "Viewed Message", as well as other
+related events. You can think of a Topic as a category of events that are related under your domain.
+
+Now suppose that your chat service becomes a wild success and you have thousands of users. In order to keep up
+with the increased traffic, you can divide your topic into multiple Partitions. This allows the events in your
+topic to be distributed across multiple SPUs in parallel, increasing your traffic capacity by just changing a setting.
+
+For example, a configuration with 2 topics generates the replication map shown in the diagram:
+
+* **topic-a** => 2 partitions, 2 replicas
+* **topic-b** => 1 partition, 3 replicas
+
+SPU-1 is the leader for **topic-a/0**, SPU-2 for **topic-a/1**, and SPU-3 for **topic-b/0**.
+
+Topic/Partitions
+
+For additional information on partitions and replica assignments, check out [Replica Assignment].
+
+## Data Persistence
+
+SPU leaders **save** all data stream messages received from producers on **local storage**. Where the platform supports it, SPUs use **zero-copy IO** to transfer data from disk to network. Messages on local storage are **immutable** and **ordered**. Fluvio guarantees **in-order writes** for all messages received on the same **replica**.
+
+Data Storage
+
+SPU persistence is designed as **single-writer, multi-reader** with **zero-copy writes**. Each SPU can save large volumes of data at **wire speed**, and serve consumers and producers in **near real-time**.
+
+Fluvio persists messages in local storage until an eviction condition is met. It supports **time-based** and
+**size-based** conditions. Both are always checked and are not mutually exclusive. The time-based condition
+is specified with the `retention-time` property of the topic. The retention time is an **age** after which older
+segments are **deleted** from the partition. The size-based condition sets a max size per partition.
+If the **partition size** exceeds the configured max size, Fluvio **deletes the oldest segment** in the partition.
+
+Data eviction operates at the **segment level**. Hence, its frequency and accuracy depend on the **granularity** of
+segments. The larger the segment size, the less often segments are **evicted**, and the longer records stay alive.
+The **real retention period** of all records within a segment is the age of **the latest record** in the segment.
+
+Fluvio provides a **best-effort** guarantee of size-based and time-based eviction. For short periods of time these
+rules may be violated. The retention periods and max partition size should be set to cover **up to 80%** of the disk size.
+If the disk is full before eviction is triggered, the SPU stops accepting messages and the overall health of the
+system may be compromised.
+
+For additional information on retention conditions, check out [Data Retention].
+
+## APIs
+
+The Fluvio architecture places heavy emphasis on clean, **user-friendly APIs**. There are two types of APIs: **external** and **internal**. The APIs use **TLS** to ensure secure communication.
+
+### External APIs
+
+**External APIs** are used by the **Fluvio CLI** and a growing number of programming language interfaces, such as **Node** and **Rust**. There are two categories of APIs: control plane APIs and data plane APIs. **Control Plane APIs** manage the life cycle of cluster objects such as SPUs, topics, and replicas. **Data Plane APIs** handle data access for producers and consumers.
+
+External APIs
+
+API reference guides for programming languages are available at:
+
+* Node API
+* Rust API
+* Java API
+* Python API
+
+### Internal APIs
+
+**Internal APIs** are used by the **SC** to communicate with the **SPUs**, and by the **SPUs** to communicate with their peers to elect leaders and exchange replica information.
+
+Internal APIs
+
+For additional details about the **Internal APIs**, check out the Fluvio development guide on GitHub.
+
+[Kubernetes install]: fluvio/installation/kubernetes.mdx
+[SC Architecture]: fluvio/concepts/architecture/sc.mdx
+[Replica Election]: fluvio/concepts/architecture/replica-election.mdx
+[SPU Architecture]: fluvio/concepts/architecture/spu.mdx
+[Replica Assignment]: fluvio/concepts/architecture/replica-assignment.mdx
+[Data Retention]: fluvio/concepts/operations/data-retention.mdx
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/architecture/replica-assignment.mdx b/versioned_docs/version-0.13.0/fluvio/concepts/architecture/replica-assignment.mdx
new file mode 100644
index 00000000..6928f456
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/architecture/replica-assignment.mdx
@@ -0,0 +1,242 @@
+---
+sidebar_position: 4
+title: "Replica Assignment"
+---
+
+The **replica assignment** algorithm is triggered by topic creation and is responsible for building a **balanced distribution** of replicas across the SPUs in a Fluvio cluster. **Replicas** from different SPUs are grouped in **replica sets**, where each replica saves a copy of a data stream. Each replica set has a leader and one or more followers that are distributed across the available SPUs.
+
+For additional information on replica sets, check out [replica election].
+
+## Replica Assignment Algorithm
+
+Fluvio's replica assignment algorithm ensures that the replica leader and followers are evenly distributed across the available SPUs. If you'd rather deploy your own replication algorithm, use [Manual Replica Assignment](#manual-replica-assignment) instead.
+
+### Computed Replica Assignment (CRA)
+
+Fluvio uses the computed replica assignment algorithm to generate a **replica map** anytime a new topic is created. The algorithm takes into account the following parameters and configuration objects:
+
+* a list of SPUs
+* the number of partitions
+* the replication factor
+* the ignore-rack-assignment flag
+
+
+#### Algorithm
+
+The algorithm uses a **round-robin**, **gap-enabled** distribution assignment.
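+
+The following sketch illustrates how such a round-robin, gap-enabled assignment could be computed. It is a hypothetical helper (the function name and structure are assumptions for illustration, not Fluvio's actual code) that reproduces the example that follows:
+
+```rust
+/// Round-robin, gap-enabled replica assignment (illustrative only).
+fn replica_map(spus: usize, replicas: usize, partitions: usize, start_idx: usize) -> Vec<Vec<usize>> {
+    let gap_values = spus - replicas + 1; // gaps cycle through 0..gap_values
+    (0..partitions)
+        .map(|p| {
+            let idx = (start_idx + p) % (spus * gap_values);
+            let leader = idx % spus; // leaders rotate round-robin across SPUs
+            let gap = idx / spus;    // the gap widens after each full SPU cycle
+            (0..replicas)
+                .map(|i| if i == 0 { leader } else { (leader + gap + i) % spus })
+                .collect()
+        })
+        .collect()
+}
+```
+
+Calling `replica_map(5, 3, 16, 0)` reproduces the replica map shown below; a non-zero `start_idx` models the sequential chaining described at the end of this section.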
+
+In a cluster with **5** SPUs, a topic created with:
+
+| Replicas | Partitions |
+| :------: | :--------: |
+|  **3**   |   **16**   |
+
+The algorithm generates the following replica distribution:
+
+```text
+------------------------------------------
+|  idx  |  5 x SPUs  |       gaps        |
+------------------------------------------
+|   0   | 012        |         0         |
+|   1   |  123       |         0         |
+|   2   |   234      |         0         |
+|   3   |    340     |         0         |
+|   4   |     401    |         0         |
+|   5   | 0 23       |         1         |
+|   6   |  1 34      |         1         |
+|   7   |   2 40     |         1         |
+|   8   |    3 01    |         1         |
+|   9   |     4 12   |         1         |
+|  10   | 0  34      |         2         |
+|  11   |  1  40     |         2         |
+|  12   |   2  01    |         2         |
+|  13   |    3  12   |         2         |
+|  14   |     4  23  |         2         |
+|  15   | 012        |         0         |
+------------------------------------------
+```
+
+Next, the indexed distribution is collapsed into the following **replica map**:
+
+```text
+---------------------------
+| Partition |  Replicas   |
+---------------------------
+|     0     |  [0, 1, 2]  |
+|     1     |  [1, 2, 3]  |
+|     2     |  [2, 3, 4]  |
+|     3     |  [3, 4, 0]  |
+|     4     |  [4, 0, 1]  |
+|     5     |  [0, 2, 3]  |
+|     6     |  [1, 3, 4]  |
+|     7     |  [2, 4, 0]  |
+|     8     |  [3, 0, 1]  |
+|     9     |  [4, 1, 2]  |
+|    10     |  [0, 3, 4]  |
+|    11     |  [1, 4, 0]  |
+|    12     |  [2, 0, 1]  |
+|    13     |  [3, 1, 2]  |
+|    14     |  [4, 2, 3]  |
+|    15     |  [0, 1, 2]  |
+---------------------------
+```
+
+For a **balanced distribution**, the algorithm **chains multiple calls sequentially**: each run starts **at the index** where the last run completed, and that index is mapped to partition 0.
+
+In this example, if the last replica assignment completed at index 8, the next run starts at index 9 and partition 0 is assigned replicas [4, 1, 2].
+
+
+### Computed Replica Assignment with Rack Enabled
+
+**SPUs** may be assigned rack labels to distribute replicas across racks. The **CRA with rack enabled** algorithm will fail unless **all SPUs** in the cluster have a **rack** defined.
+
+Rack labels earmark SPUs with a location, such as a physical rack name or a cloud availability zone. The algorithm ensures partitions are distributed **across racks**. To achieve a **balanced distribution**, you must have the same number of SPUs in each rack.
+
+#### Algorithm
+
+The algorithm is designed to work with SPUs in **any rack assignment**. If the rack assignment is unbalanced, the algorithm fills gaps with a round-robin allocation in the SPU matrix.
+
+The algorithm has the following 3 stages:
+
+* Stage 1: Create a rack-centric SPU matrix
+* Stage 2: Convert the SPU matrix to an SPU sequence
+* Stage 3: Generate the replica map
+
+##### Example 1 - Balanced Rack Distribution
+
+On a cluster with **12** SPUs evenly distributed across **4** racks, a topic created with:
+
+| Replicas | Partitions |
+| :------: | :--------: |
+|  **4**   |   **12**   |
+
+
+The **3-stage** algorithm generates the following distribution:
+
+
+```text
+Stage 1: SPU Matrix allocation
+------------------------------------------
+  rack-a:  0,  1,  2
+  rack-b:  3,  4,  5
+  rack-c:  6,  7,  8
+  rack-d:  9, 10, 11
+
+Stage 2: SPU sequence (read in diagonal)
+------------------------------------------
+  0, 4, 8, 9, 1, 5, 6, 10, 2, 3, 7, 11
+
+Stage 3: Replica Map
+----------------------------------------------------------------
+Partition |    Replicas      rack-a   rack-b   rack-c   rack-d
+----------------------------------------------------------------
+    0     | [ 0, 4, 8, 9]    [1]      [ ] 1    [ ] 1    [ ] 1
+    1     | [ 4, 8, 9, 1]    [1] 1    [1] 1    [ ] 2    [ ] 2
+    2     | [ 8, 9, 1, 5]    [1] 2    [1] 2    [1] 2    [ ] 3
+    3     | [ 9, 1, 5, 6]    [1] 3    [1] 3    [1] 3    [1] 3
+    4     | [ 1, 5, 6,10]    [2] 3    [1] 4    [1] 4    [1] 4
+    5     | [ 5, 6,10, 2]    [2] 4    [2] 4    [1] 5    [1] 5
+    6     | [ 6,10, 2, 3]    [2] 5    [2] 5    [2] 5    [1] 6
+    7     | [10, 2, 3, 7]    [2] 6    [2] 6    [2] 6    [2] 6
+    8     | [ 2, 3, 7,11]    [3] 6    [2] 7    [2] 7    [2] 7
+    9     | [ 3, 7,11, 0]    [3] 7    [3] 7    [2] 8    [2] 8
+   10     | [ 7,11, 0, 4]    [3] 8    [3] 8    [3] 8    [2] 9
+   11     | [11, 0, 4, 8]    [3] 9    [3] 9    [3] 9    [3] 9
+----------------------------------------------------------------
+  Leaders                     3        3        3        3
+  Followers                   9        9        9        9
+----------------------------------------------------------------
+```
+
+Replicas are evenly distributed across racks and SPUs.
+
+##### Example 2 - Unbalanced Rack Distribution
+
+On a cluster with **6** SPUs unevenly distributed across **3** racks:
+
+* rack-a: **0**
+* rack-b: **1, 2**
+* rack-c: **3, 4, 5**
+
+For a topic created with:
+
+| Replicas | Partitions |
+| :------: | :--------: |
+|  **3**   |   **6**    |
+
+The **3-stage** algorithm generates the following distribution:
+
+```text
+Stage 1: SPU Matrix allocation (sorted by size)
+------------------------------------------
+  rack-c:  3,  4,  5
+  rack-b:  1,  2,  _
+  rack-a:  0,  _,  _
+
+Stage 2: SPU sequence (read in diagonal)
+------------------------------------------
+  3, 2, 0, 4, 1, 5
+
+Stage 3: Replica Map
+-------------------------------------------------------
+Partition |  Replicas     rack-c   rack-b   rack-a
+-------------------------------------------------------
+    0     | [3, 2, 0]     [1]      [ ] 1    [ ] 1
+    1     | [2, 0, 4]     [1] 1    [1] 1    [ ] 2
+    2     | [0, 4, 1]     [1] 2    [1] 2    [1] 2
+    3     | [4, 1, 5]     [2] 3    [1] 3    [1] 2
+    4     | [1, 5, 3]     [2] 5    [2] 3    [1] 2
+    5     | [5, 3, 2]     [3] 6    [2] 4    [1] 2
+-------------------------------------------------------
+  Leaders                  3        2        1
+-------------------------------------------------------
+  Followers                6        4        2
+-------------------------------------------------------
+```
+
+Replicas are evenly distributed across SPUs, and racks with a higher number of SPUs handle more replicas. If a power failure occurs on a large rack, leader redistribution may overwhelm the SPUs on the smaller racks.
+
+### Manual Replica Assignment
+
+**MRA** is provisioned through a **replica assignment file**. The file defines a **replica map** that is semantically similar to the **replicaMap** defined in the [Topic Status]. In fact, the **replica map** defined in the file is assigned directly to this field.
+
+The following command creates a topic from a **replica assignment file**:
+
+```bash copy="fl"
+$ fluvio topic create custom-topic --replica-assignment ./my-assignment
+```
+
+A _validate-only_ flag is available to verify a replica assignment file without applying any changes.
+
+#### Replica Assignment File
+
+The **replica assignment file** defines a **replica map** in JSON format. A replica map with 2 partitions and 3 replicas is defined as follows:
+
+```json
+[{
+    "id": 0,
+    "replicas": [0, 1, 2]
+},
+{
+    "id": 1,
+    "replicas": [1, 2, 0]
+}
+]
+```
+
+The **replica map** definition must meet the following criteria:
+
+* **id**:
+  - must start with 0.
+  - must be in sequence, without gaps.
+
+* **replicas**:
+  - must have at least one element.
+  - all replicas must have the same number of elements.
+  - all elements must be unique.
+  - all elements must be positive integers.
+
+For additional information on how to check the result of a replica assignment file, check out [Topics CLI].
+
+[Topics CLI]: fluvio/cli/fluvio/topic.mdx
+[replica election]: fluvio/concepts/architecture/replica-election.mdx
+[Topic Status]: fluvio/concepts/architecture/sc.mdx#topic-status
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/architecture/replica-election.mdx b/versioned_docs/version-0.13.0/fluvio/concepts/architecture/replica-election.mdx
new file mode 100644
index 00000000..830e846e
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/architecture/replica-election.mdx
@@ -0,0 +1,197 @@
+---
+sidebar_position: 6
+title: "Replica Election"
+---
+
+[Replica Assignment] _assigns_ SPUs to a replica set, and **Replica Election** _coordinates_ their roles. The election algorithm manages replica sets in an attempt to designate one active leader at all times. SPUs have a powerful multi-threaded engine that can process a large number of leaders and followers at the same time.
+
+If an SPU becomes incapacitated, the election algorithm identifies all impacted replica sets and triggers a re-election. The following section describes the algorithm utilized by each replica set as it elects a new leader.
+
+## Roles and Responsibilities
+
+The `Leader` and `Followers` of a **Replica Set** have different responsibilities.
+
+Replica Election
+
+`Leader` responsibilities:
+* ingests data from producers
+* stores the data in the local store
+* sends data to consumers
+* forwards incremental data changes to followers
+* keeps the live replica set (**LRS**) updated
+
+`Followers` responsibilities:
+* establish a connection to the leader (and run periodic health checks)
+* receive data changes from the leader
+* store data in the local store
+
+All followers are in hot standby and ready to take over as leader.
+
+## Replica Election Algorithm
+
+Each data stream has a **Live Replica Set (LRS)** that describes the SPUs actively replicating data records in their local data store. The **LRS status** can be viewed with the `fluvio partition list` CLI command.
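+
+For example, with a running cluster you can inspect the current LRS from the command line (the exact output columns may vary by release):
+
+```bash copy="fl"
+$ fluvio partition list
+```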
+
+Replica Election
+
+Replica election covers two core cases:
+* an SPU `goes offline`
+* an SPU that was previously part of the cluster `comes back online`
+
+### SPU goes Offline
+
+When an SPU goes offline, the SC identifies all impacted `Replica Sets` and triggers an election:
+
+* set the `Replica Set` status to _Election_
+* choose a leader candidate from the _followers_ in the **LRS**, based on the smallest lag behind the previous leader:
+
+  * leader candidate found:
+    * set the `Replica Set` status to _CandidateFound_
+    * notify all follower SPUs
+    * start the _wait-for-response_ timer
+
+
+  * no eligible leader candidate available:
+    * set the `Replica Set` status to _Offline_
+
+###### Follower SPUs receive `Leader Candidate` Notification
+
+All SPUs in the Replica Set receive the proposed `leader candidate` and perform the following operations:
+
+* the SPU that matches the `leader candidate` tries to promote its follower replica to leader replica:
+
+  * `follower` to `leader` promotion successful
+    * the SPU notifies the SC
+
+  * `follower` to `leader` promotion failed
+    * no notification is sent
+
+* all other SPUs ignore the message
+
+
+###### SC receives `Promotion Successful` from the `Leader Candidate` SPU
+
+The SC performs the following operations:
+* set the `Replica Set` status to _Online_
+* update the **LRS**
+* notify all follower SPUs in the **LRS** list
+
+
+###### SC 'wait-for-response' timer fired
+
+The SC chooses the next `Leader Candidate` from the **LRS** list and the process repeats.
+
+If no eligible leader candidate is left:
+* set the `Replica Set` status to _Offline_
+
+
+###### SPUs receive a new `LRS`
+
+All SPU followers update their *LRS*:
+* reconnect to the new leader
+* synchronize internal checkpoints with the new leader (as described below)
+* wait for changes from the new leader
+
+
+### SPU comes back Online
+
+When a known SPU comes back online, the SC identifies all impacted `Replica Sets` and triggers a refresh.
+For all Replica Sets with status _Offline_, the SC performs the following operations:
+
+* set the `Replica Set` status to _Election_
+* choose a leader candidate from the _follower_ membership list, based on the smallest lag behind the previous leader:
+
+  * leader candidate found:
+    * set the `Replica Set` status to _CandidateFound_
+    * notify all follower SPUs (see above)
+    * start the _wait-for-response_ timer
+
+
+  * no eligible leader candidate left:
+    * set the `Replica Set` status to _Offline_
+
+The algorithm repeats the same steps as in the "SPU goes Offline" section.
+
+
+## Leader/Follower Synchronization
+
+Each SPU has a _Leader Controller_ that manages leader replicas, and a _Follower Controller_ that manages follower replicas. The SPU utilizes Rust's **async framework** to run a virtually unlimited number of leader and follower operations simultaneously.
+
+### Communication Channels
+
+Each **Replica Set** has a communication channel where the leader and followers exchange replica information. It is the responsibility of the followers to establish a connection to the leader. Once a connection between two SPUs is created, it is shared by all replica sets.
+
+For example, consider three replica sets **a**, **b**, and **c** distributed across `SPU-1`, `SPU-2`, and `SPU-3`:
+
+Replica Election
+
+The first follower (**b** or **c**) from `SPU-1` that tries to communicate with its leader in `SPU-2` creates a TCP connection. All subsequent communication from `SPU-1` to `SPU-2`, irrespective of the replica set, reuses the same connection.
+
+Hence, each SPU pair will have at most 2 connections. For example:
+* `SPU-1` ⇔ `SPU-2`
+  * `SPU-1` followers ⇒ `SPU-2` leaders
+  * `SPU-2` followers ⇒ `SPU-1` leaders
+
+### Synchronization Algorithm
+
+Replicas use **offsets** to indicate the position of a record in a data stream. Offsets start at `zero` and are incremented by one anytime a new record is appended.
+
+The **Log End Offset** (LEO) represents the offset of the last record in the local store of a replica. A record is considered **committed** only when replicated by all live replicas. The **Live Replica Set (LRS)** is the set of active replicas in the membership list. The **High Watermark** (HW) is the offset of the last record committed by the **LRS**.
+
+The **synchronization algorithm** collects the **LEOs**, computes the **HW**, and manages the **LRS**.
+
+Synchronization Algorithm
+
+In this example:
+* LRS = 3
+* HW = 2
+* LEO (Leader = 4, Follower-1 = 3, Follower-2 = 2)
+
+If Follower-2 goes offline: LRS = 2 and HW = 3.
+
+###### Leader/Follower Synchronization
+
+All replica followers send their replica status, `LEO` and `HW`, to their leader. The leader:
+
+* uses the `LEO` to compute the missing records and sends them to the follower
+* computes the 'new' `HW` (from the minimum replica `LEOs`)
+* sends `HW`, `LEO`, and `LRS` to all followers
+
+Replica followers receive the data records, `LEO`, and `HW` from the leader and perform the following operations:
+
+* append records to local stores
+* update the local `LEO` and `HW`
+* send updated status to the leader
+
+And the cycle repeats.
+
+
+###### Lagging Follower
+
+If the leader detects that a follower's `HW` trails the **LRS** `HW` by more than a maximum number of records, the leader removes the follower from the **LRS**.
+
+Followers removed from the **LRS** are ineligible for election but continue to receive records. If a follower catches up with the leader, it is added back to the **LRS** and once again becomes eligible for election.
+
+
+###### Leader Failure
+
+If a leader goes offline, an election is triggered and one of the followers takes over as leader. The rest of the followers connect to the new leader and synchronize their data store.
+
+When the failed leader rejoins the replica set, it detects the new leader and turns itself into a follower. The replica set continues under the new leadership until a new election is triggered.
+
+### Consumer Consistency Model
+
+Replica leaders receive data records from producers and send them to consumers.
+
+Consumers can choose to receive either COMMITTED or UNCOMMITTED records. The second method is discouraged, as it cannot deterministically survive various failure scenarios.
+
+By default, UNCOMMITTED messages are sent to consumers.
+
+[Replica Assignment]: fluvio/concepts/architecture/replica-assignment.mdx
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/architecture/sc.mdx b/versioned_docs/version-0.13.0/fluvio/concepts/architecture/sc.mdx
new file mode 100644
index 00000000..ea50c75d
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/architecture/sc.mdx
@@ -0,0 +1,404 @@
+---
+sidebar_position: 2
+title: "Streaming Controller (SC)"
+---
+
+The **Streaming Controller (SC)** is the central coordinator and the **authoritative** entity of the cluster. It manages configuration changes, provisions SPUs, performs replica assignment, coordinates communication with external clients, and sends periodic reconciliation updates.
+
+Architecture Components - SC
+
+The SC leverages a **Key-Value (KV) store** to persist cluster object configurations.
+
+
+#### Key-Value Store
+
+**Fluvio** is designed to work seamlessly with **Kubernetes** and **etcd** **KV** stores. The **KV interface** is store agnostic and can be extended to support alternative implementations such as Consul, ZooKeeper, or in-memory stores.
+
+#### Default Ports
+
+SCs have a **public** and a **private** server that are attached to the following ports:
+
+* **Public Port**: 9003
+* **Private Port**: 9004
+
+
+## Core Objects
+
+There are four core objects in a Fluvio cluster: **SPU**, **SPU-group**, **Topic**, and **Partition**. The objects follow the **Kubernetes paradigm**, with two fields that govern the configuration: the spec and the status. The **spec** expresses a desired state and the **status** describes the current state.
+
+
+### SPUs
+
+The **SPU spec** has a unique ID, a type, an optional rack, and endpoint identifiers for the API servers. The **SPU Id** is shared across all SPU types and must be **globally unique**.
+
+```yaml
+spec:
+  spuId: 100
+  spuType: "Custom"
+  rack: "Zone1"
+  storage:
+    size: 2Gi
+    logDir: "/tmp/mylog"
+  publicEndpoint:
+    port: 9005
+    ingress:
+      - hostname: localhost
+    encryption: TLS
+  privateEndpoint:
+    port: 9006
+    host: localhost
+    encryption: TLS
+```
+
+The **SPU status** has a resolution field that monitors changes in **connectivity** from the SC's point of view.
+
+```yaml
+status:
+  resolution: online
+```
+
+There are two types of SPUs: managed and custom. **Managed SPUs** are provisioned and maintained by Fluvio, whereas **custom SPUs** are provisioned and managed out of band. Fluvio has the ability to support multiple **managed and custom** SPUs simultaneously. **SPUs** can be deployed in virtually unlimited topologies across **availability zones** and **geo-locations**.
+
+#### Custom SPUs
+
+Custom SPUs are designed for **Edge** devices, **IoT** devices, or **custom environments** where the infrastructure is managed through deployment tools such as Puppet, Chef, or Ansible. This feature is currently experimental.
+
+##### Install Custom SPUs
+
+The SC requires Custom SPUs to be registered before they are allowed to join the cluster:
+
+1. Register a new Custom SPU with the SC.
+2. Configure the Custom SPU in a network that has connectivity with the SC.
+3. Deploy the Custom SPU.
+4. Check the SPU status on the SC to verify that it connected successfully.
+
+Aside from the differences in installation, all SPU types are treated the same.
+
+
+#### Managed SPUs
+
+**Managed SPUs** are groups of SPUs that are **scaled independently** based on user-configurable replication factors. Managed SPUs are configured and owned by SPU-groups, defined below.
+
+
+### SPU Groups
+
+Fluvio **SPU-groups** define the configuration parameters used for provisioning groups of **Managed SPUs**.
+
+SPU Groups
+
+**Replicas** specifies the number of SPUs in a group, and it can be dynamically changed:
+
+* if higher, new SPUs are **provisioned**
+* if lower, the SPUs with the highest ids are **terminated**
+
+**MinId** is the Id of the first SPU in the replica range.
+
+**Template** defines configuration parameters passed to all SPUs in the group. While there are many configuration parameters in the template section, the most relevant one is **storage/size**. If no **size** is specified, it defaults to 1 gigabyte.
+
+##### SPU-group Spec
+
+```yaml
+spec:
+  replicas: 2
+  minId: 11
+  template:
+    storage:
+      size: 2Gi
+      logDir: "/tmp/mylog"
+    publicEndpoint:
+      port: 9005
+      ingress:
+        - hostname: localhost
+      encryption: TLS
+    privateEndpoint:
+      port: 9006
+      host: localhost
+      encryption: TLS
+```
+
+##### SPU-group Status
+
+```yaml
+status:
+  resolution: Reserved
+```
+
+The SPU-group status has 3 **resolutions**: Init, Invalid, and Reserved. If the group is marked invalid, a **reason** field describes the error.
+
+### Topics
+
+**Topics** define configuration parameters for data streams. A topic may have one or more partitions and a replication factor. **Partitions** split the data into independent slices that can be managed by different SPUs. The **replication factor** defines the number of copies of the data across SPUs.
+
+##### Topic Spec
+
+```yaml
+spec:
+  partitions: 6
+  replicationFactor: 3
+```
+
+A topic with *6 partitions* and a *replication factor of 3* on a new cluster generates the following distribution:
+
+Partition Assignment
+
+The algorithm that computes the partition/replica distribution is described in the [Replica Assignment] section.
+
+Fluvio also supports **manual** partition/replica distribution through a **replica assignment file**. The file format is described in the [Topics CLI] section.
+
+##### Topic Status
+
+```yaml
+status:
+  resolution: Provisioned
+  replicaMap:
+    - 0: [0, 1, 2]
+    - 1: [1, 2, 0]
+    - 2: [2, 1, 0]
+    - 3: [0, 1, 2]
+    - 4: [1, 2, 0]
+    - 5: [2, 1, 0]
+```
+
+**Resolution** reflects the status of the topic:
+
+* Init - topic is initializing.
+* Pending - configuration is valid; the topic is provisioning SPUs for the replica map.
+* InsufficientResources - the replica map cannot be created due to a lack of SPUs.
+* InvalidConfig - invalid configuration (for manual replica assignment).
+* Provisioned - topic is successfully allocated.
+
+If an error occurs, the **reason** field describes the cause.
+
+The **Replica Map** defines the partition/replica distribution. The first number is the **partition index** and the array is a **list of SPUs**, with the leader in the first position.
+
+In this example, **4: [1, 2, 0]** defines:
+
+* a partition index 4
+* a list of SPUs
+  * SPU 1 is the leader.
+  * SPU 2 and SPU 0 are followers.
+
+
+### Partitions
+
+When a new topic is created, the **SC** performs [Replica Assignment] to generate the partitions:
+
+1. **generate a partition map** and store it in the topic status,
+2. **create a partition object** for each row in the partition map,
+3. **assign each partition** to the SPU leader.
+
+The **SC** is responsible for the configuration in the **Partition Spec**, and the **SPU** leader is responsible for the **Partition Status**.
+
+##### Partition Spec
+
+```yaml
+spec:
+  initialLeader: 101
+  replicas: [101, 102]
+```
+
+The **SC** defines the **replica assignment** and the SPU **initial leader**. After the initial allocation, the **SC** notifies the SPU **leader** and **followers** of the new partition.
+
+##### Partition Status
+
+```yaml
+status:
+  leader: 101
+  lrs: [101, 102]
+  ...
+```
+
+The **SPU leader** is responsible for managing **Live Replicas (lrs)** and other data streaming related parameters.
+
+Replica management, election, and all other status fields are documented in the [SPU Architecture] section.
+
+## Workflows
+
+The **SC** design is an event-driven architecture that **captures cluster changes** and keeps the SPUs and the Key-Value store **synchronized**.
+
+SC Workflows
+
+The SC uses a **common workflow** to process all event types:
+
+1. Readers
+   * capture incoming events
+   * invoke the Metadata Dispatcher
+2. Metadata Dispatcher
+   * saves event metadata in the **local store**
+   * invokes a Controller
+3. Controllers
+   * each **controller** applies object-centric business logic
+   * generates actions
+4. Action Dispatcher
+   * distributes actions
+5. Updaters
+   * format actions
+   * send updates to the external entity
+
+###### Local Store
+
+The Metadata Dispatcher maintains a **Local Store** of read-only object types that mirror the KV store. Objects in the local store can only be updated by KV Store requests. The local store is utilized by **Controllers** to **transform** events into actions.
+
+## Controllers
+
+The SPU, Topic, and Partition Controllers run independently and manage the workflows for their designated objects.
+
+
+### SPU Controller
+
+The SPU Controller listens for SPU events from the KV store and events from the Connection Manager.
+
+SPU Controller
+
+* **Add SPU**
+
+  The SPU controller creates an **action** to add the SPU to the Connection Manager.
+
+* **Modify SPU**
+
+  The SPU controller creates an **action** to update the SPU in the Connection Manager.
+
+* **Delete SPU**
+
+  The SPU controller creates an **action** to delete the SPU from the Connection Manager.
+
+* **Online/Offline**
+
+  When the connection status changes, the controller creates an **action** to update the SPU **resolution status** in the KV store.
+
+
+### Topic Controller
+
+The Topic Controller listens for Topic and SPU events from the KV store.
+
+Topic Controller
+
+* **Add Topic**
+
+  The Topic controller creates an **action** to update the Topic **status** resolution to **Init** in the KV store.
+
+* **Modify Topic**
+
+  For topics with status resolution **Init** or **Invalid**, the controller validates the partition and replication configuration parameters. Upon validation, the controller:
+
+  * Params _OK_ - creates an **action** to update the Topic **status** resolution to **Pending** in the KV store.
+  * Params _Invalid_ - creates an **action** to update the Topic **status** resolution to **Invalid** in the KV store.
+
+  For topics with status resolution **Pending** or **InsufficientResources**, the controller checks if the number of SPUs meets the replication factor. Upon validation, the controller:
+
+  * SPUs _OK_ - generates a Replica Map and creates the following **actions** for the KV Store:
+
+    * an **action** to update the **status** resolution to **Provisioned** and replicaMap to the **Replica Map**.
+    * an **action** to create a new **Partition** for each entry in the Replica Map.
+
+  * _Not enough SPUs_ - creates an **action** to update the Topic **status** resolution to **InsufficientResources** in the KV store.
+
+* **Add SPU**
+
+  The Topic controller selects all topics with **status** resolution **Pending** or **InsufficientResources** and generates a new Replica Map.
+
+  * for each topic with a new Replica Map, the controller creates **2 actions** for the KV Store:
+
+    * an **action** to update the **status** resolution to **Provisioned** and replicaMap to the **Replica Map**.
+    * an **action** to create a new **Partition** for each entry in the Replica Map.
+
+
+### Partition Controller
+
+The Partition Controller listens for Partition and SPU events from the KV store and events from the Connection Manager.
+
+Partition Controller
+
+* **Add Partition**
+
+  The Partition controller creates the following actions:
+
+  * an **action** to add the Partition to the Connection Manager.
+  * an **action** to update the Partition **status** resolution to **Offline** in the KV store.
+
+* **Modify Partition**
+
+  The Partition controller creates an **action** to update the **Partition spec** in the Connection Manager.
+
+* **Delete Partition**
+
+  The Partition controller creates an **action** to delete the **Partition spec** from the Connection Manager.
+
+* **Modify SPU**
+
+  The Partition controller checks if the SPU status changed from **Online -> Offline** and retrieves all Partitions where the SPU is the leader.
+
+  * for each partition, the controller computes a **new leader** candidate. Upon completion, the controller:
+
+    * leader computed: creates an **action** to update the leader in the Partition Status in the Connection Manager.
+    * no suitable leader found: creates an **action** to update the Partition **status** resolution to **Offline** in the KV store.
+
+  The Partition controller checks if the SPU status changed from **Offline -> Online** and retrieves all Partitions with **status** resolution **Offline**.
+
+  * for each partition where this SPU is not the leader, the controller checks if the SPU is eligible to become leader.
+
+    * SPU eligible: the controller creates an **action** to update the Partition **status** leader to the SPU id in the KV store.
+    * SPU not suitable to be leader: the controller leaves the partition unchanged.
+
+* **Change LRS**
+
+  The Partition controller receives **Live Replicas (LRS)** updates from the **Connection Manager**. The matching Partition is updated in the KV store with the following action:
+
+  * Update Partition status: an **action** to update the Partition **status** resolution to **Online**, **leader** to LRS.leader, and **replica** to LRS.replica.
+
+
+## Connection Manager
+
+The **Connection Manager (CM)** is an **SC Server** module responsible for the connections between the **SC** and the **SPUs**. The **CM** only **accepts** connections from **registered** SPUs.
+
+#### Connection Setup
+
+A connection is established in the following sequence:
+
+Connection Manager
+
+* The **SPU Controller** sends the **add SPU spec** to the **CM**.
+* The **Partition Controller** sends **add Partitions** to the **CM**.
+* The **CM** saves the **SPU** and **Partitions** in a local cache.
+* The **SPU** requests a connection authorization.
+* The **CM** authorizes registered SPUs and rejects all others.
+* The **CM** sends authorization **accepted** to the SPU.
+* The **CM** saves the connection stream in a local cache.
+* The **CM** notifies all relevant **Controllers** to change the SPU status to **online**.
+* The **CM** sends the **SPU Spec** and the **Partition Specs** relevant to the SPU.
+* The **CM** receives continuous **LRS updates** from all SPUs in the cluster.
+
+After the connection is established, both endpoints can initiate requests.
+
+#### Connection Failure
+
+If the connection drops due to a network failure or an SPU going offline, the **CM** takes the following remediation steps:
+
+* The **CM** removes the connection stream from the local cache.
+* The **CM** notifies all relevant controllers that the SPU status is **offline**.
+
+When the SPU comes back online, it initiates a new connection as described in the **Connection Setup** section.
+
+#### Live Replica (LRS) Updates
+
+**Live Replicas (LRS)** are continuous updates sent by **leader SPUs** to the **CM** to report changes in replica status. The **CM** forwards the requests to the relevant **Controllers** for processing.
+
+[SPU Architecture]: fluvio/concepts/architecture/spu.mdx
+[Replica Assignment]: fluvio/concepts/architecture/replica-assignment.mdx
+[Topics CLI]: fluvio/cli/fluvio/topic.mdx
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/architecture/spu.mdx b/versioned_docs/version-0.13.0/fluvio/concepts/architecture/spu.mdx
new file mode 100644
index 00000000..09818915
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/architecture/spu.mdx
@@ -0,0 +1,161 @@
+---
+sidebar_position: 3
+title: "Streaming Processing Unit (SPU)"
+---
+
+The **Streaming Processing Unit (SPU)** is responsible for processing data streams in real time. The SPU is designed for **horizontal scale**, where SPUs are gradually added to the cluster to accommodate higher data throughput. Each SPU **manages replicas**, which represent the lowest unit of a data stream. Replicas are copies of data streams that are evenly distributed across SPUs.
+
+SPU Architecture
+
+#### Default Ports
+
+SPUs have a **public** and a **private** server that are attached to the following ports:
+
+* **Public Port**: 9005
+* **Private Port**: 9006
+
+
+## Workflows
+
+The **SPU** is a high-performance streaming processing unit that works in unison with other SPUs to perform the following **core tasks**:
+
+* manage replicas
+* manage local storage
+* receive data from Producers
+* send data to Consumers
+* send copies of the data to peer SPUs
+
+The following diagram describes the **SPU** object relationships and workflows:
+
+SC Controller
+
+1. Leader Controller (LC)
+   * receives SPU and Partition specs from the SC Dispatcher
+   * creates local storage for Replicas
+   * syncs Replica info with Followers
+   * sends Replica info to the Follower SPU Multicast writer
+   * receives Replica info from the Follower SPU Dispatcher
+   * sends LRS status to the SC Updater
+   * receives messages from Producers
+   * sends messages to Consumers
+2. Follower Controller (FC)
+   * receives Partition specs from the SC Dispatcher
+   * creates local storage for Replicas
+   * syncs Replica info with the Leader
+   * sends Replica info to the Leader SPU Multicast dispatcher
+   * receives Replica info from the Leader SPU Dispatcher
+3. Dispatch SPU/Partition
+   * receives SPU and Partition specs from the SC
+   * dispatches SPU and Partition specs to the Leader Controller
+   * dispatches Partition specs to the Follower Controllers
+   * spawns Leader and Follower Controllers (if needed)
+4. Update LRS
+   * receives LRS info from the Leader Controller and sends it to the SC
+5. Dispatch Replica Info
+   * receives Replica Info from the Leader SPU and sends it to the Follower Controller
+   * receives Replica Info from the Follower SPU and sends it to the Leader Controller
+6. Multicast Replica Info
+   * receives Replica info from the Follower Controller and unicasts it to the Leader SPU
+   * receives Replica info from the Leader Controller and multicasts it to the Follower SPUs
+
+
+## Controllers
+
+SPU Controllers are responsible for core data processing. Unlike SC Controllers, which are single instances provisioned at startup, SPU Controllers are dynamically allocated. SPU Controllers are optimized for maximum concurrency and can run a large number of instances in parallel.
+
+As shown in the diagram above, there are two types of Controllers:
+
+* Leader Controller
+* Follower Controller
+
+At startup, the **SC dispatcher** manages the Leader and Follower Controllers and forwards the **SPU** and **Partition** Specs.
+
+
+### Leader Controller
+
+A **Leader Controller (LC)** is spawned by the **SC Dispatcher** when a new Leader Replica Spec is received. Each **LC** is responsible for managing the Replica Leader and the storage areas dedicated to the replica. When the Replica is removed, the **LC** is terminated but the storage is preserved. If a Replica with the same id is created again, the storage is reattached.
+
+#### Inter-SPU Connections
+
+Similar to the SC-SPU connection setup, the **LC** waits for the Follower SPU to initiate a full-duplex connection before it can communicate.
+
+#### Producer/Consumers
+
+Each **LC** is solely responsible for the interaction with producers and consumers. When the **LC** receives messages from **Producers**, it performs the following operations:
+
+* appends new records to local storage
+* sends updated offsets to the SC and follower controllers
+* for each consumer, it sends records as specified by its request type:
+  * committed records - records that have been replicated across followers
+  * uncommitted records - records that have been persisted on local storage
+
+#### Offset Handling
+
+The Leader and Followers sync their offsets with each other. If followers fall behind, the leader sends the missing records until the followers catch up.
+
+When the leader receives an offset index from a follower, it computes the lagging indicator. This indicator is used:
+
+* to detect if records can be committed.
+* to identify the followers that are behind.
+
+Replica information such as committed records and lagging indicators is sent to the **SC** in the Live Replicas (LRS) message.
+
+
+### Follower Controller
+
+A **Follower Controller (FC)** manages all Follower Replicas grouped by a **Leader**. The **FC** is spawned by the **SC Dispatcher** when both of the following conditions are met:
+
+* the Follower Spec is new.
+* no **FC** has previously been created.
+
+The **FC** is terminated when the last Follower Spec is removed. Each **FC** is responsible for the storage areas dedicated to all follower replicas. The storage is preserved when the **FC** is terminated.
+
+#### Workflow
+
+The **FC** event loop performs the following operations:
+
+* Ensures a connection to the **Leader SPU** is established; otherwise, it connects.
+* Creates or reattaches local storage when a new Replica Follower is received.
+* Syncs offsets with the **Leader SPU**.
+* Adds records from the **Leader SPU** to the Replica.
+
+
+## Replica Election
+
+Topics are created with a _replication factor_, which defines the number of data copies saved for each data stream. For example, a topic with a replication factor of 3 will generate 3 copies of data, one per SPU.
+
+The [Replica Assignment] algorithm designates sets of SPUs to store identical copies (_replicas_) of the data for each data stream. The [Election algorithm] assigns one SPU as leader and the others as followers. The leader is responsible for data propagation and the communication with producers and consumers. The followers are responsible for replicating data received from the leader. If the leader goes offline, an election ensues and one of the followers becomes the new leader. After the election is completed, clients automatically reconnect and operation resumes.
+
+The election algorithm and failover scenarios are described in detail in the [Replica Election] section.
+
+
+## Replica Storage
+
+Each replica writes records to a local file on the SPU.
+
+New records are appended to files and become immutable.
+
+Records are indexed by offset and time.
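+
+As an illustration of offset indexing (a simplified, hypothetical stand-in for the memory-mapped index files described further below, not Fluvio's actual storage code), a lookup resolves an offset to the closest earlier indexed file position:
+
+```rust
+/// Illustrative offset index: sorted (offset, byte position) pairs.
+struct OffsetIndex {
+    entries: Vec<(u64, u64)>, // (record offset, byte position in segment file)
+}
+
+impl OffsetIndex {
+    /// Returns the byte position of the last indexed record at or before `offset`.
+    fn lookup(&self, offset: u64) -> Option<u64> {
+        match self.entries.binary_search_by_key(&offset, |&(off, _)| off) {
+            Ok(i) => Some(self.entries[i].1),      // exact match
+            Err(0) => None,                        // offset precedes the first indexed record
+            Err(i) => Some(self.entries[i - 1].1), // closest earlier entry
+        }
+    }
+}
+```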
+Each record consists of an arbitrary binary key/value pair.
+
+Records are organized in segments of a predefined maximum size. Segments can be purged based on a retention policy.
+
+To allow faster access to records in a replica, index files are maintained. The index file is memory-mapped to a b-tree structure to allow fast access. Index files are rebuilt from records as necessary.
+
+Record IO is optimized for each platform and designed for high-throughput async IO. Two factors determine system performance:
+
+* Consistency Model - COMMITTED/UNCOMMITTED
+* Flushing Policy - by default, records are flushed based on a minimum number of bytes or after a certain amount of time has elapsed.
+
+### Zero Copy
+
+Records in a replica are sent to consumers using a zero-copy mechanism. Zero copy avoids the need to copy records in memory and increases performance.
+
+Records can be batched together to improve performance.
+
+[Replica Assignment]: fluvio/concepts/architecture/replica-assignment.mdx
+[Election algorithm]: fluvio/concepts/architecture/replica-election.mdx
+[Replica Election]: fluvio/concepts/architecture/replica-election.mdx
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/batching.mdx b/versioned_docs/version-0.13.0/fluvio/concepts/batching.mdx
new file mode 100644
index 00000000..cc6f5304
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/batching.mdx
@@ -0,0 +1,92 @@
+---
+sidebar_position: 10
+title: "Batching"
+---
+
+Fluvio producers try to send records in batches to reduce the number of messages sent and improve throughput. Each producer has several configuration options that can be set to improve performance for a specific use case. For instance, they can be used to reduce disk usage, reduce latency, or improve throughput.
+As of today, batching behavior in Fluvio Producers can be modified with the following configurations:
+
+- `max_request_size`: Indicates the maximum number of bytes that the producer can send in a single request. If a record is larger than the max request size, the producer will fail to send the record. Only the uncompressed size of the record is considered. Defaults to 1048576 bytes.
+- `batch_size`: Indicates the maximum number of bytes that can be accumulated in a batch. If a record is larger than the batch size, the producer will send the record in a single new batch. Only the uncompressed size of the record is considered. Defaults to 16384 bytes.
+- `compression`: Compression algorithm used by the producer to compress each batch before sending it to the SPU. Supported compression algorithms are none, gzip, snappy, and lz4.
+- `linger`: Time to wait before sending batches that have not reached the maximum batch size to the server. Defaults to 100 ms.
+
+
+# Trade-offs and Considerations
+
+Every configuration presents a mix of advantages and disadvantages:
+
+- `max_request_size`: A larger value allows the producer to send larger records and can improve throughput, but records that exceed the limit are dropped.
+- `batch_size`: A larger value can reduce the number of requests sent to the server, but will increase latency.
+- `compression`: Helps decrease storage size and improve networking throughput, but will increase CPU usage and add latency.
+- `linger`: A value of 0 sends records immediately, minimizing latency but reducing throughput. Higher values introduce delay but improve throughput and network utilization.
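+
+As a sketch, these settings map onto the Rust producer configuration builder. The builder method names below are assumed to mirror the configuration keys above; check the API reference for your version:
+
+```rust
+use std::time::Duration;
+use fluvio::{Compression, Fluvio, TopicProducerConfigBuilder};
+
+// Assumed builder methods mirroring the configuration keys above.
+let config = TopicProducerConfigBuilder::default()
+    .batch_size(16_384)                  // max accumulated bytes per batch
+    .max_request_size(1_048_576)         // max bytes in a single request
+    .linger(Duration::from_millis(100))  // wait up to 100 ms to fill a batch
+    .compression(Compression::Gzip)      // compress each batch before sending
+    .build()?;
+
+let fluvio = Fluvio::connect().await?;
+let producer = fluvio.topic_producer_with_config("example-topic", config).await?;
+```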
+
+The ideal parameters for `max_request_size`, `batch_size`, `linger`, and `compression` depend on your application needs.
+
+# Example Scenarios
+
+Create a topic and generate a large data file:
+
+```bash
+fluvio topic create example-topic
+printf 'This is a sample line. ' | awk -v b=500000 '{while(length($0) < b) $0 = $0 $0}1' | cut -c1-500000 > large-data-file.txt
+```
+
+### Max Request Size
+
+`max_request_size` defines the maximum size of a message that can be sent by the producer. If a message exceeds this size, Fluvio will throw an error.
+
+```bash
+fluvio produce example-topic --max-request-size 16384 --file large-data-file.txt --raw
+```
+
+The following error will be displayed:
+
+```bash
+Error: Record dropped: record size (xyz bytes), exceeded maximum request size (16384 bytes)
+```
+
+### Batch Size
+
+`batch_size` defines the cumulative size of all records sent in the same batch. If a record exceeds this size, Fluvio will process the record in a new batch without applying the `batch_size` limit.
+
+```bash
+fluvio produce example-topic --batch-size 16536 --file large-data-file.txt --raw
+```
+
+In this example, the oversized record is sent in its own new batch. Hence, there is no error.
+
+### Compression
+
+The algorithm computes all values pre-compression. Use raw size values to ensure your records are processed.
+
+`batch_size` and `max_request_size` will only use the uncompressed message size.
+
+```bash
+fluvio produce example-topic --batch-size 16536 --compression gzip --file large-data-file.txt --raw
+fluvio produce example-topic --max-request-size 16384 --compression gzip --file large-data-file.txt --raw
+```
+
+Only the second command will display an error, because the uncompressed message exceeds the max request size.
+
+
+### Linger
+
+`linger` defines the time that the producer will wait before sending a batch of records.
+
+As linger is only relevant when the records are smaller than the batch size, in the following example the record is sent without delay:
+
+```bash
+fluvio produce example-topic --linger 10sec --file large-data-file.txt --raw
+```
+
+In the following example, we are using small records, and linger waits for the time-based trigger to produce:
+
+```bash
+fluvio produce example-topic --linger 10sec
+> abc
+> abc
+> abc
+```
+
+As all the records are small and the batch is not full, the producer will wait for the linger time before sending the batch.
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/data-consistency.mdx b/versioned_docs/version-0.13.0/fluvio/concepts/data-consistency.mdx
new file mode 100644
index 00000000..8dbc82fe
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/data-consistency.mdx
@@ -0,0 +1,63 @@
+---
+sidebar_position: 11
+title: "Data Consistency"
+---
+
+Data in this context is a set of records that producers send to the partition leader. The leader is responsible for receiving
+messages from the producer, sending messages to the consumer, and replicating them to followers. There can be only **one leader
+per partition** at any point in time. Depending on the cluster and topic configurations, there can be many or zero followers.
+
+Messages are stored in the **order** that the leader observes them, and **reordering is not permitted**. The order of records inside a message
+is preserved and cannot be changed. Each record is assigned a **unique, monotonically increasing** number called an **offset**.
+
+After a record gets accepted by the leader, it can be in one of two states: COMMITTED or UNCOMMITTED.
+COMMITTED denotes that all followers have acknowledged the record.
+If there are no followers in the partition, the state is always COMMITTED once
+a record gets acknowledged. Records are UNCOMMITTED in all other cases.
+See more details about the [Synchronization Algorithm].
+
+Neither the leader nor the follower waits for **data persistence** (fsync) before sending an acknowledgment of the record. This means that
+**uncommitted** records may be lost if the leader crashes.
+
+The leader does not uphold an **atomicity guarantee** for the entire message. Records are processed one by one. If an error occurs,
+the operation aborts and a response with an error message is returned, but Fluvio does not roll back the previous records from the batch.
+
+Which record state to use is a configurable option for both producers and consumers.
+
+## Producer Isolation
+Isolation is a configuration parameter of the Producer that has two values:
+
+1. `ReadCommitted` - The leader waits for records to get committed before sending an acknowledgement to the Producer.
+```bash
+$ fluvio produce greetings --isolation read_committed
+```
+
+
+2. `ReadUncommitted` - The leader does not wait for records to get committed before sending an acknowledgement to the Producer.
+```bash
+$ fluvio produce greetings --isolation read_uncommitted
+```
+
+`ReadUncommitted` isolation gives **lower latency** but has **weaker guarantees**.
+
+If not specified, `ReadUncommitted` isolation is used by default.
+
+-> Producer Isolation determines when a successful delivery has been made for the **at-least-once** delivery semantic. [Read details].
+
+## Consumer Isolation
+Isolation is a configuration parameter of the Consumer that has two values:
+
+1. `ReadCommitted` - Read COMMITTED records only. The leader doesn't send UNCOMMITTED records to the Consumer.
+```bash
+$ fluvio consume greetings --isolation read_committed
+```
+
+2. `ReadUncommitted` - Read all records regardless of their state.
+```bash
+$ fluvio consume greetings --isolation read_uncommitted
+```
+
+If not specified, `ReadUncommitted` isolation is used by default.
+
+[Synchronization Algorithm]: fluvio/concepts/architecture/replica-election.mdx#synchronization-algorithm
+[Read details]: fluvio/concepts/delivery-semantics.mdx#at-least-once
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/delivery-semantics.mdx b/versioned_docs/version-0.13.0/fluvio/concepts/delivery-semantics.mdx
new file mode 100644
index 00000000..264fdbd6
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/delivery-semantics.mdx
@@ -0,0 +1,98 @@
+---
+sidebar_position: 12
+title: "Delivery Semantics"
+---
+
+The Internet, as well as other networks, is considered an unreliable communication channel. There can be delays or lost messages, and connections can fail unexpectedly.
+This affects the reliability of record delivery between producers and the SPU.
+
+Fluvio producers can be configured with a `delivery_semantic` configuration option, which allows choosing a delivery mechanism. Each mechanism has
+a different trade-off between reliability and performance. There are two delivery semantics currently supported by producers:
+`at-most-once` and `at-least-once` (default).
+
+
+Regardless of `delivery_semantic`, Fluvio batches outgoing records, so it is necessary to flush the producer once all records have been sent to ensure proper delivery.
+
+
+```rust
+let fluvio = Fluvio::connect().await?;
+let config = TopicProducerConfigBuilder::default()
+    .delivery_semantic(DeliverySemantic::AtMostOnce)
+    .build()?;
+let producer = fluvio.topic_producer_with_config("greetings", config).await?;
+producer.send("Hello", "Fluvio!").await?;
+producer.flush().await?;
+```
+
+The mechanism for accessing response metadata for produced records is also the same for both `at-most-once` and `at-least-once` delivery semantics.
+
+
+
+```rust
+let output = producer.send("Hello", "Fluvio!").await?;
+// Provides response metadata for records, such as the offset.
+// This only returns once the batch has been sent and an ack is received from the SPU.
+output.wait().await?;
+```
+
+
+
+
+### At Most Once
+`at-most-once` delivery means that for each record generated by a producer, that record is delivered zero or one times. **This means that messages may be lost.**
+
+The producer sends the message with records to the SPU and **does not
+wait** for the response. This delivery method has higher throughput but no
+guarantee that the message was delivered.
+
+
+[Producer Isolation] has no effect if this delivery
+semantic is used, unless the user explicitly waits for the response, as shown in the following snippet:
+
+
+
+```rust
+let fluvio = Fluvio::connect().await?;
+let config = TopicProducerConfigBuilder::default()
+    .delivery_semantic(DeliverySemantic::AtMostOnce)
+    .build()?;
+let producer = fluvio.topic_producer_with_config("greetings", config).await?;
+let output = producer.send("Hello", "Fluvio!").await?;
+output.wait().await?; // Producer isolation has no effect unless wait() is called
+```
+
+
+### At Least Once
+`at-least-once` delivery means that for each record handed to the producer, potentially **multiple attempts** are made
+at delivering it, such that at least one succeeds. **This means that messages may be duplicated
+but not lost.**
+
+The producer sends the message with records to the SPU, **waits** for the response, and **resends** if
+transport errors occur. This delivery method has lower throughput compared to `at-most-once`, but better reliability.
+
+
+[Producer Isolation] determines when the SPU will send the response signifying a successful delivery.
+
+
+There are three main parameters that can be configured for the `at-least-once` semantic: the maximum number of retries, the retry backoff strategy (fixed, Fibonacci, or exponential), and the maximum timeout for all attempts.
+
+Example:
+
+
+```rust
+let policy = RetryPolicy {
+    max_retries: 5,
+    initial_delay: Duration::from_millis(10),
+    max_delay: Duration::from_secs(2),
+    timeout: Duration::from_secs(10),
+    strategy: RetryStrategy::ExponentialBackoff
+};
+let config = TopicProducerConfigBuilder::default()
+    .delivery_semantic(DeliverySemantic::AtLeastOnce(policy))
+    .build()?;
+let producer = fluvio.topic_producer_with_config("greetings", config).await?;
+```
+In the above example, the Fluvio Producer retries at most five times, and all retries take a maximum of 10 seconds. The delay between retries increases exponentially:
+the first delay is 10 ms, the second is 100 ms, then 1000 ms, and all subsequent delays are 2000 ms, as that is the maximum allowed delay.
+
+[Producer Isolation]: fluvio/concepts/data-consistency.mdx#producer-isolation
\ No newline at end of file
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/offsets.mdx b/versioned_docs/version-0.13.0/fluvio/concepts/offsets.mdx
new file mode 100644
index 00000000..31945460
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/offsets.mdx
@@ -0,0 +1,37 @@
+---
+sidebar_position: 9
+title: "Offsets"
+---
+
+When a record is assigned an offset, that offset permanently identifies
+that record and will never be reused for another record.
+
+In order to begin consuming records, a consumer must specify the topic and
+partition to consume from, as well as the offset into the partition where
+it should begin reading.
+
+The offset of a record is its absolute
+position within its parent partition.
+
+An offset may be derived in several convenient ways:
+
+- Directly, as an absolute index into the partition, starting from zero
+- As a relative distance from the beginning of the partition
+- As a relative distance from the end of the partition
+
+
+There is a difference between an absolute offset and a relative offset
+from the beginning of the partition.
+
+When consumers specify a relative offset, the offset given by the consumer
+is used to calculate the actual absolute offset into the partition.
+
+When a record is assigned an offset, that offset permanently identifies
+that record, but this does not necessarily mean that the record will always be available.
+
+If a partition has a retention policy
+that causes it to begin deleting records from the beginning, then the
+relative-from-beginning offset will count forward from the oldest record
+that is still available.
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/operations/_category_.json b/versioned_docs/version-0.13.0/fluvio/concepts/operations/_category_.json
new file mode 100644
index 00000000..c6b0825e
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/operations/_category_.json
@@ -0,0 +1,5 @@
+{
+  "label": "Operations",
+  "collapsed": true,
+  "position": 2
+}
diff --git a/versioned_docs/version-0.13.0/fluvio/concepts/operations/data-retention.mdx b/versioned_docs/version-0.13.0/fluvio/concepts/operations/data-retention.mdx
new file mode 100644
index 00000000..9b6db1d7
--- /dev/null
+++ b/versioned_docs/version-0.13.0/fluvio/concepts/operations/data-retention.mdx
@@ -0,0 +1,188 @@
+---
+sidebar_position: 2
+title: "Data Retention"
+---
+
+## Overview
+
+Topic data is automatically pruned when **any** of the following criteria are true:
+1. The partition size exceeds the configured max partition size
+2. The elapsed time since the last write to a segment has passed the configured retention time
+
+-> Data eviction operates at the segment level. If any of the above conditions are met, the entire segment gets removed. Only previous segments can be pruned. If your data resides in the active segment, it won't be evicted unless the segment turns into a historical (read-only) segment.
+
+
+## Configuring retention
+
+Retention is configured per topic at the time of topic creation with `fluvio topic create`.
+
+```shell
+$ fluvio topic create -h
+Create a Topic with the given name
+
+fluvio-stable topic create [FLAGS] [OPTIONS] 
+    [...]
+        --retention-time