From 1c08d900c24f40f3da94a700ec88cee3c16dd142 Mon Sep 17 00:00:00 2001
From: monoxgas
Date: Fri, 10 May 2024 15:49:30 -0600
Subject: [PATCH] Docs updates dvra example
---
docs/home/getting-started.md | 321 ++++++++++++
docs/home/introduction.md | 39 ++
docs/home/principles.md | 23 +
docs/{index.md => home/workflow.md} | 42 +-
docs/stylesheets/extra.css | 89 +++-
docs/topics/agents.md | 16 +
docs/topics/async-and-batching.md | 177 +++++++
docs/topics/callbacks-and-mapping.md | 188 +++++++
docs/topics/chats-and-messages.md | 158 ++++++
docs/topics/chats.md | 99 ----
docs/topics/cheatsheet.md | 14 +
docs/topics/completions.md | 70 +++
docs/topics/generators.md | 167 +++++-
docs/topics/{setup_logging.md => logging.md} | 8 +-
docs/topics/models.md | 342 ++++++++++---
docs/topics/serialization.md | 121 +++++
docs/topics/tools.md | 153 +++++-
examples/_shared.py | 42 --
examples/bandit.py | 73 +--
examples/bandit_single.py | 477 -----------------
examples/dvra.py | 508 +++++++++++++++++++
mkdocs.yml | 23 +-
rigging/__init__.py | 3 +-
rigging/chat.py | 35 +-
rigging/completion.py | 2 +-
rigging/logging.py | 23 +-
rigging/tool.py | 13 +-
27 files changed, 2358 insertions(+), 868 deletions(-)
create mode 100644 docs/home/getting-started.md
create mode 100644 docs/home/introduction.md
create mode 100644 docs/home/principles.md
rename docs/{index.md => home/workflow.md} (56%)
create mode 100644 docs/topics/agents.md
create mode 100644 docs/topics/async-and-batching.md
create mode 100644 docs/topics/callbacks-and-mapping.md
create mode 100644 docs/topics/chats-and-messages.md
delete mode 100644 docs/topics/chats.md
create mode 100644 docs/topics/cheatsheet.md
create mode 100644 docs/topics/completions.md
rename docs/topics/{setup_logging.md => logging.md} (85%)
create mode 100644 docs/topics/serialization.md
delete mode 100644 examples/_shared.py
delete mode 100644 examples/bandit_single.py
create mode 100644 examples/dvra.py
diff --git a/docs/home/getting-started.md b/docs/home/getting-started.md
new file mode 100644
index 0000000..2a21afe
--- /dev/null
+++ b/docs/home/getting-started.md
@@ -0,0 +1,321 @@
+# Getting Started
+
+Rigging is a flexible library built on top of other very flexible libraries. As such, it might take a bit to warm
+up to its interfaces given the many ways you can accomplish your goals. However, the code is well documented,
+and the topic pages and source are great places to step in and out of as you explore.
+
+??? tip "IDE Setup"
+
+    Rigging has been built with full type support, which provides clear guidance on which
+    methods return which types, and when they return those types. It's recommended that you
+    operate in a development environment which can take advantage of this information.
+    Your use of Rigging will almost "fall" into place and you won't be guessing about
+    objects as you work.
+
+## Basic Chats
+
+Let's start with a very basic generation example that doesn't include any parsing features, continuations, etc.
+You want to chat with a model and collect its response.
+
+We first need to get a [generator][rigging.generator.Generator] object. We'll use
+[`get_generator`][rigging.generator.get_generator] which will resolve an identifier string
+to the underlying generator class object.
+
+??? note "API Keys"
+
+ The default Rigging generator is [LiteLLM][rigging.generator.LiteLLMGenerator], which
+ wraps a large number of providers and models. We assume for these examples that you
+ have API tokens set as environment variables for these models. You can refer to the
+ [LiteLLM docs](https://docs.litellm.ai/docs/) for supported providers and their key format.
+ If you'd like, you can change any of the model IDs we use and/or add `,api_key=[sk-1234]` to the
+ end of any of the generator IDs to specify them inline.
+
+```py hl_lines="3"
+import rigging as rg # (1)!
+
+generator = rg.get_generator("claude-3-sonnet-20240229") # (2)!
+pending = generator.chat(
+ [
+ {"role": "system", "content": "You are a wizard harry."},
+ {"role": "user", "content": "Say hello!"},
+ ]
+)
+chat = pending.run()
+print(chat.all)
+# [
+# Message(role='system', parts=[], content='You are a wizard harry.'),
+# Message(role='user', parts=[], content='Say hello!'),
+# Message(role='assistant', parts=[], content='Hello! How can I help you today?'),
+# ]
+```
+
+1. You'll see us use this shorthand import syntax throughout our code, it's
+ totally optional but makes things look nice.
+2. This is actually shorthand for `litellm!anthropic/claude-3-sonnet-20240229`, where `litellm`
+ is the provider. We just default to that generator and you don't have to be explicit. You
+ can find more information about this in the [generators](../topics/generators.md) docs.
+
+
+Generators have an easy [`chat()`][rigging.generator.Generator.chat] method which you'll
+use to initiate the conversations. You can supply messages in many different forms from
+dictionary objects, full [`Message`][rigging.message.Message] classes, or a simple `str`
+which will be converted to a user message.
+
+```py hl_lines="4-9"
+import rigging as rg
+
+generator = rg.get_generator("claude-3-sonnet-20240229")
+pending = generator.chat( # (1)!
+ [
+ {"role": "system", "content": "You are a wizard harry."},
+ {"role": "user", "content": "Say hello!"},
+ ]
+)
+chat = pending.run()
+print(chat.all)
+# [
+# Message(role='system', parts=[], content='You are a wizard harry.'),
+# Message(role='user', parts=[], content='Say hello!'),
+# Message(role='assistant', parts=[], content='Hello! How can I help you today?'),
+# ]
+```
+
+1. [`generator.chat`][rigging.generator.Generator.chat] is actually just a helper for
+ [`chat(generator, ...)`][rigging.generator.chat], they do the same thing.
+
+??? note "PendingChat vs Chat"
+
+ You'll notice we name the result of `chat()` as `pending`. The naming might be confusing,
+ but chats go through 2 phases. We first stage them into a pending state, where we operate
+ and prepare them in a "pipeline" of sorts before we actually trigger generation with `run()`.
+
+ Calling `.chat()` doesn't trigger any generation, but calling any of these run methods will:
+
+ - [rigging.chat.PendingChat.run][]
+ - [rigging.chat.PendingChat.run_many][]
+ - [rigging.chat.PendingChat.run_batch][]
+
+In this case, we have nothing additional we want to add to our pending chat, and we are only interested
+in generating exactly one response message. We simply call [`.run()`][rigging.chat.PendingChat.run] to
+execute the generation process and collect our final [`Chat`][rigging.chat.Chat] object.
+
+```py hl_lines="10-11"
+import rigging as rg
+
+generator = rg.get_generator("claude-3-sonnet-20240229")
+pending = generator.chat(
+ [
+ {"role": "system", "content": "You are a wizard harry."},
+ {"role": "user", "content": "Say hello!"},
+ ]
+)
+chat = pending.run()
+print(chat.all)
+# [
+# Message(role='system', parts=[], content='You are a wizard harry.'),
+# Message(role='user', parts=[], content='Say hello!'),
+# Message(role='assistant', parts=[], content='Hello! How can I help you today?'),
+# ]
+```
+
+View more about Chat objects and their properties [over here][rigging.chat.Chat]. In general, chats
+give you access to exactly what messages were passed into a model, and what came out the other side.
+
+## Conversation
+
+Both `PendingChat` and `Chat` objects provide freedom for forking off the current state of messages, or
+continuing a stream of messages after generation has occurred. In general:
+
+- [`PendingChat.fork`][rigging.chat.PendingChat.fork] will clone the current pending chat and let you maintain
+ both the new and original object for continued processing.
+- [`Chat.fork`][rigging.chat.Chat.fork] will produce a fresh `PendingChat` from all the messages prior to the
+ previous generation (useful for "going back" in time).
+- [`Chat.continue_`][rigging.chat.Chat.continue_] is similar to `fork` (actually a wrapper) which tells `fork` to
+ include the generated messages as you move on (useful for "going forward" in time).
+
+```py
+import rigging as rg
+
+generator = rg.get_generator("gpt-3.5-turbo")
+chat = generator.chat([
+ {"role": "user", "content": "Hello, how are you?"},
+])
+
+# We can fork before generation has occurred
+specific = chat.fork("Be specific please.").run()
+poetic = chat.fork("Be as poetic as possible").overload(temperature=1.5).run() # (1)!
+
+# We can also continue after generation
+next_chat = poetic.continue_(
+ {"role": "user", "content": "That's good, tell me a joke"}
+)
+
+update = next_chat.run()
+```
+
+1. In this case the temperature change will only be applied to the poetic path because `fork` has
+ created a clone of our pending chat.
+
+## Basic Parsing
+
+Now let's assume we want to ask the model for a piece of information, and we want to make sure
+this item conforms to a pre-defined structure. Underneath, Rigging uses [Pydantic XML](https://pydantic-xml.readthedocs.io/)
+which itself is built on [Pydantic](https://docs.pydantic.dev/). We'll cover more about
+constructing models in a [later section](../topics/models.md), but don't stress the details for now.
+
+??? note "XML vs JSON"
+
+ Rigging is opinionated with regard to using XML to weave unstructured data with structured contents
+ as the underlying LLM generates text responses. A frequent solution to getting "predictable"
+ outputs from LLMs has been forcing JSON conformant outputs, but we think this is
+ poor form in the long run. You can read more about this from [Anthropic](https://docs.anthropic.com/claude/docs/use-xml-tags)
+ who have done extensive research with their models.
+
+ We'll skip the long rant, but trust us that XML is a very useful syntax which beats
+ JSON any day of the week for typical use cases.
+
+To begin, let's define a `FunFact` model which we'll have the LLM fill in. Rigging exposes a
+[`Model`][rigging.model.Model] base class which you should inherit from when defining structured
+inputs. This is a lightweight wrapper around pydantic-xml's [`BaseXmlModel`](https://pydantic-xml.readthedocs.io/en/latest/pages/api.html#pydantic_xml.BaseXmlModel)
+with some added features and functionality to make it easy for Rigging to manage. However, (almost) everything
+pydantic-xml models support is also supported in Rigging.
+
+```py hl_lines="3-4"
+import rigging as rg
+
+class FunFact(rg.Model):
+ fact: str # (1)!
+
+chat = rg.get_generator('gpt-3.5-turbo').chat(
+ f"Provide a fun fact between {FunFact.xml_example()} tags."
+).run()
+
+fun_fact = chat.last.parse(FunFact)
+
+print(fun_fact.fact)
+# The Eiffel Tower can be 15 cm taller during the summer due to the expansion of the iron in the heat.
+```
+
+1. This is what pydantic-xml refers to as a "primitive" class, as it is simply a single
+   typed value placed between the tags. See more about primitive types, elements, and attributes in the
+ [Pydantic XML Docs](https://pydantic-xml.readthedocs.io/en/latest/pages/quickstart.html#primitives)
+
+We need to show the target LLM how to format its response, so we'll use the
+[`.xml_example()`][rigging.model.Model.xml_example] class method which all models
+support. By default this will simply emit the empty XML tags of our model:
+
+```xml
+Provide a fun fact between <fun-fact></fun-fact> tags.
+```
+
+??? note "Customizing Model Tags"
+
+ Tags for a model are auto-generated based on the name of the class. You are free
+ to override these by passing `tag=[value]` into your class definition like this:
+
+ ```py
+ class LongNameForThing(rg.Model, tag="short"):
+ ...
+ ```
+
+We wrap up the generation and extract our parsed object by calling [`.parse()`][rigging.message.Message.parse]
+on the [last message][rigging.chat.Chat.last] of our generated chat. This will process the contents
+of the message, extract the first matching model which parses successfully, and return it to us as a python
+object.
+
+```py hl_lines="10"
+import rigging as rg
+
+class FunFact(rg.Model):
+ fact: str
+
+chat = rg.get_generator('gpt-3.5-turbo').chat(
+ f"Provide a fun fact between {FunFact.xml_example()} tags."
+).run()
+
+fun_fact = chat.last.parse(FunFact)
+
+print(fun_fact.fact) # (1)!
+# The Eiffel Tower can be 15 cm taller during the summer due to the expansion of the iron in the heat.
+```
+
+1. Because we've defined `FunFact` as a class, the result of `.parse()` is typed to that object. In our
+   code, all the properties of the fact will be available just as if we had created the object directly.
+
+Notice that we don't have to worry about the model being verbose in its response, as we've communicated
+that the text between the `#!xml <fun-fact></fun-fact>` tags is the relevant place to put its answer.
+
+## Strict Parsing
+
+In the example above, we don't handle the case where the model fails to properly conform to our
+desired output structure. If the last message content is invalid in some way, our call to `parse`
+will result in an exception from Rigging. Rigging is designed at its core to manage this process,
+and we have a few options:
+
+1. We can make the parsing optional by switching to [`.try_parse()`][rigging.message.Message.try_parse]. The type
+   of the return value will automatically switch to `#!python FunFact | None` and you can handle cases
+   where parsing failed.
+2. We can extend our pending chat with [`.until_parsed_as()`][rigging.chat.PendingChat.until_parsed_as] which will cause the
+   `run()` function to internally check that parsing is succeeding before returning the chat back to you.
+
+=== "Option 1 - Trying"
+
+ ```py hl_lines="5"
+ chat = rg.get_generator('gpt-3.5-turbo').chat(
+ f"Provide a fun fact between {FunFact.xml_example()} tags."
+ ).run()
+
+ fun_fact = chat.last.try_parse(FunFact) # fun_fact might now be None
+
+ print(fun_fact or "Failed to get fact")
+ ```
+
+=== "Option 2 - Until"
+
+ ```py hl_lines="3"
+ chat = rg.get_generator('gpt-3.5-turbo').chat(
+ f"Provide a fun fact between {FunFact.xml_example()} tags."
+ ).until_parsed_as(FunFact).run()
+
+ fun_fact = chat.last.parse(FunFact) # This call should never fail
+
+ print(fun_fact or "Failed to get fact")
+ ```
+
+A couple of comments regarding this structure:
+
+1. We still have to call `parse` on the message despite using `until_parsed_as`. This is
+   a limitation of type hinting, as we'd have to turn every `PendingChat` and `Chat` into a generic
+   which could carry types forward. It's a small price for big code-complexity savings.
+2. Internally, the generation code inside `PendingChat` will attempt to re-generate until
+   the LLM correctly produces parsable output, up until a maximum number of "rounds" is reached.
+   This process is configurable with the arguments to all [`until`][rigging.chat.PendingChat.until_parsed_as]
+   or [`using`][rigging.chat.PendingChat.using] functions (see the sketch below).
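+
+As a sketch of that configuration (`attempt_recovery` appears elsewhere in these docs, while the `max_rounds`
+keyword is an assumption for illustration), you might tune the retry behavior like this:
+
+```py
+chat = (
+    rg.get_generator("gpt-3.5-turbo")
+    .chat(f"Provide a fun fact between {FunFact.xml_example()} tags.")
+    .until_parsed_as(FunFact, attempt_recovery=True, max_rounds=3)  # max_rounds is an assumed keyword
+    .run()
+)
+```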
+
+## Parsing Many Models
+
+Assuming we wanted to extend our example to produce a set of interesting facts, we have a couple of options:
+
+1. Simply use [`run_many()`][rigging.chat.PendingChat.run_many] and generate N examples individually
+2. Rework our code slightly and let the model provide us multiple facts at once.
+
+=== "Option 1 - Multiple Generations"
+
+ ```py
+ chats = rg.get_generator('gpt-3.5-turbo').chat(
+ f"Provide a fun fact between {FunFact.xml_example()} tags."
+ ).run_many(3)
+
+ for chat in chats:
+ print(chat.last.parse(FunFact).fact)
+ ```
+
+=== "Option 2 - Inline Set"
+
+ ```py
+ chat = rg.get_generator('gpt-3.5-turbo').chat(
+        f"Provide 3 fun facts, each between {FunFact.xml_example()} tags."
+ ).run()
+
+ for fun_fact in chat.last.parse_set(FunFact):
+ print(fun_fact.fact)
+ ```
\ No newline at end of file
diff --git a/docs/home/introduction.md b/docs/home/introduction.md
new file mode 100644
index 0000000..de93c2c
--- /dev/null
+++ b/docs/home/introduction.md
@@ -0,0 +1,39 @@
+# Rigging
+
+Rigging is a lightweight LLM interaction framework built on Pydantic XML. The goal is to make leveraging LLMs in production pipelines as simple and effective as possible. Here are the highlights:
+
+- **Structured Pydantic models** can be used interchangeably with unstructured text output.
+- LiteLLM as the default generator giving you **instant access to a huge array of models**.
+- Add easy **tool calling** abilities to models which don't natively support it.
+- Store different models and configs as **simple connection strings** just like databases.
+- Chat templating, forking, continuations, generation parameter overloads, stripping segments, etc.
+- Modern python with type hints, async support, pydantic validation, serialization, etc.
+
+```py
+import rigging as rg
+from rigging.model import CommaDelimitedAnswer as Answer
+
+chat = rg.get_generator('gpt-4') \
+ .chat(f"Give me 3 famous authors between {Answer.xml_tags()} tags.") \
+ .until_parsed_as(Answer) \
+ .run()
+
+answer = chat.last.parse(Answer)
+print(answer.items)
+
+# ['J. R. R. Tolkien', 'Stephen King', 'George Orwell']
+```
+
+Rigging is built and maintained by [dreadnode](https://dreadnode.io) where we use it daily for our work.
+
+## Installation
+We publish every version to PyPI:
+```bash
+pip install rigging
+```
+
+If you want to build from source:
+```bash
+cd rigging/
+poetry install
+```
diff --git a/docs/home/principles.md b/docs/home/principles.md
new file mode 100644
index 0000000..46eafb3
--- /dev/null
+++ b/docs/home/principles.md
@@ -0,0 +1,23 @@
+# Principles
+
+LLMs are extremely capable machine learning systems, but they operate purely in textual spaces as a byproduct of
+their training data. We have access to the compression of a huge repository of human knowledge, but are limited to querying
+that information via natural language. Our first inclination is to let these language interfaces drive
+our design decisions. We build chat bots and text search, and when it comes time to align them closely
+with the rest of our fixed software stack, we quickly get frustrated by their inconsistencies and our limited
+control over their products.
+
+In software we operate on the principle of known interfaces as the basis for composability. In the functional paradigm, we want our
+software functions to operate like mathematical ones, where the same input always produces the same output with no side effects.
+Funnily enough, LLMs (like all models) also operate in that way (minus things like floating point errors), but we intentionally
+inject randomness into our sampling process to give them the freedom to explore and produce novel outputs. Therefore we shouldn't
+aim for "purity" in the strict sense, but we should aim for consistency in their interfaces.
+
+Once you start to think of a "prompt", "completion", or "chat interaction" as the temporary textual interface by which we pass in
+structured inputs and produce structured outputs, we can begin to link them with traditional software. Many libraries get close to this
+idea, but they rarely hold the opinion that programming types and structures, and not text, are the best way to make LLM-based
+systems composable.
+
+A core opinion of Rigging is to reframe these language models as tools which use tokens of text in context windows to navigate
+latent space and produce probabilities of output tokens, but which do not need to have the data they consume or produce be
+holistically constrained to textual spaces in our use of them.
\ No newline at end of file
diff --git a/docs/index.md b/docs/home/workflow.md
similarity index 56%
rename from docs/index.md
rename to docs/home/workflow.md
index 596caa0..702fe7e 100644
--- a/docs/index.md
+++ b/docs/home/workflow.md
@@ -1,43 +1,3 @@
-# Rigging
-
-Rigging is a lightweight LLM interaction framework built on Pydantic XML. The goal is to make leveraging LLMs in production pipelines as simple and effictive as possible. Here are the highlights:
-
-- **Structured Pydantic models** can be used interchangably with unstructured text output.
-- LiteLLM as the default generator giving you **instant access to a huge array of models**.
-- Add easy **tool calling** abilities to models which don't natively support it.
-- Store different models and configs as **simple connection strings** just like databases.
-- Chat templating, forking, continuations, generation parameter overloads, stripping segments, etc.
-- Modern python with type hints, async support, pydantic validation, serialization, etc.
-
-```py
-import rigging as rg
-from rigging.model import CommaDelimitedAnswer as Answer
-
-answer = rg.get_generator('gpt-4') \
- .chat(f"Give me 3 famous authors between {Answer.xml_tags()} tags.") \
- .until_parsed_as(Answer) \
- .run()
-
-answer = chat.last.parse(Answer)
-print(answer.items)
-
-# ['J. R. R. Tolkien', 'Stephen King', 'George Orwell']
-```
-
-Rigging is built and maintained by [dreadnode](https://dreadnode.io) where we use it daily for our work.
-
-## Installation
-We publish every version to Pypi:
-```bash
-pip install rigging
-```
-
-If you want to build from source:
-```bash
-cd rigging/
-poetry install
-```
-
## Workflow
1. Get a [`Generator`][rigging.generator.Generator] object - usually with [`get_generator()`][rigging.generator.get_generator].
@@ -60,7 +20,7 @@ message (or many messages) based on the prior context and any constraints you ha
You'll often see us use functional styling chaining as most of our
utility functions return the object back to you.
-```python
+```py
chat = generator.chat(...) \
.using(...).until(...).with_(...) \
.run()
diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css
index 81e4a4a..1852eaa 100644
--- a/docs/stylesheets/extra.css
+++ b/docs/stylesheets/extra.css
@@ -1,3 +1,4 @@
+/* Main color overrides */
[data-md-color-scheme="slate"] {
--md-primary-fg-color: #EAEAEA;
--md-accent-fg-color: rgb(149, 133, 227);
@@ -6,26 +7,70 @@
--md-primary-bg-color: #191919;
--md-default-bg-color: #191919;
- --md-default-fg-color: hsla(0, 0%, 100%, 0.90);
- --md-default-fg-color--light: hsla(0, 0%, 100%, 0.70);
- --md-default-fg-color--lighter: hsla(0, 0%, 100%, 0.60);
- --md-default-fg-color--lightest: hsla(0, 0%, 100%, 0.40);
-
- --md-footer-bg-color: hsla(0, 0%, 10%, 0.87);
- --md-footer-bg-color--dark: hsla(0, 0%, 8%, 1);
-
- --md-typeset-a-color: var(--md-accent-fg-color);
-
- --md-code-hl-number-color: rgb(231, 107, 93);
- --md-code-hl-special-color: hsla(340, 83%, 66%, 1);
- --md-code-hl-function-color: hsla(291, 57%, 65%, 1);
- --md-code-hl-constant-color: hsla(250, 62%, 70%, 1);
- --md-code-hl-keyword-color: hsla(219, 66%, 64%, 1);
- --md-code-hl-string-color: var(--md-accent-fg-color);
- --md-code-hl-name-color: var(--md-default-fg-color--light);
- --md-code-hl-operator-color: var(--md-default-fg-color--light);
- --md-code-hl-punctuation-color: var(--md-default-fg-color--light);
- --md-code-hl-comment-color: rgb(55, 161, 108);
- --md-code-hl-generic-color: var(--md-default-fg-color--light);
- --md-code-hl-variable-color: var(--md-default-fg-color--light);
+ --md-default-fg-color: hsla(0, 0%, 100%, 0.90);
+ --md-default-fg-color--light: hsla(0, 0%, 100%, 0.70);
+ --md-default-fg-color--lighter: hsla(0, 0%, 100%, 0.60);
+ --md-default-fg-color--lightest: hsla(0, 0%, 100%, 0.40);
+
+ --md-footer-bg-color: hsla(0, 0%, 10%, 0.87);
+ --md-footer-bg-color--dark: hsla(0, 0%, 8%, 1);
+
+ --md-typeset-a-color: var(--md-accent-fg-color);
+
+ --md-code-hl-number-color: rgb(231, 107, 93);
+ --md-code-hl-special-color: hsla(340, 83%, 66%, 1);
+ --md-code-hl-function-color: hsla(291, 57%, 65%, 1);
+ --md-code-hl-constant-color: hsla(250, 62%, 70%, 1);
+ --md-code-hl-keyword-color: hsla(219, 66%, 64%, 1);
+ --md-code-hl-string-color: var(--md-accent-fg-color);
+ --md-code-hl-name-color: var(--md-default-fg-color--light);
+ --md-code-hl-operator-color: var(--md-default-fg-color--light);
+ --md-code-hl-punctuation-color: var(--md-default-fg-color--light);
+ --md-code-hl-comment-color: rgb(55, 161, 108);
+ --md-code-hl-generic-color: var(--md-default-fg-color--light);
+ --md-code-hl-variable-color: var(--md-default-fg-color--light);
+}
+
+/* Indentation. */
+div.doc-contents:not(.first) {
+ padding-left: 25px;
+ border-left: .05rem solid var(--md-typeset-table-color);
+}
+
+/* Mark external links as such. */
+a.external::after,
+a.autorefs-external::after {
+ /* https://primer.style/octicons/arrow-up-right-24 */
+ mask-image: url('data:image/svg+xml,');
+ -webkit-mask-image: url('data:image/svg+xml,');
+ content: ' ';
+
+ display: inline-block;
+ vertical-align: middle;
+ position: relative;
+
+ height: 1em;
+ width: 1em;
+ background-color: currentColor;
+}
+
+a.external:hover::after,
+a.autorefs-external:hover::after {
+ background-color: var(--md-accent-fg-color);
+}
+
+/* Fancier color for operators such as * and |. */
+.doc-signature .o {
+ color: var(--md-code-hl-special-color);
+}
+
+/* Fancier color for constants such as None, True, and False. */
+.doc-signature .kc {
+ color: var(--md-code-hl-constant-color);
+}
+
+/* Fancier color for built-in types (only useful when cross-references are used). */
+.doc-signature .n>a[href^="https://docs.python.org/"][href*="/functions.html#"],
+.doc-signature .n>a[href^="https://docs.python.org/"][href*="/stdtypes.html#"] {
+ color: var(--md-code-hl-constant-color);
}
\ No newline at end of file
diff --git a/docs/topics/agents.md b/docs/topics/agents.md
new file mode 100644
index 0000000..43bfe53
--- /dev/null
+++ b/docs/topics/agents.md
@@ -0,0 +1,16 @@
+!!! info
+
+ This page is under construction.
+
+# Agents
+
+Building agents in Rigging is a straightforward process. You allow the model to emit a structured model for
+a variety of actions you wish to support, and loop over those generation steps, executing code as
+actions are selected.
+
+The first instinct might be to use [tools](tools.md) for this process, but that might abstract too much
+control away from the generation process.
+
+## Basic Example
+
+todo.
\ No newline at end of file
diff --git a/docs/topics/async-and-batching.md b/docs/topics/async-and-batching.md
new file mode 100644
index 0000000..f7846f4
--- /dev/null
+++ b/docs/topics/async-and-batching.md
@@ -0,0 +1,177 @@
+# Async and Batching
+
+Rigging has good support for handling async generation and large batching of requests. How efficiently
+these mechanisms operate depends on the underlying generator being used, but Rigging has
+been developed with scale in mind.
+
+## Multiple Generations
+
+The [`.run_many`][rigging.chat.PendingChat.run_many] and [`.arun_many`][rigging.chat.PendingChat.arun_many] functions
+let you take the same inputs and generation parameters, and simply run the generation multiple times.
+
+=== "Run Many Code"
+
+ ```py
+ import rigging as rg
+
+ def check_animal(chats: list[rg.Chat]) -> list[rg.Chat]:
+ return [
+ chat.continue_(f"Why did you pick that animal?").meta(questioned=True).run()
+ if any(a in chat.last.content.lower() for a in ["cat", "dog", "cow", "mouse"])
+ else chat
+ for chat in chats
+ ]
+
+ chats = (
+ rg.get_generator("gpt-3.5-turbo")
+ .chat("Tell me a joke about an animal.")
+ .map(check_animal)
+ .run_many(3)
+ )
+
+ for i, chat in enumerate(chats):
+ questioned = chat.metadata.get("questioned", False)
+ print(f"--- Chat {i+1} (?: {questioned}) ---")
+ print(chat.conversation)
+ print()
+ ```
+
+=== "Output"
+
+ ```
+ --- Chat 1 (?: False) ---
+ [user]: Tell me a joke about an animal.
+
+ [assistant]: Why did the spider go to the computer?
+
+ To check his website!
+
+ --- Chat 2 (?: False) ---
+ [user]: Tell me a joke about an animal.
+
+ [assistant]: Why did the chicken join a band? Because it had the drumsticks!
+
+ --- Chat 3 (?: True) ---
+ [user]: Tell me a joke about an animal.
+
+ [assistant]: Why don't elephants use computers?
+
+ Because they're afraid of the mouse!
+
+ [user]: Why did you pick that animal?
+
+ [assistant]: I chose an elephant because they are known for their intelligence and gentle nature, making them a popular subject for jokes and humorous anecdotes. Plus, imagining an elephant trying to use a computer and being scared of a tiny mouse is a funny visual image!
+ ```
+
+## Batching Inputs
+
+You can use the [`.run_batch`][rigging.chat.PendingChat.run_batch] and [`.arun_batch`][rigging.chat.PendingChat.arun_batch]
+functions to batch across a set of inputs and collect all the chats. As processing proceeds with things like
+[`.then`][rigging.chat.PendingChat.then] or [`.until_parsed_as`][rigging.chat.PendingChat.until_parsed_as], the chats
+will resolve individually and collapse into the final results.
+
+=== "Batching Inputs Code"
+
+ ```py
+ import rigging as rg
+ from rigging.model import CommaDelimitedAnswer
+
+ pending = (
+ rg.get_generator('gpt-3.5-turbo')
+ .chat({
+ "role": "system",
+ "content": f"Always respond with {CommaDelimitedAnswer.xml_tags()} tags."}
+ )
+ .until_parsed_as(CommaDelimitedAnswer, attempt_recovery=True)
+ )
+
+ many = [f"Give me 3 famous {thing}" for thing in ["authors", "painters", "musicians", "hackers"]]
+
+ chats = await pending.arun_batch(many, skip_failed=True)
+
+ for i, chat in enumerate(chats):
+ print(f"--- Chat {i+1} ({len(chat)}) ---")
+ print(chat.last.parse(CommaDelimitedAnswer).items)
+ print()
+ ```
+
+=== "Outputs"
+
+ ```
+ --- Chat 1 (2) ---
+ ['Leonardo da Vinci', 'Vincent van Gogh', 'Pablo Picasso']
+
+ --- Chat 2 (2) ---
+ ['Michael Jackson', 'Beyoncé', 'The Beatles']
+ ```
+
+!!! tip "Skipping failed results"
+
+ Passing `skip_failed=True` to [`.run_batch`][rigging.chat.PendingChat.run_batch] will cause the function to
+ ignore any parsing errors like [`ExhaustedMaxRoundsError`][rigging.error.ExhaustedMaxRoundsError] and only
+ return the chats that were successful.
+
+
+## Batching Parameters
+
+In addition to batching against input messages or strings, you can fix a single input
+and build a batch across a set of generation parameters. The inputs to
+[`.run_batch`][rigging.chat.PendingChat.run_batch] and [`.arun_batch`][rigging.chat.PendingChat.arun_batch]
+will scale either the generation parameters or the input messages if either is a single item.
+
+=== "Batching Code"
+
+ ```py
+ import rigging as rg
+
+ pending = rg.get_generator("gpt-3.5-turbo").chat()
+
+ chats = await pending.arun_batch(
+        ["Tell me a short fact about a Japanese city."],
+ [rg.GenerateParams(temperature=t) for t in [0.6, 0.9, 1.2, 1.5, 1.8]]
+ )
+
+ for i, chat in enumerate(chats):
+ print(f"--- Chat {i+1} ---")
+ print(chat.generator_id)
+ print()
+ print(chat.conversation)
+ print()
+ ```
+
+=== "Outputs"
+
+ ```
+ --- Chat 1 ---
+ litellm!gpt-3.5-turbo,temperature=0.6
+
+ [assistant]: Tokyo, the capital city of Japan, is the most populous
+ metropolitan area in the world, with over 37 million residents.
+
+ --- Chat 2 ---
+ litellm!gpt-3.5-turbo,temperature=0.9
+
+ [assistant]: Tokyo is the largest metropolitan area in the world,
+ with a population of over 37 million people.
+
+ --- Chat 3 ---
+ litellm!gpt-3.5-turbo,temperature=1.2
+
+ [assistant]: Kyoto, a city in Japan known for its historic temples
+ and gardens, was once the capital of Japan for over 1,000 years from
+ 794 until the capital was moved to Tokyo in 1869.
+
+ --- Chat 4 ---
+ litellm!gpt-3.5-turbo,temperature=1.5
+
+ [assistant]: Nagoya, Japan is known for being one of the leading
+ manufacturing and industrial regions in the country, with a strong
+ automotive presence including major factories for Toyota, Honda, and Mitsubishi.
+
+ --- Chat 5 ---
+ litellm!gpt-3.5-turbo,temperature=1.8
+
+ [assistant]: Sendai is the largest city in the Tohoku region of
+ Japan and is known for its incredible natural scenery, such as the
+ nearby Sendai Bay and Zuihoden mausoleum.
+ ```
\ No newline at end of file
diff --git a/docs/topics/callbacks-and-mapping.md b/docs/topics/callbacks-and-mapping.md
new file mode 100644
index 0000000..263336c
--- /dev/null
+++ b/docs/topics/callbacks-and-mapping.md
@@ -0,0 +1,188 @@
+# Callbacks and Mapping
+
+Rigging is designed to give control over how the generation process works, and what occurs after. In fact, functions
+like [`.using()`][rigging.chat.PendingChat.using] and [`.until_parsed_as()`][rigging.chat.PendingChat.until_parsed_as] actually
+leverage a generic callback system underneath to guide generation. Let's walk through them.
+
+## Until Callbacks
+
+If you want to gain control over the generation process before it completes, you can use the
+[`PendingChat.until()`][rigging.chat.PendingChat.until] or [`PendingCompletion.until()`][rigging.completion.PendingCompletion.until]
+methods. These allow you to register a callback function which participates in generation and
+can decide whether generation should proceed, and exactly how it does so.
+
+```py
+import rigging as rg
+
+class Joke(rg.Model):
+ content: str
+
+def involves_a_cat(message: rg.Message) -> tuple[bool, list[rg.Message]]:
+ if "cat" not in message.content.lower():
+ return True, [message, rg.Message("user", "Please include a cat in your joke")] # (1)!
+ return False, [message]
+
+chat = (
+ rg.get_generator("gpt-3.5-turbo")
+ .chat(f"Tell me a joke about an animal between {Joke.xml_tags()} tags.")
+ .until_parsed_as(Joke)
+ .until(involves_a_cat, drop_dialog=False) # (2)!
+ .run()
+)
+
+print(chat.conversation)
+# [user]: Tell me a joke about an animal between <joke></joke> tags.
+# [assistant]: Why did the duck go to the doctor? Because he was feeling a little down!
+# [user]: Please include a cat in your joke
+# [assistant]: Why was the cat sitting on the computer? Because it wanted to keep an eye on the mouse!
+```
+
+1. Returning `True` from this callback tells Rigging to go back to the generator with the supplied
+   messages and rerun the generation step. Whether your appended messages are used depends
+   on `attempt_recovery=True` on [`PendingChat.until()`][rigging.chat.PendingChat.until]. In
+   this instance our request to include a cat will be appended to the intermediate messages while
+   generation completes. We can essentially provide feedback to the model about how it should attempt
+   to satisfy the callback function.
+2. Our use of `drop_dialog=False` here allows us to see the intermediate steps of resolving
+ our callbacks in the final Chat. It's up to you whether you want these intermediate messages
+ included or not. The default is to drop them once the callbacks resolve.
+
+??? "Using .until on PendingCompletion"
+
+ The interface for a `PendingCompletion` is very similar to `PendingChat`, except that you
+ are only allowed to make a statement about whether generation should retry. You are not
+ currently allowed to inject additional text as intermediate context while your callback
+ is attempting to resolve.
+
+## Then Callbacks
+
+You might prefer to have your callbacks execute after generation completes, and operate on
+the Chat/Completion objects from there. This is functionally very similar to [`PendingChat.until()`][rigging.chat.PendingChat.until],
+but it exposes more of the parsing internals to your code as opposed to
+the opaque nature of other callback types. Use [`PendingChat.then()`][rigging.chat.PendingChat.then]
+to register any number of callbacks before executing [`PendingChat.run()`][rigging.chat.PendingChat.run].
+
+??? tip "Async Callbacks"
+
+ You are free to define async versions of your callbacks here, but the type of callbacks
+ registered has to match your use of either sync [`.run()`][rigging.chat.PendingChat.run] variants
+ or their async [`.arun()`][rigging.chat.PendingChat.arun] versions.
+
+=== "Using .then()"
+
+ ```py
+ import rigging as rg
+
+ def check_animal(chat: rg.Chat) -> rg.Chat | None:
+ for animal in ["cat", "dog", "cow", "mouse", "elephant", "chicken"]:
+ if animal in chat.last.content.lower():
+ pending = chat.continue_(f"Why did you pick {animal}?")
+ return pending.meta(questioned=True).run()
+
+ pending = rg.get_generator("gpt-3.5-turbo").chat("Tell me a joke about an animal.")
+ pending = pending.then(check_animal)
+ chats = pending.run_many(3)
+
+ for i, chat in enumerate(chats):
+ questioned = chat.metadata.get("questioned", False)
+ print(f"--- Chat {i+1} (?: {questioned}) ---")
+ print(chat.conversation)
+ print()
+ ```
+
+=== "Output"
+
+ ```
+ --- Chat 1 (?: True) ---
+ [user]: Tell me a joke about an animal.
+
+ [assistant]: Why did the cat sit on the computer? To keep an eye on the mouse!
+
+ [user]: Why did you pick cat?
+
+ [assistant]: Because they are purr-fect for computer-related jokes!
+
+ --- Chat 2 (?: False) ---
+ [user]: Tell me a joke about an animal.
+
+ [assistant]: Why did the duck go to the doctor? Because he was feeling a little "fowl"!
+
+ --- Chat 3 (?: True) ---
+ [user]: Tell me a joke about an animal.
+
+ [assistant]: Why did the chicken join a band? Because it had the drumsticks!
+
+ [user]: Why did you pick chicken?
+
+ [assistant]: Because chickens are always up for a good cluck!
+ ```
+
+## Map Callbacks
+
+Rigging also allows you to map-process a group of Chats all at once. This is particularly
+useful when using [`.run_many()`][rigging.chat.PendingChat.run_many],
+[`.run_batch()`][rigging.chat.PendingChat.run_batch], or their async variants.
+
+You also might want to take certain actions depending on the state of a set of Chats
+all at once. For instance, attempting re-generation if a certain % of Chats didn't
+meet some criteria.
+
+??? tip "Async Callbacks"
+
+ You are free to define async versions of your callbacks here, but the type of callbacks
+ registered has to match your use of either sync [`.run_many()`][rigging.chat.PendingChat.run_many] variants
+    or their async [`.arun_many()`][rigging.chat.PendingChat.arun_many] versions.
+
+=== "Using .map()"
+
+ ```py
+ import rigging as rg
+
+ def check_animal(chats: list[rg.Chat]) -> list[rg.Chat]:
+ return [
+ chat.continue_(f"Why did you pick that animal?").meta(questioned=True).run()
+ if any(a in chat.last.content.lower() for a in ["cat", "dog", "cow", "mouse", "elephant", "chicken"])
+ else chat
+ for chat in chats
+ ]
+
+ chats = (
+ rg.get_generator("gpt-3.5-turbo")
+ .chat("Tell me a joke about an animal.")
+ .map(check_animal)
+ .run_many(3)
+ )
+
+ for i, chat in enumerate(chats):
+ questioned = chat.metadata.get("questioned", False)
+ print(f"--- Chat {i+1} (?: {questioned}) ---")
+ print(chat.conversation)
+ print()
+ ```
+
+=== "Output"
+
+ ```
+ --- Chat 1 (?: True) ---
+ [user]: Tell me a joke about an animal.
+
+ [assistant]: Why did the duck cross the road? To prove he wasn't chicken!
+
+ [user]: Why did you pick that animal?
+
+ [assistant]: I chose a duck because they're known for their sense of humor and whimsical nature! Plus, who doesn't love a good duck joke?
+
+ --- Chat 2 (?: True) ---
+ [user]: Tell me a joke about an animal.
+
+ [assistant]: Why did the chicken join a band? Because it had the drumsticks!
+
+ [user]: Why did you pick that animal?
+
+ [assistant]: I chose a chicken because they are often associated with funny jokes and puns due to their quirky and comedic behavior. Plus, who doesn't love a good chicken joke?
+
+ --- Chat 3 (?: False) ---
+ [user]: Tell me a joke about an animal.
+
+ [assistant]: Why did the duck go to the doctor? Because he was feeling a little down in the dumps!
+ ```
\ No newline at end of file
diff --git a/docs/topics/chats-and-messages.md b/docs/topics/chats-and-messages.md
new file mode 100644
index 0000000..0754d2a
--- /dev/null
+++ b/docs/topics/chats-and-messages.md
@@ -0,0 +1,158 @@
+# Chats and Messages
+
+[`Chat`][rigging.chat.Chat] objects hold a sequence of [`Message`][rigging.message.Message] objects pre and post generation. This
+is the most common way that we interact with LLMs, and both these and [`PendingChat`][rigging.chat.PendingChat] objects are
+very flexible, letting you tune the generation process, gather structured outputs, validate parsing, perform text replacements,
+serialize and deserialize, fork conversations, and more.
+
+## Basic Usage
+
+```py
+import rigging as rg
+
+generator = rg.get_generator("claude-2.1")
+chat = generator.chat(
+ [
+ {"role": "system", "content": "You're a helpful assistant."},
+ {"role": "user", "content": "Say hello!"},
+ ]
+).run()
+
+print(chat.last)
+# [assistant]: Hello!
+
+print(f"{chat.last!r}")
+# Message(role='assistant', parts=[], content='Hello!')
+
+print(chat.prev)
+# [
+# Message(role='system', parts=[], content='You're a helpful assistant.'),
+# Message(role='user', parts=[], content='Say hello!'),
+# ]
+
+print(chat.message_dicts)
+# [
+# {'role': 'system', 'content': 'You're a helpful assistant.'},
+# {'role': 'user', 'content': 'Say Hello!'},
+# {'role': 'assistant', 'content': 'Hello!'}
+# ]
+
+print(chat.conversation)
+# [system]: You're a helpful assistant.
+
+# [user]: Say hello!
+
+# [assistant]: Hello!
+```
+
+## Templating (apply)
+
+You can use both [`PendingChat.apply()`][rigging.chat.PendingChat.apply] and [`PendingChat.apply_to_all()`][rigging.chat.PendingChat.apply_to_all]
+to swap values prefixed with `$` characters inside message contents for fast templating support.
+
+This functionality uses [string.Template.safe_substitute](https://docs.python.org/3/library/string.html#string.Template.safe_substitute) underneath.
+
+```py
+import rigging as rg
+
+template = rg.get_generator("gpt-4").chat([
+    {"role": "user", "content": "What is the capital of $country?"},
+])
+
+for country in ["France", "Germany"]:
+ print(template.apply(country=country).run().last)
+
+# The capital of France is Paris.
+# The capital of Germany is Berlin.
+```
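+
+Presumably, [`apply_to_all()`][rigging.chat.PendingChat.apply_to_all] follows the same pattern but substitutes
+across every message in the conversation. A small sketch, assuming it shares the keyword-argument signature of `apply()`:
+
+```py
+import rigging as rg
+
+pending = rg.get_generator("gpt-4").chat([
+    {"role": "system", "content": "You only discuss $country."},
+    {"role": "user", "content": "Tell me about the capital of $country."},
+])
+
+print(pending.apply_to_all(country="France").run().last)
+```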
+
+## Parsed Parts
+
+Message objects hold all of their parsed [`ParsedMessagePart`][rigging.message.ParsedMessagePart] objects inside their
+[`.parts`][rigging.message.Message.parts] property. These parts maintain both the instance of the parsed Rigging
+model object and a [`.slice_`][rigging.message.ParsedMessagePart.slice_] property that defines exactly
+where in the message content they are located.
+
+Every time parsing occurs, these parts are re-synced by calling [`.to_pretty_xml()`][rigging.model.Model.to_pretty_xml]
+on the model, stitching the clean content back into the message, fixing any other slices which might
+have been affected by the operation, and ordering the [`.parts`][rigging.message.Message.parts] property based on where
+they occur in the message content.
+
+```py
+import rigging as rg
+from pydantic import StringConstraints
+from typing import Annotated
+
+str_strip = Annotated[str, StringConstraints(strip_whitespace=True)]
+
+class Summary(rg.Model):
+ content: str_strip
+
+message = rg.Message(
+ "assistant",
+    "Sure, the summary is: <summary >Rigging is a very powerful library </summary>. I hope that helps!"
+)
+
+message.parse(Summary)
+
+print(message.content) # (1)!
+# Sure, the summary is: <summary>Rigging is a very powerful library</summary>. I hope that helps!
+
+print(message.parts)
+# [
+# ParsedMessagePart(model=Summary(content='Rigging is a very powerful library'), slice_=slice(22, 75, None))
+# ]
+
+print(message.content[message.parts[0].slice_])
+# <summary>Rigging is a very powerful library</summary>
+```
+
+1. Notice how our message content got updated to reflect fixing the extra whitespace
+   in our start tag and applying our string-stripping annotation.
+
+## Stripping Parts
+
+Because we track exactly where a parsed model is inside a message, we can cleanly remove just that portion from
+the content and re-sync the other parts to align with the new content. This is helpful for removing context
+from a conversation that you might not want there for future generations.
+
+This is a very powerful primitive, that allows you to operate on messages more like a collection of structured
+models than raw text.
+
+```py
+import rigging as rg
+
+class Reasoning(rg.Model):
+ content: str
+
+meaning = rg.get_generator("claude-2.1").chat([
+ {
+ "role": "user",
+ "content": "What is the meaning of life in one sentence? "
+ f"Document your reasoning between {Reasoning.xml_tags()} tags.",
+ },
+]).run()
+
+# Gracefully handle missing models
+reasoning = meaning.last.try_parse(Reasoning)
+if reasoning:
+ print("Reasoning:", reasoning.content)
+
+# Strip parsed content to avoid sharing
+# previous thoughts with the model.
+without_reasons = meaning.strip(Reasoning)
+print("Meaning of life:", without_reasons.last.content)
+
+# follow_up = without_reasons.continue_(...)
+```
+
+## Metadata
+
+Both Chats and PendingChats support the concept of arbitrary metadata that you can use to
+store things like tags, metrics, and supporting data for storage, sorting, and filtering.
+
+- [`PendingChat.meta()`][rigging.chat.PendingChat.meta] adds to [`PendingChat.metadata`][rigging.chat.PendingChat.metadata]
+- [`Chat.meta()`][rigging.chat.Chat.meta] adds to [`Chat.metadata`][rigging.chat.Chat.metadata]
+
+Metadata will carry forward from a PendingChat to a Chat object when generation completes. This
+metadata is also maintained in the [serialization process](serialization.md).
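+
+A minimal sketch (the metadata keys here are arbitrary examples):
+
+```py
+import rigging as rg
+
+pending = (
+    rg.get_generator("gpt-3.5-turbo")
+    .chat("Say hello!")
+    .meta(experiment="baseline")  # attached to the PendingChat
+)
+
+chat = pending.run()
+
+# Metadata set before generation carries forward to the Chat
+print(chat.metadata.get("experiment"))
+# baseline
+```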
\ No newline at end of file
diff --git a/docs/topics/chats.md b/docs/topics/chats.md
deleted file mode 100644
index af84250..0000000
--- a/docs/topics/chats.md
+++ /dev/null
@@ -1,99 +0,0 @@
-!!! note
- This content is currently being refactored
-
-### Basic Chats
-
-```python
-import rigging as rg
-
-generator = rg.get_generator("claude-2.1")
-chat = generator.chat(
- [
- {"role": "system", "content": "You are a wizard harry."},
- {"role": "user", "content": "Say hello!"},
- ]
-).run()
-
-print(chat.last)
-# [assistant]: Hello!
-
-print(f"{chat.last!r}")
-# Message(role='assistant', parts=[], content='Hello!')
-
-print(chat.prev)
-# [
-# Message(role='system', parts=[], content='You are a wizard harry.'),
-# Message(role='user', parts=[], content='Say hello!'),
-# ]
-
-print(chat.json)
-# [{ ... }]
-
-```
-
-### Continuing Chats
-
-```python
-import rigging as rg
-
-generator = rg.get_generator("gpt-3.5-turbo")
-chat = generator.chat([
- {"role": "user", "content": "Hello, how are you?"},
-])
-
-# We can fork (continue_) before generation has occured
-specific = chat.fork("Be specific please.").run()
-poetic = chat.fork("Be as poetic as possible").overload(temperature=1.5).run()
-
-# We can also fork (continue_) after generation
-next_chat = poetic.fork(
- {"role": "user", "content": "That's good, tell me a joke"}
-)
-
-update = next_chat.run()
-```
-
-### Basic Templating
-
-```python
-import rigging as rg
-
-template = rg.get_generator("gpt-4").chat([
- {"role": "user", "content": "What is the capitol of $country?"},
-])
-
-for country in ["France", "Germany"]:
- print(template.apply(country=country).run().last)
-
-# The capital of France is Paris.
-# The capital of Germany is Berlin.
-```
-
-### Strip Parsed Sections
-
-```python
-import rigging as rg
-
-class Reasoning(rg.Model):
- content: str
-
-meaning = rg.get_generator("claude-2.1").chat([
- {
- "role": "user",
- "content": "What is the meaning of life in one sentence? "
- f"Document your reasoning between {Reasoning.xml_tags()} tags.",
- },
-]).run()
-
-# Gracefully handle mising models
-reasoning = meaning.last.try_parse(Reasoning)
-if reasoning:
- print("reasoning:", reasoning.content.strip())
-
-# Strip parsed content to avoid sharing
-# previous thoughts with the model.
-without_reasons = meaning.strip(Reasoning)
-print("meaning of life:", without_reasons.last.content.strip())
-
-# follow_up = without_thoughts.continue_(...)
-```
\ No newline at end of file
diff --git a/docs/topics/cheatsheet.md b/docs/topics/cheatsheet.md
new file mode 100644
index 0000000..c372f75
--- /dev/null
+++ b/docs/topics/cheatsheet.md
@@ -0,0 +1,14 @@
+!!! info
+
+ This page is under construction.
+
+# Cheatsheet
+
+## Generators
+
+| Method | Description |
+| ------------------------------------------------------ | ------------------------------------------------------------- |
+| [`get_generator`][rigging.generator.get_generator] | Convert a string id to a generator |
+| [`get_identifier`][rigging.generator.get_identifier]           | Convert a generator back to a string                          |
+| [`Generator.chat`][rigging.generator.Generator.chat]           | Start a [`PendingChat`][rigging.chat.PendingChat]             |
+| [`Generator.complete`][rigging.generator.Generator.complete]   | Start a [`PendingCompletion`][rigging.completion.PendingCompletion] |
diff --git a/docs/topics/completions.md b/docs/topics/completions.md
new file mode 100644
index 0000000..56b3faa
--- /dev/null
+++ b/docs/topics/completions.md
@@ -0,0 +1,70 @@
+# Completions
+
+The majority of Rigging was built around "instruct" or "chat" LLM interfaces where
+a base model has been tuned to work with a structured layer on top of raw text completion. We typically
+find that base models are more unpredictable with their outputs, tend to be more sensitive to small
+changes in their context windows, and require frequent use of [stop tokens][rigging.generator.GenerateParams.stop]
+to prevent unnecessary generation.
+
+However, there are some places where completing raw text and working with base models might be desirable:
+
+- Fewer restrictions on the types of content they will generate
+- Speeding up generation and lowering token usage by discouraging verbose responses
+- Leveraging prompts from popular libraries like [LangChain](https://python.langchain.com/) which assume
+ a completions-style interface
+
+## Interface Parity
+
+While we try to maintain parity between the "Chat" and "Completions" interfaces in Rigging, you'll
+find some deviations here and there. Completions should be a simple transition if you are familiar
+with the other code in rigging. Here are the highlights:
+
+- [`chat`][rigging.generator.Generator.chat] ~= [`complete`][rigging.generator.Generator.complete]
+- [`Chat`][rigging.chat.Chat] ~= [`Completion`][rigging.completion.Completion]
+- [`PendingChat`][rigging.chat.PendingChat] ~= [`PendingCompletion`][rigging.completion.PendingCompletion]
+- [`generate_messages`][rigging.generator.Generator.generate_messages] ~= [`generate_texts`][rigging.generator.Generator.generate_texts]
+
+On all of these interfaces, you'll note that sequences of [`Message`][rigging.message.Message] objects have been
+replaced with basic `str` objects for both inputs and outputs.
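+
+As a quick sketch of the basic flow (the model id and prompt here are arbitrary), completions mirror the chat pattern:
+
+```py
+import rigging as rg
+
+completion = (
+    rg.get_generator("gpt-3.5-turbo")
+    .complete("The capital of France is")
+    .run()
+)
+
+print(completion.generated)  # e.g. " Paris."
+```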
+
+## Translator Example
+
+Let's build a simple translator that we can store as a [`PendingCompletion`][rigging.completion.PendingCompletion]
+and use to quickly translate a phrase into 3 different languages.
+
+```py
+import rigging as rg
+
+PROMPT = """\
+As an expert translator, you accept english text and translate it to $language.
+
+# Format
+
+Input: [english text]
+Output: [translated text]
+---
+
+Input: $input
+Output: """
+
+translator = rg.get_generator('gpt-3.5-turbo') \
+ .complete(PROMPT) \
+ .with_(stop=["---", "Input:", "\n\n"])
+
+text = "Could you please tell me where the nearest train station is?"
+
+for language in ["spanish", "french", "german"]:
+ completion = translator.apply(
+ language=language,
+ input=text
+ ).run()
+ print(f"[{language}]: {completion.generated}")
+
+# [spanish]: ¿Podría decirme por favor dónde está la estación de tren más cercana?
+# [french]: Pouvez-vous me dire où se trouve la gare la plus proche, s'il vous plaît ?
+# [german]: Könnten Sie mir bitte sagen, wo sich der nächste Bahnhof befindet?
+```
+
+!!! tip "Using .apply()"
+
+    Text completion is a great place to use the [`.apply`][rigging.completion.PendingCompletion.apply]
+    method, as we can easily slot our inputs into the prompt without using [`.add`][rigging.completion.PendingCompletion.add]
+    and then following it with the output section of the prompt.
\ No newline at end of file
diff --git a/docs/topics/generators.md b/docs/topics/generators.md
index c65d5be..2f2c4c8 100644
--- a/docs/topics/generators.md
+++ b/docs/topics/generators.md
@@ -1,26 +1,126 @@
-!!! note
- This content is currently being refactored
+# Generators
-### Overload Generation Params
+Underlying LLMs (or any function which completes text) are represented as generators in Rigging.
+They are typically instantiated using identifier strings and the [`get_generator`][rigging.generator.get_generator] function.
+The base interface is flexible, and designed to support optimizations should the underlying mechanisms support them (batching,
+async, K/V cache, etc.).
-```python
-import rigging as rg
+## Identifiers
-pending = rg.get_generator("gpt-3.5-turbo,max_tokens=50").chat([
- {"role": "user", "content": "Say a haiku about boats"},
-])
+Much like database connection strings, Rigging generators can be represented as strings which define what provider, model,
+API key, generation params, etc. should be used. They are formatted as follows:
-for temp in [0.1, 0.5, 1.0]:
- print(pending.overload(temperature=temp).run().last.content)
+```
+<provider>!<model>,<**kwargs>
+```
+
+- `provider` maps to a particular subclass of [`Generator`][rigging.generator.Generator].
+- `model` is any `str` value, typically used by the provider to indicate a specific LLM to target.
+- `kwargs` are used to carry serialized [`GenerateParams`][rigging.generator.GenerateParams] fields like temperature, stop tokens, etc.
+
+The provider is optional and Rigging will fallback to `litellm`/[`LiteLLMGenerator`][rigging.generator.LiteLLMGenerator] by default.
+You can view the [LiteLLM docs](https://docs.litellm.ai/docs/) for more information about supported model providers and parameters.
+
+Here are some examples of valid identifiers:
```
+gpt-3.5-turbo,temperature=0.5
+openai/gpt-4,api_key=sk-1234
+litellm!claude-3-sonnet-20240229
+anthropic/claude-2.1,stop=output:;---,seed=1337
+together_ai/meta-llama/Llama-3-70b-chat-hf
+```
+
+Building generators from string identifiers is optional, but a convenient way to represent complex LLM configurations.
+
+!!! tip "Back to Strings"
+
+ Any generator can be converted back into an identifier using either [`to_identifier`][rigging.generator.Generator.to_identifier]
+ or [`get_identifier`][rigging.generator.get_identifier].
+
+ ```py
+ generator = rg.get_generator("gpt-3.5-turbo,temperature=0.5")
+ print(generator.to_identifier())
+ # litellm!gpt-3.5-turbo,temperature=0.5
+ ```
+
+## API Keys
+
+All generators carry a [`.api_key`][rigging.generator.Generator.api_key] attribute which can be set directly, or by
+passing `,api_key=` as part of an identifier string. Not all generators will require one, but they are common enough
+that we include the attribute as part of the base class.
+
+Typically you will be using a library like LiteLLM underneath, and can simply use environment variables:
+
+```bash
+export OPENAI_API_KEY=...
+export TOGETHER_API_KEY=...
+export TOGETHERAI_API_KEY=...
+export MISTRAL_API_KEY=...
+export ANTHROPIC_API_KEY=...
+```
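+
+As a small sketch of supplying a key directly instead (the key value below is a placeholder):
+
+```py
+import rigging as rg
+
+# Inline as part of the identifier string
+generator = rg.get_generator("gpt-4,api_key=sk-1234")
+
+# Or set the attribute on the generator directly
+generator.api_key = "sk-1234"
+```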
+
+## Generator interface
+
+::: rigging.generator.Generator
+ options:
+ show_source: false
+ show_signature: false
+ members:
+ - generate_messages
+ - generate_texts
+
+
+## Overload Generation Params
-### Custom Generator
+When working with both [`PendingCompletion`][rigging.completion.PendingCompletion] and [`PendingChat`][rigging.chat.PendingChat], you
+can overload and update any generation params by using the associated [`.with_()`][rigging.chat.PendingChat.with_] function.
-Any custom generator simply needs to implement a `complete` function, and
-then it can be used anywhere inside rigging.
+=== "with_() as keyword arguments"
-```python
+ ```py
+ import rigging as rg
+
+ pending = rg.get_generator("gpt-3.5-turbo,max_tokens=50").chat([
+ {"role": "user", "content": "Say a haiku about boats"},
+ ])
+
+ for temp in [0.1, 0.5, 1.0]:
+ print(pending.with_(temperature=temp).run().last.content)
+ ```
+
+=== "with_() as `GenerateParams`"
+
+ ```py
+ import rigging as rg
+
+ pending = rg.get_generator("gpt-3.5-turbo,max_tokens=50").chat([
+ {"role": "user", "content": "Say a haiku about boats"},
+ ])
+
+ for temp in [0.1, 0.5, 1.0]:
+ print(pending.with_(rg.GenerateParams(temperature=temp)).run().last.content)
+ ```
+
+
+## Writing a Generator
+
+All generators should inherit from the [`Generator`][rigging.generator.Generator] base class, and
+can elect to implement a series of messages, text, and async methods:
+
+- [`def generate_messages(...)`][rigging.generator.Generator.generate_messages] - Used for [`PendingChat.run`][rigging.chat.PendingChat.run] variants.
+- [`async def agenerate_messages(...)`][rigging.generator.Generator.agenerate_messages] - Used for [`PendingChat.arun`][rigging.chat.PendingChat.arun] variants.
+- [`def generate_texts(...)`][rigging.generator.Generator.generate_texts] - Used for [`PendingCompletion.run`][rigging.completion.PendingCompletion.run] variants.
+- [`async def agenerate_texts(...)`][rigging.generator.Generator.agenerate_texts] - Used for [`PendingCompletion.arun`][rigging.completion.PendingCompletion.arun] variants.
+
+*If your generator doesn't implement a particular method like async or text completions, Rigging
+will simply raise a `NotImplementedError` for you*
+
+Generators operate in a batch context by default, taking in groups of message lists or texts. Whether
+your implementation takes advantage of this batching is up to you, but where possible you
+should optimize for it.
+
+```py
class Custom(Generator):
# model: str
# api_key: str
@@ -28,25 +128,44 @@ class Custom(Generator):
custom_field: bool
- def complete(
+ def generate_messages(
self,
- messages: t.Sequence[rg.Message],
- overloads: GenerateParams = GenerateParams(),
- ) -> rg.Message:
+ messages: t.Sequence[t.Sequence[Message]],
+ params: t.Sequence[GenerateParams],
+ *,
+ prefix: t.Sequence[Message] | None = None, # (1)!
+ ) -> t.Sequence[Message]:
+ # If you aren't using prefix for any caching,
+ # you'll frequently just concatenate it
+ if prefix is not None:
+            messages = [list(prefix) + list(msgs) for msgs in messages]
+
+ # merge_with is an easy way to combine overloads
+ params = [
+ self.params.merge_with(p).to_dict() for p in params
+ ]
+
# Access self vars where needed
api_key = self.api_key
model_id = self.model
- # Merge in args for API overloads
- marged: dict[str, t.Any] = self._merge_params(overloads)
+ # output_messages = ...
- # response: str = ...
-
- return rg.Message("assistant", response)
+ return output_messages
generator = Custom(model='foo', custom_field=True)
generator.chat(...)
```
-*Note: we currently don't have anyway to "register" custom generators for `get_generator`.*
\ No newline at end of file
+!!! tip "Registering Generators"
+
+ Use the [`register_generator`][rigging.generator.register_generator] method to add your generator
+ class under a custom provider id so it can be used with [`get_generator`][rigging.generator.get_generator].
+
+ ```py
+ import rigging as rg
+
+ rg.register_generator('custom', Custom)
+ custom = rg.get_generator('custom!foo')
+ ```
\ No newline at end of file
diff --git a/docs/topics/setup_logging.md b/docs/topics/logging.md
similarity index 85%
rename from docs/topics/setup_logging.md
rename to docs/topics/logging.md
index 1074a81..59d21dd 100644
--- a/docs/topics/setup_logging.md
+++ b/docs/topics/logging.md
@@ -1,17 +1,19 @@
+# Logging
+
Rigging uses [loguru](https://loguru.readthedocs.io/) for its logging. By default it disables its logger, allowing users to choose when/how to gather messages.
If you want to let rigging messages flow into loguru, you should enable it:
-```python
+```py
from loguru import logger
logger.enable('rigging')
```
If you want to have some sane default handlers with dual console & file logging,
-you can use the [rigging.logging.configure_logging][] function.
+you can use the [rigging.logging.configure_logging][] function to configure loguru.
-```python
+```py
from rigging.logging import configure_logging
configure_logging(
diff --git a/docs/topics/models.md b/docs/topics/models.md
index 36ca97e..1484af8 100644
--- a/docs/topics/models.md
+++ b/docs/topics/models.md
@@ -1,88 +1,302 @@
-!!! note
- This content is currently being refactored
+# Writing Models
-### Model Parsing
+Model definitions are at the core of Rigging, and provide an extremely powerful interface for defining exactly
+what kinds of input data you support and how it should be validated. Unlike other LLM libraries, defining
+strict types in code is how you navigate the complexity of working with stochastic text.
-```python
-import rigging as rg
+"If the parsing succeeds, I'm now safe to use this data inside my code."
-class Answer(rg.Model):
- content: str
+## Fundamentals
-chat = (
- rg.get_generator("claude-3-haiku-20240307")
- .chat([
- {"role": "user", "content": f"Say your name between {Answer.xml_tags()}."},
- ])
- .until_parsed_as(Answer)
- .run()
-)
+Every model in Rigging should extend the [`Model`][rigging.model.Model] base class. This is a lightweight wrapper around pydantic-xml's [`BaseXmlModel`](https://pydantic-xml.readthedocs.io/en/latest/pages/api.html#pydantic_xml.BaseXmlModel)
+with some added features and functionality to make it easy for Rigging to manage. In general this includes:
-answer = chat.last.parse(Answer)
-print(answer.content)
+1. More intelligent parsing for the imperfect text which LLMs frequently provide. Nested tags, unclear sub-structures,
+ multiple models scattered in the text, etc. See the [`.from_text()`][rigging.model.Model.from_text] method for the details.
+2. A nicer [`.to_pretty_xml()`][rigging.model.Model.to_pretty_xml] to get new-line formatted outputs.
+3. Some basic handling for escaping interior XML tags which normally wouldn't parse correctly.
+4. Helpers to ensure the tag names from models are consistent and automatic.
-# "Claude"
+However, everything pydantic-xml (and by extension normal pydantic) models support is also supported in Rigging.
-print(f"{chat.last!r}")
+!!! tip "Background Knowledge"
-# Message(role='assistant', parts=[
-# ParsedMessagePart(model=Answer(content='Claude'), ref='Claude')
-# ], content='Claude')
+    If you happen to be new to modern Python features like type hinting and pydantic models, we would highly
+    recommend you spend some time in their documentation to get more familiar. Without this background,
+    many of the Rigging features will seem confusing.
-chat.last.content = "new content" # Updating content strips parsed parts
-print(f"{chat.last!r}")
+ - [Python Typing](https://docs.python.org/3/library/typing.html)
+ - [Pydantic](https://docs.pydantic.dev/)
+ - [Pydantic XML](https://pydantic-xml.readthedocs.io/)
-# Message(role='assistant', parts=[], content='new content')
-```
+## Primitive Models
-### Mutliple Models
+Often, you just want to indicate to the LLM that it should place a block of text between
+tags so you can extract just that portion of the message content.
-```python
+```py
import rigging as rg
-class Joke(rg.Model):
- content: str
-
-chat = (
- rg.get_generator("claude-2.1")
- .chat([{
- "role": "user",
- "content": f"Provide 3 short jokes each wrapped with {Joke.xml_tags()} tags."},
- ])
- .run()
-)
-
-jokes = chat.last.parse_set(Joke)
-
-# [
-# Joke(content="Why don't eggs tell jokes? They'd crack each other up!"),
-# Joke(content='What do you call a bear with no teeth? A gummy bear!'),
-# Joke(content='What do you call a fake noodle? An Impasta!')
-# ]
+class FunFact(rg.Model):
+ fact: str
```
-### Complex Models
+Pydantic XML refers to these as "primitive" because they have a single field. These models
+leverage the minimal functionality of XML parsing and just take the contents between the start
+and end tags.
+
+??? note "Parsing for Primitive Models"
-```python
+ We have a pending TODO to replace the internals of Pydantic XML parsing to make it
+ more flexible for the kinds of "broken" XML that LLMs frequently produce.
+ Primitive models have special handling in Rigging to make them more flexible
+ for parsing this "broken" XML. If you are having issues with complex parsing,
+ using primitive models is a good escape hatch for now.
+
+```py
import rigging as rg
-class Inner(rg.Model):
- type: str = rg.attr()
- content: str
+class FunFact(rg.Model):
+ fact: str
+
+FunFact(fact="Rigging is pretty easy to use").to_pretty_xml()
+# '<fun-fact>Rigging is pretty easy to use</fun-fact>'
+```
+
+Notice that the name of our interior field (`.fact`) isn't relevant to the XML structure. We only
+use this to access the contents of that model field in code. However, the name of our model (`FunFact`)
+**is relevant** to the XML representation. What you name your models is important to how the underlying
+LLM interprets its meaning. You should be thoughtful and descriptive about your model names as they
+will have effects on how well the LLM understands the intention, and how likely it is to output one
+model over another (based on its token probability distribution).
+
+If you want to separate the model tag from its class name, you can specify it in the class construction:
+
+```py
+class FunFact(rg.Model, tag="a-super-fun-fact"):
+ ...
+```
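+
+With the override in place, the new tag is used for serialization and parsing (a quick sketch of the expected output):
+
+```py
+FunFact(fact="Rigging is pretty easy to use").to_pretty_xml()
+# '<a-super-fun-fact>Rigging is pretty easy to use</a-super-fun-fact>'
+```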
+
+## Typing and Validation
+
+Even if models are primitive, we can still use type hinting and pydantic validation to ensure
+that the content between tags conforms to any constraints we need. Take this example from a default
+Rigging model for instance:
+
+```py
+class YesNoAnswer(Model):
+    "Yes/No answer with coercion"
+
+ boolean: bool
+ """The boolean value of the answer."""
+
+ @field_validator("boolean", mode="before")
+ def parse_str_to_bool(cls, v: t.Any) -> t.Any:
+ if isinstance(v, str):
+ if v.strip().lower().startswith("yes"):
+ return True
+ elif v.strip().lower().startswith("no"):
+ return False
+ return v
+```
+
+You can see the interior field of the model is now a `bool` type, which means pydantic will accept standard
+values which could be reasonably interpreted as a boolean. We also add a custom field validator to
+check for instances of `yes/no` as text strings. All of these XML values will parse correctly:
+
+```xml
+<yes-no-answer>true</yes-no-answer>
+<yes-no-answer>False</yes-no-answer>
+<yes-no-answer>yes, it is.</yes-no-answer>
+<yes-no-answer> NO</yes-no-answer>
+<yes-no-answer>1</yes-no-answer>
+```
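+
+As a quick check, constructing the model directly runs the same pydantic validation path used during parsing:
+
+```py
+assert YesNoAnswer(boolean="yes, it is.").boolean is True
+assert YesNoAnswer(boolean=" NO").boolean is False
+```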
+
+The choice to build on Pydantic offers an incredible amount of flexibility for controlling exactly
+how data is validated in your models. This kind of parsing work is exactly what these libraries were designed
+to do. The sky is the limit, and **everything you find in Pydantic and Pydantic XML is compatible
+with Rigging.**
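+
+For instance, a standard pydantic `Field` constraint works as you'd expect on a primitive model (the `Rating` model here is just an illustration):
+
+```py
+import typing as t
+
+from pydantic import Field
+
+import rigging as rg
+
+class Rating(rg.Model):
+    # Values outside 1-10 will raise a pydantic ValidationError
+    score: t.Annotated[int, Field(ge=1, le=10)]
+```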
+
+## Handling Multiple Fields
+
+Unlike vanilla Pydantic, our use of Pydantic XML forces us to think about exactly how models with multiple fields
+will be represented in XML syntax. Take this as an example:
+
+```py
+class Person(rg.Model):
+ name: str
+ age: int
+```
+
+In XML this could be any of the following:
+
+```xml
+<person name="Will" age="30" />
+
+<person name="Will">
+  <age>30</age>
+</person>
+
+<person>
+  <name>Will</name>
+  <age>30</age>
+</person>
+
+...
+```
+
+*You get the idea.* Pydantic XML handles this all very well and offers different ways of defining
+your fields to specify whether they should be **attributes** or child **elements**. You can read
+more about this in [their documentation](https://pydantic-xml.readthedocs.io/en/latest/pages/quickstart.html#primitives).
+
+The basic rule is this: **If your model has more than one field, you need to define every field as
+either an attribute or an element.**
+
+How exactly you structure your models and their associated representations is completely up to you.
+Our general guidance is that LLMs tend to work better with elements than attributes.
+
+
+=== "Model Definition"
-class Outer(rg.Model):
+ ```py
+ class Person(rg.Model):
+ name: str = rg.element()
+ age: int = rg.element()
+ ```
+
+=== "XML Format"
+
+ ```xml
+    <person>
+      <name></name>
+      <age></age>
+    </person>
+ ```
+
+
+## Complex Models
+
+Let's design a model which will hold required information for making a web request. We'll begin with an outline of our model:
+
+```py
+class Header(rg.Model):
+ name: str
+ value: str
+
+class Parameter(rg.Model):
+ name: str
+ value: str
+
+class Request(rg.Model):
+ method: str
+ path: str
+ headers: list
+ url_params: list
+ body: str
+```
+
+We'll start with a few standard string constraints to strip extra white-space (which LLMs tend to include)
+and automatically convert our method to upper case.
+
+```py
+import typing as t
+
+from pydantic import StringConstraints
+
+str_strip = t.Annotated[str, StringConstraints(strip_whitespace=True)]
+str_upper = t.Annotated[str, StringConstraints(to_upper=True)]
+
+class Header(rg.Model):
+ name: str
+ value: str_strip
+
+class Parameter(rg.Model):
+ name: str
+ value: str_strip
+
+class Request(rg.Model):
+ method: str_upper
+ path: str_strip
+ headers: list
+ url_params: list
+ body: str_strip
+```
+
+Next we'll assign our fields to attributes and elements.
+
+```py
+from pydantic import StringConstraints
+
+str_strip = t.Annotated[str, StringConstraints(strip_whitespace=True)]
+str_upper = t.Annotated[str, StringConstraints(to_upper=True)]
+
+class Header(rg.Model):
name: str = rg.attr()
- inners: list[Inner] = rg.element()
+ value: str_strip = rg.element()
+
+class Parameter(rg.Model):
+ name: str = rg.attr()
+ value: str_strip = rg.element()
+
+class Request(rg.Model):
+ method: str_upper = rg.attr()
+ path: str_strip = rg.attr()
+ headers: list
+ url_params: list
+ body: str_strip = rg.element()
+```
+
+In terms of handling our headers and URL parameters, we want these to be a list of child
+elements which are wrapped in a parent tag. We also want these and our body to be optional.
+
+```py
+from pydantic import StringConstraints
+
+str_strip = t.Annotated[str, StringConstraints(strip_whitespace=True)]
+str_upper = t.Annotated[str, StringConstraints(to_upper=True)]
+
+class Header(rg.Model):
+ name: str = rg.attr()
+ value: str_strip
+
+class Parameter(rg.Model):
+ name: str = rg.attr()
+ value: str_strip
+
+class Request(rg.Model):
+ method: str_upper = rg.attr()
+ path: str = rg.attr()
+ headers: list[Header] = rg.wrapped("headers", rg.element(default=[]))
+ url_params: list[Parameter] = rg.wrapped("url-params", rg.element(default=[]))
+ body: str_strip = rg.element(default="")
+```
+
+Let's check our final work:
+
+=== "Model in Code"
-outer = Outer(name="foo", inners=[
- Inner(type="cat", content="meow"),
- Inner(type="dog", content="bark")
-])
+ ```py
+ Request(
+ method="POST",
+ path="/api/v1/search",
+ headers=[
+ Header(name="Authorization", value="Bearer sk-1234")
+ ],
+ url_params=[
+ Parameter(name="max", value="100")
+ ],
+ body="search=rigging"
+ )
+ ```
-print(outer.to_pretty_xml())
+=== "Model as XML"
-#
-# meow
-# bark
-#
-```
\ No newline at end of file
+ ```xml
+    <request method="POST" path="/api/v1/search">
+      <headers>
+        <header name="Authorization">Bearer sk-1234</header>
+      </headers>
+      <url-params>
+        <parameter name="max">100</parameter>
+      </url-params>
+      <body>search=rigging</body>
+    </request>
+ ```
\ No newline at end of file
diff --git a/docs/topics/serialization.md b/docs/topics/serialization.md
new file mode 100644
index 0000000..e9eaa6b
--- /dev/null
+++ b/docs/topics/serialization.md
@@ -0,0 +1,121 @@
+# Serialization
+
+The following objects in Rigging have great serialization support for storage and retrieval:
+
+- [`Chat`][rigging.chat.Chat]
+- [`Completion`][rigging.completion.Completion]
+- [`Generator`][rigging.generator.Generator]
+- [`Model`][rigging.model.Model]
+
+Most of this stems from our use of Pydantic for core models, and we've included some helpful
+fields for reconstructing Chats and Completions.
+
+## Serializing Chats
+
+Let's build a joke pipeline and serialize the final chat into JSON.
+
+=== "Serialization Code"
+
+ ```py
+ import rigging as rg
+
+ class Joke(rg.Model):
+ content: str
+
+ chat = rg.get_generator("gpt-3.5-turbo") \
+ .chat(f"Provide 3 jokes each between {Joke.xml_tags()} tags.") \
+ .meta(tags=['joke']) \
+ .with_(temperature=1.25) \
+ .run()
+
+ chat.last.parse_set(Joke)
+
+ serialized = chat.model_dump_json(indent=2)
+ print(serialized)
+ ```
+
+=== "Serialized JSON"
+
+ ```json
+ {
+ "uuid": "891c3834-2588-4652-8371-e9746086fd46",
+ "timestamp": "2024-05-10T11:44:15.501326",
+ "messages": [
+ {
+ "role": "user",
+ "parts": [],
+        "content": "Provide 3 jokes each between <joke></joke> tags."
+ }
+ ],
+ "generated": [
+ {
+ "role": "assistant",
+ "parts": [
+ {
+ "model": {
+ "content": " Why was the math book sad? Because it had too many problems. "
+ },
+ "slice_": [
+ 0,
+ 75
+ ]
+ },
+ {
+ "model": {
+ "content": " I told my wife she should embrace her mistakes. She gave me a hug. "
+ },
+ "slice_": [
+ 76,
+ 157
+ ]
+ },
+ {
+ "model": {
+ "content": " Why did the scarecrow win an award? Because he was outstanding in his field. "
+ },
+ "slice_": [
+ 158,
+ 249
+ ]
+ }
+ ],
+        "content": "<joke> Why was the math book sad? Because it had too many problems. </joke>\n<joke> I told my wife she should embrace her mistakes. She gave me a hug. </joke>\n<joke> Why did the scarecrow win an award? Because he was outstanding in his field. </joke>"
+ }
+ ],
+ "metadata": {
+ "tags": [
+ "joke"
+ ]
+ },
+ "generator_id": "litellm!gpt-3.5-turbo,temperature=1.25"
+ }
+ ```
+
+You'll notice that every Chat gets a unique `uuid` field to help track them in a datastore like Elastic or Pandas. We also
+assign a `timestamp` to understand when the generation took place. We are also taking advantage of
+[`.meta()`][rigging.chat.PendingChat.meta] to add a tracking tag for filtering later.
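+
+Because `metadata` is just a dictionary on the Chat, filtering stored chats later is straightforward. A small sketch, assuming `chats` is a list of deserialized [`Chat`][rigging.chat.Chat] objects:
+
+```py
+joke_chats = [chat for chat in chats if "joke" in chat.metadata.get("tags", [])]
+```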
+
+## Deserializing Chats
+
+The JSON has everything required to reconstruct a Chat, including a `generator_id` dynamically
+constructed to preserve the parameters used to create the generated message(s). We can now
+deserialize a chat from a datastore, and immediately step back into a
+[`PendingChat`][rigging.chat.PendingChat] for exploration.
+
+```py
+chat = rg.Chat.model_validate_json(serialized)
+print(chat.conversation)
+# [user]: Provide 3 jokes each between <joke></joke> tags.
+
+# [assistant]:
+# <joke> Why was the math book sad? Because it had too many problems. </joke>
+# <joke> I told my wife she should embrace her mistakes. She gave me a hug. </joke>
+# <joke> Why did the scarecrow win an award? Because he was outstanding in his field. </joke>
+
+continued = chat.continue_("Please explain the first joke to me.").run()
+print(continued.last)
+# [assistant]: In the first joke, the pun is based on the double meaning of the word "problems."
+# The math book is described as being sad because it has "too many problems," which could be
+# interpreted as having both mathematical problems (equations to solve) and emotional difficulties.
+# This play on words adds humor to the joke.
+```
\ No newline at end of file
diff --git a/docs/topics/tools.md b/docs/topics/tools.md
index 8f232fb..eecbffc 100644
--- a/docs/topics/tools.md
+++ b/docs/topics/tools.md
@@ -1,39 +1,144 @@
-!!! note
- This content is currently being refactored
+# Tools
-### Tools
+By popular demand, Rigging includes a basic helper layer to provide the concept of "Tools" to a model. It's
+debatable whether this approach (or more specifically the way we present it to narrative models) is a good idea,
+but it's a fast way to extend the capability of your generation into arbitrary code functions that you define.
-```python
+## Writing Tools
+
+Much like models, tools inherit from a base [`rg.Tool`][rigging.tool.Tool] class. These subclasses are required
+to provide at least 1 function along with a `name` and `description` to present to the LLM during generation.
+
+Every function you define and the parameters within are required to carry both type hints and annotations that
+describe their purpose.
+
+```py
from typing import Annotated
+import requests
import rigging as rg
class WeatherTool(rg.Tool):
- @property
- def name(self) -> str:
- return "weather"
-
- @property
- def description(self) -> str:
- return "A tool to get the weather for a location"
+ name = "weather"
+ description = "A tool to get the weather for a location"
def get_for_city(self, city: Annotated[str, "The city name to get weather for"]) -> str:
- print(f"[=] get_for_city('{city}')")
- return f"The weather in {city} is nice today"
+ try:
+ city = city.replace(" ", "+")
+ return requests.get(f"http://wttr.in/{city}?format=2").text
+        except Exception:
+ return "Failed to call the API"
+```
+Integrating tools into the generation process is as easy as passing an instantiation
+of your tool class to the [`PendingChat.using()`][rigging.chat.PendingChat.using] method.
+
+```py
chat = (
- rg.get_generator("mistral/mistral-tiny")
- .chat(
- [
- {"role": "user", "content": "What is the weather in London?"},
- ]
- )
- .using(WeatherTool(), force=True)
+ rg.get_generator("gpt-3.5-turbo")
+ .chat("What is the weather in London?")
+ .using(WeatherTool(), force=True) # (1)!
.run()
)
-# [=] get_for_city('London')
-
print(chat.last.content)
+# The current weather in London is 57°F with a light breeze of 2mph.
+```
+
+1. The use of `force=True` here is optional, but results in the internal generation
+ ensuring at least one tool is called before the generation completes.
+
+If/when the LLM elects to emit a valid tool call in Rigging's format, Rigging will intercept
+the call, process the arguments, ensure they conform to your function spec,
+and execute the desired function. Results will be injected back into the chat,
+and the final message which does not include any tool calls will trigger the end
+of the generation process.
+
+??? tip "Tool State"
+
+ It's worth noting that tools are passed as instantiated classes into Rigging,
+    which means your tool is free to carry state about its operations as time
+ progresses. Whether this is a good software design decision is up to you.
+
+
+## Under the Hood
+
+If you are curious what is occurring "under the hood" (as you should be), you can
+print the entire conversation text and see our injected system prompt of
+instructions for using a tool, along with the auto-generated XML description
+of the `WeatherTool` we supplied to the model:
+
+```xml
+[system]: # Tool Use
+In this environment you have access to a set of tools you can use to improve your responses.
+
+## Tool Call Format
+
+
+
+
+
+
+
+
+
+## Available Tools
+
+
+
+
+
+
+
+
+
+
+
+
+You can use any of the available tools by responding in the call format above. The XML will be
+parsed and the tool(s) will be executed with the parameters you provided. The results of each
+tool call will be provided back to you before you continue the conversation. You can execute
+multiple tool calls by continuing to respond in the format above until you are finished.
+Function calls take explicit values and are independent of each other. Tool calls cannot share,
+re-use, and transfer values between eachother. The use of placeholders is forbidden.
+
+The user will not see the results of your tool calls, only the final message of your conversation.
+Wait to perform your full response until after you have used any required tools. If you intend to
+use a tool, please do so before you continue the conversation.
+
+
+[user]: What is the weather in London?
+
+[assistant]:
+
+ London
+
+
+
+[user]:
+
+ ☀️ 🌡️+57°F 🌬️→2mph
+
+
+
+
+[assistant]: The current weather in London is 57°F with a light breeze of 2mph.
+```
+
+Every tool assigned to the `PendingChat` will be processed by calling [`.get_description()`][rigging.tool.Tool.get_description]
+and a minimal tool-use prompt will be injected as, or appended to, the system message.
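+
+If you want to see exactly what will be injected for your own tools, you can call this yourself:
+
+```py
+print(WeatherTool().get_description())
+```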
+
+!!! warning "The Curse of Complexity"
+
+    Everything we add to the context window of a model introduces variance to its outputs.
+    Even the way we name parameters and tools can have a large impact on whether a model
+    elects to output a tool call and how frequently or late it does so. For this reason,
+    tool calling in Rigging might not be the best way to accomplish your goals.
+
+    Consider different approaches to your problem, like isolating fixed input/output
+    pairs and building a dedicated generation process around those, or pushing the
+    model to think more about selecting from a series of "actions" it should take
+    rather than "tools" it should use as part of a conversation.
-# "Based on the information I've received, the weather in London is nice today."
-```
\ No newline at end of file
+    You also might consider a pipeline where incoming messages are scanned against
+    a list of possible tools, forking the generation process with something like
+    [`.then`][rigging.chat.PendingChat.then] instead.
\ No newline at end of file
diff --git a/examples/_shared.py b/examples/_shared.py
deleted file mode 100644
index eb34d45..0000000
--- a/examples/_shared.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import pathlib
-import sys
-import typing as t
-
-from loguru import logger
-
-g_configured: bool = False
-
-LogLevelList = ["trace", "debug", "info", "success", "warning", "error", "critical"]
-LogLevelLiteral = t.Literal["trace", "debug", "info", "success", "warning", "error", "critical"]
-
-
-def configure_logging(
- log_level: str,
- log_file: pathlib.Path | None = None,
- log_file_level: LogLevelLiteral = "debug",
-) -> None:
- global g_configured
-
- if g_configured:
- return
-
- logger.enable("rigging")
-
- logger.level("TRACE", color="", icon="[T]")
- logger.level("DEBUG", color="", icon="[_]")
- logger.level("INFO", color="", icon="[=]")
- logger.level("SUCCESS", color="", icon="[+]")
- logger.level("WARNING", color="", icon="[-]")
- logger.level("ERROR", color="", icon="[!]")
- logger.level("CRITICAL", color="", icon="[x]")
-
- custom_format = "{time:HH:mm:ss.SSS} | {level.icon} {message}"
-
- logger.remove()
- logger.add(sys.stderr, format=custom_format, level=log_level.upper())
-
- if log_file is not None:
- logger.add(log_file, format=custom_format, level=log_file_level.upper())
- logger.info(f"Logging to {log_file}")
-
- g_configured = True
diff --git a/examples/bandit.py b/examples/bandit.py
index 2409072..42ed470 100644
--- a/examples/bandit.py
+++ b/examples/bandit.py
@@ -1,11 +1,11 @@
import abc
import asyncio
import pathlib
+import random
import re
import typing as t
from dataclasses import dataclass, field
-import _shared
import asyncssh
import click
import requests # type: ignore
@@ -13,6 +13,9 @@
from pydantic import StringConstraints
import rigging as rg
+from rigging import logging
+
+# Constants
SSH_HOST = "bandit.labs.overthewire.org"
SSH_PORT = 2220
@@ -27,12 +30,10 @@
# Helpers
-BASE_DOCS_URL = "https://overthewire.org/wargames/bandit"
-
def get_bandit_level_description(level: int) -> str:
search = r"Level Goal(.+)
", "").replace("
", "").strip()
@@ -57,7 +58,7 @@ async def execute_ssh(
) -> str:
logger.debug(f"Executing:\n{command}")
- async with conn.create_process("/bin/bash") as process:
+ async with conn.create_process("/bin/bash") as process: # type: ignore
process.stdin.write(command + "\n" + "exit" + "\n")
try:
stdout_output, stderr_output = await asyncio.wait_for(process.communicate(), timeout=timeout)
@@ -65,9 +66,6 @@ async def execute_ssh(
process.terminate()
stdout_output = ""
stderr_output = "[command timed out]"
- except asyncssh.misc.ConnectionLost:
- stdout_output = ""
- stderr_output = "[connection lost]"
output = f"{stdout_output}\n{stderr_output}".strip()
@@ -85,20 +83,20 @@ async def execute_ssh(
# Models
str_strip = t.Annotated[str, StringConstraints(strip_whitespace=True)]
-str_upper = t.Annotated[str, StringConstraints(to_upper=True)]
class Action(rg.Model, abc.ABC):
+ @abc.abstractmethod
async def run(self, state: "State") -> str:
- raise NotImplementedError
+ ...
-class UpdateMyGoal(Action):
+class UpdateGoal(Action):
goal: str_strip
@classmethod
def xml_example(cls) -> str:
- return UpdateMyGoal(goal="My new goal").to_pretty_xml()
+ return UpdateGoal(goal="My new goal").to_pretty_xml()
async def run(self, state: "State") -> str:
logger.success(f"[{state.id}] Updating goal to '{self.goal}'")
@@ -168,6 +166,7 @@ def xml_example(cls) -> str:
return TryCommand(content="whoami | grep abc").to_pretty_xml()
async def run(self, state: "State") -> str:
+ logger.info(f"[{state.id}] Trying command:\n{self.content}")
assert state.client is not None, "No SSH connection available"
return await execute_ssh(state.client, self.content)
@@ -194,9 +193,9 @@ async def run(self, state: "State") -> str:
return f"Success! You are now on level {next_level}."
-Actions = UpdateMyGoal | SaveMemory | RecallMemory | DeleteMemory | PinToTop | TryCommand | SubmitPassword
+Actions = UpdateGoal | SaveMemory | RecallMemory | DeleteMemory | PinToTop | TryCommand | SubmitPassword
ActionsList: list[type[Actions]] = [
- UpdateMyGoal,
+ UpdateGoal,
SaveMemory,
RecallMemory,
DeleteMemory,
@@ -211,6 +210,7 @@ class State:
# Required
id: int
max_actions: int
+ base_chat: rg.PendingChat
# Progress
result: str | None = ""
@@ -239,7 +239,7 @@ async def prep(self, level: int, password: str) -> None:
self.goals.append(f"Find and submit the password to the next level ({self.level + 1})")
async def step(self) -> None:
- logger.info(f"Processing {len(self.next_actions)} action(s)")
+ logger.debug(f"Processing {len(self.next_actions)} action(s)")
for action in self.next_actions:
self.history.append((action, await action.run(self)))
self.next_actions.clear()
@@ -310,7 +310,7 @@ def get_prompt(self, max_history: int = MAX_HISTORY) -> str:
## Goal
When you believe you've accomplished your current goal:
-{UpdateMyGoal.xml_example()}
+{UpdateGoal.xml_example()}
## Commands
@@ -322,8 +322,6 @@ def get_prompt(self, max_history: int = MAX_HISTORY) -> str:
When you have the password to the next level, provide it so the system can authenticate you to the next level:
{SubmitPassword.xml_example()}
-## Examples
-
---
# Guidance
@@ -342,14 +340,14 @@ def get_prompt(self, max_history: int = MAX_HISTORY) -> str:
- You are executing the commands on the remote host, not locally
- Passwords look like long base64 strings, watch for them
-Output exactly 1 new action from the list above in your response. Prior action results are displayed above.
+Output a new action from the list above in your response. Prior action results are displayed above.
"""
# CLI + Core
-async def agent_loop(base_chat: rg.PendingChat, state: State) -> State:
+async def agent_loop(state: State) -> State:
async def parse_actions(chat: rg.Chat) -> rg.Chat | None:
parsed: list[Actions] = []
for action_cls in ActionsList:
@@ -369,15 +367,17 @@ async def parse_actions(chat: rg.Chat) -> rg.Chat | None:
return None
while not state.result:
- await base_chat.fork(state.get_prompt()).then(parse_actions).arun()
+ await state.base_chat.fork(state.get_prompt()).then(parse_actions).arun()
await state.step()
return state
-async def core_loop(
+async def main(
level: int, password: str, generator_id: str, max_iterations: int, parallel_agents: int, max_actions: int
) -> None:
+ logger.success(f"Starting Bandit with {parallel_agents} agents")
+
# Prepare our objects
generator = rg.get_generator(generator_id)
@@ -388,11 +388,14 @@ async def core_loop(
for i in range(max_iterations):
logger.success(f"Starting level {level}")
- states: list[State] = [State(id=i, max_actions=max_actions) for i in range(parallel_agents)]
+ states: list[State] = [
+ State(id=i, max_actions=max_actions, base_chat=base_chat.with_(temperature=random.uniform(0.25, 1)))
+ for i in range(parallel_agents)
+ ]
for state in states:
await state.prep(level, password)
- loops = [asyncio.create_task(agent_loop(base_chat, state)) for state in states]
+ loops = [asyncio.create_task(agent_loop(state)) for state in states]
_, pending = await asyncio.wait(loops, return_when=asyncio.FIRST_COMPLETED)
for task in pending:
@@ -402,6 +405,8 @@ async def core_loop(
level = finished_state.level + 1
password = finished_state.result or ""
+ logger.success("Finished Bandit.")
+
@click.command()
@click.option("-l", "--level", type=int, default=1, help="Starting level (1-34)")
@@ -425,7 +430,7 @@ async def core_loop(
"-p",
"--parallel-agents",
type=int,
- default=5,
+ default=3,
help="Number of parallel agents",
)
@click.option(
@@ -433,17 +438,17 @@ async def core_loop(
"--max-actions",
type=int,
default=3,
- help="Maximum number of actions allowed per generation",
+ help="Maximum number of actions allowed per generation round",
)
@click.option(
"--log-level",
- type=click.Choice(_shared.LogLevelList),
+ type=click.Choice(logging.LogLevelList),
default="info",
)
@click.option("--log-file", type=click.Path(path_type=pathlib.Path), default="bandit.log")
@click.option(
"--log-file-level",
- type=click.Choice(_shared.LogLevelList),
+ type=click.Choice(logging.LogLevelList),
default="trace",
)
def cli(
@@ -453,12 +458,16 @@ def cli(
max_iterations: int,
parallel_agents: int,
max_actions: int,
- log_level: str,
+ log_level: logging.LogLevelLiteral,
log_file: pathlib.Path,
- log_file_level: _shared.LogLevelLiteral,
+ log_file_level: logging.LogLevelLiteral,
) -> None:
- _shared.configure_logging(log_level, log_file, log_file_level)
- asyncio.run(core_loop(level, password, generator_id, max_iterations, parallel_agents, max_actions))
+ """
+ Rigging example for agentic exploitation of OverTheWire's Bandit wargame.
+ """
+
+ logging.configure_logging(log_level, log_file, log_file_level)
+ asyncio.run(main(level, password, generator_id, max_iterations, parallel_agents, max_actions))
if __name__ == "__main__":
diff --git a/examples/bandit_single.py b/examples/bandit_single.py
deleted file mode 100644
index 4af9a3c..0000000
--- a/examples/bandit_single.py
+++ /dev/null
@@ -1,477 +0,0 @@
-import abc
-import asyncio
-import pathlib
-import re
-import socket
-import time
-import typing as t
-from dataclasses import dataclass, field
-
-import _shared
-import click
-import paramiko # type: ignore
-import requests # type: ignore
-from loguru import logger
-from pydantic import StringConstraints
-
-import rigging as rg
-
-SSH_HOST = "bandit.labs.overthewire.org"
-SSH_PORT = 2220
-
-MAX_PINS = 10
-MAX_HISTORY = 5
-
-SYSTEM_PROMPT = """\
-You are a senior security expert with years of linux and CTF experience. You are solving a CTF which involves
-locating a password on a server by executing commands via SSH.
-"""
-
-# Helpers
-
-BASE_DOCS_URL = "https://overthewire.org/wargames/bandit"
-
-
-def get_bandit_level_description(level: int) -> str:
- search = r"Level Goal(.+)
", "").replace("", "").strip()
- return re.sub("<.*?>", "", goal)
-
-
-def connect_ssh(level: int, password: str) -> paramiko.SSHClient | None:
- username = f"bandit{level}"
-
- client = paramiko.SSHClient()
- client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
-
- try:
- client.connect(SSH_HOST, SSH_PORT, username=username, password=password)
- logger.success(f"Connected to {SSH_HOST} on port {SSH_PORT} as {username}")
- except paramiko.AuthenticationException:
- logger.error(f"Failed to authenticate as {username} with password {password}")
- return None
-
- return client
-
-
-def execute_ssh(client: paramiko.SSHClient, command: str, *, max_output_len: int = 5_000, timeout: int = 120) -> str:
- logger.debug(f"Executing:\n{command}")
-
- stdin, stdout, stderr = client.exec_command("/bin/bash", timeout=timeout)
- stdin.write(command + "\n")
- stdin.flush()
- stdin.channel.shutdown_write()
-
- time.sleep(1)
-
- try:
- stdout_output = stdout.read().decode(errors="backslashreplace")
- stderr_output = stderr.read().decode(errors="backslashreplace")
- except socket.timeout:
- stdout_output = ""
- stderr_output = "[command timed out]"
-
- output = f"{stdout_output}\n{stderr_output}".strip()
-
- if not output:
- output = "[command finished]"
-
- if len(output) > max_output_len:
- output = output[:max_output_len] + "\n[output truncated]"
-
- logger.debug(f"Output:\n{output}")
-
- return output
-
-
-# Models
-
-str_strip = t.Annotated[str, StringConstraints(strip_whitespace=True)]
-str_upper = t.Annotated[str, StringConstraints(to_upper=True)]
-
-
-class Action(rg.Model, abc.ABC):
- def run(self, state: "State") -> str:
- raise NotImplementedError
-
-
-class UpdateMyGoal(Action):
- goal: str_strip
-
- @classmethod
- def xml_example(cls) -> str:
- return UpdateMyGoal(goal="My new goal").to_pretty_xml()
-
- def run(self, state: "State") -> str:
- user_input = input(f"\nModel wants to set goal to '{self.goal}'? (y/N): ")
- if user_input.lower() != "y":
- self.goal = input("What is the real goal?: ")
- logger.success(f"Updating goal to '{self.goal}'")
- state.goals.append(self.goal)
- return "Goal updated."
-
-
-class SaveMemory(Action):
- key: str_strip = rg.attr()
- content: str_strip
-
- @classmethod
- def xml_example(cls) -> str:
- return SaveMemory(key="my-note", content="Lots of custom data\nKeep this for later.").to_pretty_xml()
-
- def run(self, state: "State") -> str:
- logger.success(f"Storing '{self.key}':\n{self.content}")
- state.memories[self.key] = self.content
- return f"Stored '{self.key}'."
-
-
-class RecallMemory(Action):
- key: str_strip
-
- @classmethod
- def xml_example(cls) -> str:
- return RecallMemory(key="last-thoughts").to_pretty_xml()
-
- def run(self, state: "State") -> str:
- value = state.memories.get(self.key, "Not found.")
- logger.success(f"Recalling '{self.key}'\n{value}")
- return value
-
-
-class DeleteMemory(Action):
- key: str_strip
-
- @classmethod
- def xml_example(cls) -> str:
- return DeleteMemory(key="my-note").to_pretty_xml()
-
- def run(self, state: "State") -> str:
- logger.success(f"Forgetting '{self.key}'")
- state.memories.pop(self.key, None)
- return f"Forgot '{self.key}'."
-
-
-class PinToTop(Action):
- content: str_strip
-
- @classmethod
- def xml_example(cls) -> str:
- return PinToTop(content="This is the auth token: 1234").to_pretty_xml()
-
- def run(self, state: "State") -> str:
- logger.success(f"Pinning '{self.content}'")
- state.pins.append(self.content)
- state.pins = state.pins[:MAX_PINS]
- return "Pinned."
-
-
-class TryCommand(Action):
- content: str_strip
-
- @classmethod
- def xml_example(cls) -> str:
- return TryCommand(content="whoami | grep abc").to_pretty_xml()
-
- def run(self, state: "State") -> str:
- return execute_ssh(state.client, self.content)
-
-
-class SubmitPassword(Action):
- password: str_strip
-
- @classmethod
- def xml_example(cls) -> str:
- return SubmitPassword(password="[long_pw_string]").to_pretty_xml()
-
- def run(self, state: "State") -> str:
- if re.search(r"[a-zA-Z0-9]{32}", self.password) is None:
- return "Invalid password format."
-
- next_level = state.level + 1
- client = connect_ssh(next_level, self.password)
- if client is None:
- return "Failed to connect. Invalid password."
-
- logger.success(f"Got password for level {next_level}: {self.password}")
- state.update_level(next_level, client=client)
-
- return f"Success! You are now on level {next_level}."
-
-
-Actions = UpdateMyGoal | SaveMemory | RecallMemory | DeleteMemory | PinToTop | TryCommand | SubmitPassword
-ActionsList: list[type[Actions]] = [
- UpdateMyGoal,
- SaveMemory,
- RecallMemory,
- DeleteMemory,
- PinToTop,
- TryCommand,
- SubmitPassword,
-]
-
-
-@dataclass
-class State:
- # CTF
- client: paramiko.SSHClient = paramiko.SSHClient()
- level: int = 1
- level_details: str = ""
-
- # Core
- goals: list[str] = field(default_factory=list)
- next_actions: list[Actions] = field(default_factory=list)
-
- # Context
- pins: list[str] = field(default_factory=list)
- memories: dict[str, str] = field(default_factory=dict)
- history: list[tuple[Actions, str]] = field(default_factory=list)
-
- def update_level(
- self, level: int, *, password: str | None = None, client: paramiko.SSHClient | None = None
- ) -> None:
- if client is None and password is not None:
- self.client = connect_ssh(level, password)
- elif client is not None:
- self.client = client
- else:
- raise ValueError("Either password or client must be provided")
-
- self.level = level
- self.level_details = get_bandit_level_description(level + 1)
- self.goals.append(f"Find and submit the password to the next level ({self.level + 1})")
-
- def step(self) -> None:
- logger.info(f"Processing {len(self.next_actions)} action(s)")
- for action in self.next_actions:
- self.history.append((action, action.run(self)))
- self.next_actions.clear()
-
- def get_prompt(self, max_history: int = MAX_HISTORY) -> str:
- memories = "\n".join(self.memories.keys())
- previous_goals = "\n".join(self.goals[:-1] if len(self.goals) > 1 else [])
- current_goal = self.goals[-1]
- history = "\n---\n".join([h[0].to_pretty_xml() + "\n" + h[1] for h in self.history[-max_history:]])
- pinned = "\n".join(self.pins)
- return f"""\
-# Context
-
-
-{self.level}
-
-
-
-{self.level_details}
-
-
-
-{memories or 'No memories yet.'}
-
-
-
-{history or 'No actions taken yet'}
-
-
-
-{pinned or 'No pinned context yet.'}
-
-
-# Goals
-
-
-{previous_goals or 'No previous goals'}
-
-
-
-{current_goal}
-
-
-# Actions
-
-You can take any 1 of the following actions in your response. Use the formats below.
-
-## Memory
-
-You can use the memory actions to store and retrieve long term information as you work. Use memories often to keep track of important information like your planning, analysis, important web responses, etc.
-
-To store a memory:
-{SaveMemory.xml_example()}
-
-To retrieve a memory:
-{RecallMemory.xml_example()}
-
-To forget a memory:
-{DeleteMemory.xml_example()}
-
-## Pinned Context
-
-In addition to long term memories, you can pin important short term information to be displayed before every future action (maximum of {MAX_PINS}). Use this to keep data like tokens, passwords, usernames, endpoints, critical notes, etc.
-
-To pin important information:
-{PinToTop.xml_example()}
-
-## Goal
-
-When you believe you've accomplished your current goal:
-{UpdateMyGoal.xml_example()}
-
-## Commands
-
-To execute a command on the remote host via SSH:
-{TryCommand.xml_example()}
-
-## Password
-
-When you have the password to the next level, provide it so the system can authenticate you to the next level:
-{SubmitPassword.xml_example()}
-
-## Examples
-
----
-
-# Guidance
-
-- Do not use placeholder values in any of your actions.
-- Recall your memories to help you make decisions.
-- Experiment frequently and be adjust your approach to make progress.
-- All actions are useful, be organized and methodical in your approach.
-- Always assume you start in a new /bin/bash shell in the user home directory
-- Add verbosity and failure information to your commands to assist with troubleshooting
-- Prefer using full paths to files and directories
-- Be creative with your solutions, but keep them simple
-- Avoid repeating commands that have already been executed and failed
-- Read output carefully to understand what errors occured and why
-- Use the /tmp directory for any file write operations
-- You are executing the commands on the remote host, not locally
-
-Output exactly 1 new action from the list above in your response. Prior action results are displayed above.
-"""
-
-
-# CLI + Core
-
-
-async def core_loop(level: int, password: str, generator_id: str, max_iterations: int, max_actions: int) -> None:
- logger.success(f"Starting bandit on level {level}")
-
- # Prepare our objects
-
- generator = rg.get_generator(generator_id)
- base_chat = generator.chat(
- [{"role": "system", "content": SYSTEM_PROMPT}],
- ).with_(max_tokens=4096)
-
- client = connect_ssh(level, password)
- state = State(client=client)
- state.update_level(level, client=client)
-
- async def parse_actions(chat: rg.Chat) -> rg.Chat | None:
- parsed: list[Actions] = []
- for action_cls in ActionsList:
- action = chat.last.try_parse(action_cls)
- if action is not None:
- parsed.append(action) # type: ignore
-
- if not parsed:
- logger.warning("Model didn't provide any valid actions")
- return None
-
- parsed = t.cast(list[Actions], [p.model for p in chat.last.parts])
- if len(parsed) > max_actions:
- logger.warning("Model provided multiple actions, taking just the first")
-
- state.next_actions = parsed[:max_actions]
- return None
-
- for i in range(1, max_iterations + 1):
- try:
- logger.info(f"iter {i}/{max_iterations}")
-
- chat = await base_chat.fork(state.get_prompt()).then(parse_actions).arun()
- logger.info(f"Last:\n{chat.last.content}")
- state.step()
-
- except KeyboardInterrupt:
- logger.info("Interrupted")
- check = input("\nSet a new goal? (y/N): ")
- if check.lower() == "y":
- new_goal = input("Enter new goal: ")
- state.goals.append(new_goal)
- else:
- raise
-
- # Final stats
-
- logger.info("bandit complete")
-
- logger.info("Goals:")
- for goal in state.goals:
- logger.info(f" |- {goal}")
-
- logger.info("Memories:")
- for key, value in state.memories.items():
- logger.info(f" |- {key}:\n{value}")
-
-
-@click.command()
-@click.option("-l", "--level", type=int, default=0, help="Starting level (1-34)")
-@click.option("-p", "--password", type=str, default="bandit0", help="Starting password")
-@click.option(
- "-g",
- "--generator-id",
- type=str,
- default="anthropic/claude-3-sonnet-20240229",
- required=True,
- help="Rigging generator identifier (gpt-4, mistral/mistral-medium, etc.)",
-)
-@click.option(
- "-i",
- "--max-iterations",
- type=int,
- default=100,
- help="Maximum number of iterations",
-)
-@click.option(
- "-p",
- "--parallel-agents",
- type=int,
- default=5,
- help="Number of parallel agents",
-)
-@click.option(
- "-m",
- "--max-actions",
- type=int,
- default=3,
- help="Maximum number of actions allowed per generation",
-)
-@click.option(
- "--log-level",
- type=click.Choice(_shared.LogLevelList),
- default="info",
-)
-@click.option("--log-file", type=click.Path(path_type=pathlib.Path), default="bandit.log")
-@click.option(
- "--log-file-level",
- type=click.Choice(_shared.LogLevelList),
- default="trace",
-)
-def cli(
- level: int,
- password: str,
- generator_id: str,
- max_iterations: int,
- parallel_agents: int,
- max_actions: int,
- log_level: str,
- log_file: pathlib.Path,
- log_file_level: _shared.LogLevelLiteral,
-) -> None:
- _shared.configure_logging(log_level, log_file, log_file_level)
- asyncio.run(core_loop(level, password, generator_id, max_iterations, max_actions))
-
-
-if __name__ == "__main__":
- cli()
diff --git a/examples/dvra.py b/examples/dvra.py
new file mode 100644
index 0000000..5867a6d
--- /dev/null
+++ b/examples/dvra.py
@@ -0,0 +1,508 @@
+import abc
+import asyncio
+import json
+import pathlib
+import typing as t
+from dataclasses import dataclass, field
+
+import click
+import httpx
+from loguru import logger
+from pydantic import StringConstraints
+
+import rigging as rg
+from rigging import logging
+
+# Constants
+
+MAX_PINS = 10
+MAX_HISTORY = 5
+
+SYSTEM_PROMPT = """\
+You are a principal application security tester assessing vulnerabilities in web APIs.
+"""
+
+# Models
+
+str_strip = t.Annotated[str, StringConstraints(strip_whitespace=True)]
+str_upper = t.Annotated[str, StringConstraints(to_upper=True)]
+
+
+class Action(rg.Model, abc.ABC):
+ @abc.abstractmethod
+ async def run(self, state: "State") -> str:
+ ...
+
+
+class Header(rg.Model):
+ name: str = rg.attr()
+ value: str_strip
+
+
+class Parameter(rg.Model):
+ name: str = rg.attr()
+ value: str_strip
+
+
+class Request(Action):
+ method: str_upper = rg.attr()
+ path: str = rg.attr()
+ headers: list[Header] = rg.wrapped("headers", rg.element(default=[]))
+ url_params: list[Parameter] = rg.wrapped("url_params", rg.element(default=[]))
+ body: str_strip = rg.element(default="")
+
+ @classmethod
+ def xml_example(cls) -> str:
+ return Request(
+ method="GET",
+ path="/$path",
+ headers=[Header(name="X-Header", value="my-value")],
+ url_params=[Parameter(name="name", value="test-param")],
+ body="$body",
+ ).to_pretty_xml()
+
+ async def run(self, state: "State") -> str:
+ response = await send_request(state.client, self)
+ logger.success(f"{self.method} '{self.path}' -> {response.status_code}")
+ state.traffic.append((self, response))
+ return response.to_pretty_xml()
+
+
+class Response(rg.Model):
+ status_code: int = rg.attr()
+    headers: list[Header] = rg.element(default=[])
+ body: str_strip = rg.element(default="")
+
+
+class UpdateGoal(Action):
+ goal: str_strip
+
+ @classmethod
+ def xml_example(cls) -> str:
+ return UpdateGoal(goal="My new goal").to_pretty_xml()
+
+ async def run(self, state: "State") -> str:
+ user_input = input(f"\nModel wants to set goal to '{self.goal}'? (y/N): ")
+ if user_input.lower() != "y":
+ self.goal = input("What is the real goal? (empty for keep existing): ") or self.goal
+ logger.success(f"Updating goal to '{self.goal}'")
+ state.goals.append(self.goal)
+ return "Goal updated."
+
+
+class SaveMemory(Action):
+ key: str_strip = rg.attr()
+ content: str_strip
+
+ @classmethod
+ def xml_example(cls) -> str:
+ return SaveMemory(key="my-note", content="Lots of custom data\nKeep this for later.").to_pretty_xml()
+
+ async def run(self, state: "State") -> str:
+ logger.success(f"Storing '{self.key}':\n{self.content}")
+ state.memories[self.key] = self.content
+ return f"Stored '{self.key}'."
+
+
+class RecallMemory(Action):
+ key: str_strip
+
+ @classmethod
+ def xml_example(cls) -> str:
+ return RecallMemory(key="last-thoughts").to_pretty_xml()
+
+ async def run(self, state: "State") -> str:
+ value = state.memories.get(self.key, "Not found.")
+ logger.success(f"Recalling '{self.key}'\n{value}")
+ return value
+
+
+class DeleteMemory(Action):
+ key: str_strip
+
+ @classmethod
+ def xml_example(cls) -> str:
+ return DeleteMemory(key="my-note").to_pretty_xml()
+
+ async def run(self, state: "State") -> str:
+ logger.success(f"Forgetting '{self.key}'")
+ state.memories.pop(self.key, None)
+ return f"Forgot '{self.key}'."
+
+
+class PinToTop(Action):
+ content: str_strip
+
+ @classmethod
+ def xml_example(cls) -> str:
+ return PinToTop(content="This is the auth token: 1234").to_pretty_xml()
+
+ async def run(self, state: "State") -> str:
+ logger.success(f"Pinning '{self.content}'")
+ state.pins.append(self.content)
+ state.pins = state.pins[:MAX_PINS]
+ return "Pinned."
+
+
+class SetHeaderOnSession(Action):
+ name: str_strip = rg.attr()
+ value: str_strip
+
+ @classmethod
+ def xml_example(cls) -> str:
+ return SetHeaderOnSession(name="X-Header", value="my-value").to_pretty_xml()
+
+ async def run(self, state: "State") -> str:
+ logger.success(f"Adding header '{self.name}' with value '{self.value}'")
+ state.client.headers[self.name] = self.value
+ return "Header added."
+
+
+class ResetSession(Action):
+ @classmethod
+ def xml_example(cls) -> str:
+ return ResetSession().to_pretty_xml()
+
+ async def run(self, state: "State") -> str:
+ logger.success("Resetting session")
+ state.client.headers.clear()
+ return "Session reset."
+
+
+Actions = (
+    UpdateGoal
+    | SaveMemory
+    | RecallMemory
+    | PinToTop
+    | DeleteMemory
+    | Request
+    | SetHeaderOnSession
+    | ResetSession
+)
+ActionsList: list[type[Action]] = [
+    UpdateGoal,
+    SaveMemory,
+    RecallMemory,
+    PinToTop,
+    DeleteMemory,
+    Request,
+    SetHeaderOnSession,
+    ResetSession,
+]
+
+
+@dataclass
+class State:
+ # Required
+ client: httpx.AsyncClient
+ max_actions: int
+ base_chat: rg.PendingChat
+
+ # Core
+ goals: list[str] = field(default_factory=list)
+ next_actions: list[Actions] = field(default_factory=list)
+
+ # Context
+ pins: list[str] = field(default_factory=list)
+ memories: dict[str, str] = field(default_factory=dict)
+ traffic: list[tuple[Request, Response]] = field(default_factory=list)
+ history: list[tuple[Actions, str]] = field(default_factory=list)
+
+ async def step(self) -> None:
+ logger.info(f"Processing {len(self.next_actions)} action(s)")
+ for action in self.next_actions:
+ self.history.append((action, await action.run(self)))
+ self.next_actions.clear()
+
+ def get_prompt(self, max_history: int = MAX_HISTORY) -> str:
+ traffic = "\n".join(
+ f"{request.method} {request.path} -> {response.status_code}"
+ for request, response in self.traffic
+ if response.status_code != 404
+ )
+ memories = "\n".join(self.memories.keys())
+ previous_goals = "\n".join(self.goals[:-1] if len(self.goals) > 1 else [])
+ current_goal = self.goals[-1]
+ history = "\n---\n".join([h[0].to_pretty_xml() + "\n" + h[1] for h in self.history[-max_history:]])
+ pinned = "\n".join(self.pins)
+ headers = "\n".join(f"{k}: {v}" for k, v in self.client.headers.items())
+ return f"""\
+# Context
+
+
+{traffic}
+
+
+
+{memories or 'No memories yet.'}
+
+
+
+{history or 'No actions taken yet'}
+
+
+
+{pinned or 'No pinned context yet.'}
+
+
+
+{headers or 'No headers set'}
+
+
+# Goals
+
+
+{previous_goals or 'No previous goals'}
+
+
+
+{current_goal}
+
+
+# Actions
+
+You can take any 1 of the following actions in your response. Use the formats below.
+
+## Memory
+
+You can use the memory actions to store and retrieve long term information as you work. Use memories often to keep track of important information like your planning, analysis, important web responses, etc.
+
+To store a memory:
+{SaveMemory.xml_example()}
+
+To retrieve a memory:
+{RecallMemory.xml_example()}
+
+To forget a memory:
+{DeleteMemory.xml_example()}
+
+## Pinned Context
+
+In addition to long term memories, you can pin important short term information to be displayed before every future action (maximum of {MAX_PINS}). Use this to keep data like tokens, passwords, usernames, endpoints, critical notes, etc.
+
+To pin important information:
+{PinToTop.xml_example()}
+
+## Goal
+
+When you believe you've accomplished your current goal, pick a new one to continue your exploration.
+{UpdateGoal.xml_example()}
+
+## Web Traffic
+
+To issue a web request, use the following format:
+{Request.xml_example()}
+
+To add a permanent header to all future requests (useful for auth tokens):
+{SetHeaderOnSession.xml_example()}
+
+To reset your current session and clear all headers:
+{ResetSession.xml_example()}
+
+# Guidance
+
+- Focus on exploring as much of the application as possible.
+- Do not use placeholder values in any of your actions.
+- Recall your memories to help you make decisions.
+- Pay attention to your previous traffic, avoid making unnecessary requests.
+- Learned important information? Pin it for reference.
+- Experiment frequently and adjust your approach to make progress.
+- All actions are useful, be organized and methodical in your approach.
+
+Output up to {self.max_actions} new actions from the list above in your response. Prior action results are displayed above.
+"""
+
+
+# Functions
+
+
+def format_http_request(request: httpx.Request) -> str:
+ http_request = f"{request.method} {request.url} HTTP/1.1\n"
+ http_request += "".join(f"{k}: {v}\n" for k, v in request.headers.items())
+ if request.content:
+ http_request += "\n" + request.content.decode("utf-8")
+ return http_request
+
+
+def format_http_response(response: httpx.Response) -> str:
+ http_response = f"HTTP/1.1 {response.status_code} {response.reason_phrase}\n"
+ http_response += "".join(f"{k}: {v}\n" for k, v in response.headers.items())
+ if response.content:
+ http_response += "\n" + response.text
+ return http_response
+
+
+async def send_request(client: httpx.AsyncClient, request: Request) -> Response:
+ try:
+ json_body = json.loads(request.body)
+ except json.JSONDecodeError:
+ json_body = None
+
+ httpx_request = client.build_request(
+ method=request.method,
+ url=request.path,
+ headers={header.name: header.value for header in request.headers},
+ content=request.body if not json_body else None,
+ json=json_body,
+ )
+
+ if not json_body:
+ httpx_request.headers["Content-Type"] = "application/x-www-form-urlencoded"
+
+ logger.trace(f"Request: \n{format_http_request(httpx_request)}")
+ httpx_response = await client.send(httpx_request)
+ logger.trace(f"Response:\n{format_http_response(httpx_response)}")
+
+ return Response(
+ status_code=httpx_response.status_code,
+ headers=[Header(name=name, value=value) for name, value in httpx_response.headers.items()],
+ body=httpx_response.text,
+ )
+
+
+# CLI + Core
+
+
+async def agent_loop(
+ state: State,
+ max_iterations: int,
+) -> None:
+ async def parse_actions(chat: rg.Chat) -> rg.Chat | None:
+ parsed: list[Actions] = []
+ for action_cls in ActionsList:
+ action = chat.last.try_parse(action_cls)
+ if action is not None:
+ parsed.append(action) # type: ignore
+
+ if not parsed:
+ logger.warning("Model didn't provide any valid actions")
+ return None
+
+ parsed = t.cast(list[Actions], [p.model for p in chat.last.parts])
+ if len(parsed) > state.max_actions:
+            logger.warning(f"Model provided more actions than allowed: {len(parsed)} > {state.max_actions}")
+
+ state.next_actions = parsed[: state.max_actions]
+ return None
+
+ for i in range(1, max_iterations + 1):
+ logger.info(f"Iteration {i}/{max_iterations}")
+ await state.base_chat.fork(state.get_prompt()).then(parse_actions).arun()
+ await state.step()
+
+
+@click.command()
+@click.option(
+ "-G",
+ "--first-goal",
+ type=str,
+ default="Find the API spec, register a user, get authenticated, then exploit.",
+ help="First goal to perform",
+)
+@click.option(
+ "-g",
+ "--generator-id",
+ type=str,
+ default="anthropic/claude-3-sonnet-20240229",
+ required=True,
+ help="Rigging generator identifier (gpt-4, mistral/mistral-medium, etc.)",
+)
+@click.option(
+ "-u",
+ "--base-url",
+ type=str,
+ required=True,
+ help="URL of the target application",
+)
+@click.option(
+ "-p",
+ "--proxy",
+ type=str,
+ help="HTTP proxy to use for requests",
+)
+@click.option(
+ "-i",
+ "--max-iterations",
+ type=int,
+ default=30,
+ help="Maximum number of iterations",
+)
+@click.option(
+ "-m",
+ "--max-actions",
+ type=int,
+ default=3,
+ help="Maximum number of actions allowed per generation round",
+)
+@click.option(
+ "--log-level",
+ type=click.Choice(logging.LogLevelList),
+ default="info",
+)
+@click.option("--log-file", type=click.Path(path_type=pathlib.Path), default="dvra.log")
+@click.option(
+ "--log-file-level",
+ type=click.Choice(logging.LogLevelList),
+ default="trace",
+)
+def cli(
+ first_goal: str,
+ generator_id: str,
+ base_url: str,
+ proxy: str | None,
+ max_iterations: int,
+ max_actions: int,
+ log_level: logging.LogLevelLiteral,
+ log_file: pathlib.Path,
+ log_file_level: logging.LogLevelLiteral,
+) -> None:
+ """
+    Rigging example for agentic exploitation of the Damn Vulnerable RESTful API (DVRA).
+ """
+
+ logging.configure_logging(log_level, log_file, log_file_level)
+
+ logger.success("Starting DVRA")
+
+ # Prepare our objects
+
+ generator = rg.get_generator(generator_id)
+ client = httpx.AsyncClient(
+ base_url=base_url,
+ verify=False,
+ proxies=(
+ {
+ "http://": proxy,
+ "https://": proxy,
+ }
+ if proxy
+ else None
+ ),
+ )
+
+ base_chat: rg.PendingChat = generator.chat(
+ [{"role": "system", "content": SYSTEM_PROMPT}],
+ rg.GenerateParams(max_tokens=4096),
+ )
+
+ state = State(client=client, max_actions=max_actions, base_chat=base_chat, goals=[first_goal])
+
+ logger.info("Starting with '{}'", first_goal)
+
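+    # Run the agent loop; Ctrl+C pauses so the operator can queue a new goal or exit.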
+ while True:
+ try:
+ asyncio.run(agent_loop(state, max_iterations))
+ except KeyboardInterrupt:
+ logger.info("Interrupted")
+ check = input("\nSet a new goal? (y/N): ")
+ if check.lower() == "y":
+ new_goal = input("Enter new goal: ")
+ state.goals.append(new_goal)
+ else:
+ raise
+
+
+if __name__ == "__main__":
+ cli()
diff --git a/mkdocs.yml b/mkdocs.yml
index c285add..b309014 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -5,13 +5,23 @@ site_url: https://rigging.dreadnode.io
repo_url: https://github.com/dreadnode/rigging
nav:
- - Home: index.md
+ - Home:
+ - Introduction: home/introduction.md
+ - Getting Started: home/getting-started.md
+ - Workflow: home/workflow.md
+ - Principles: home/principles.md
- Topics:
- - Generators: topics/generators.md
- - Chats: topics/chats.md
- Models: topics/models.md
+ - Generators: topics/generators.md
+ - Chats and Messages: topics/chats-and-messages.md
+ - Completions: topics/completions.md
+ - Callbacks and Mapping: topics/callbacks-and-mapping.md
+ - Async and Batching: topics/async-and-batching.md
- Tools: topics/tools.md
- - Logging: topics/setup_logging.md
+ - Agents: topics/agents.md
+ - Serialization: topics/serialization.md
+ - Logging: topics/logging.md
+ - Cheat Sheet: topics/cheatsheet.md
- API:
- rigging.chat: api/chat.md
- rigging.completion: api/completion.md
@@ -57,7 +67,7 @@ plugins:
docstring_options:
ignore_init_summary: true
docstring_section_style: list
- heading_level: 2
+ heading_level: 3
merge_init_into_class: true
show_signature_annotations: true
show_symbol_type_heading: true
@@ -77,7 +87,8 @@ markdown_extensions:
- pymdownx.snippets
- pymdownx.superfences
- pymdownx.details
- - pymdownx.tabbed
+ - pymdownx.tabbed:
+ alternate_style: true
extra_css:
- stylesheets/extra.css
diff --git a/rigging/__init__.py b/rigging/__init__.py
index 7a83c57..ffb1ce1 100644
--- a/rigging/__init__.py
+++ b/rigging/__init__.py
@@ -1,6 +1,6 @@
from rigging.chat import Chat, PendingChat
from rigging.completion import Completion, PendingCompletion
-from rigging.generator import GenerateParams, Generator, chat, complete, get_generator
+from rigging.generator import GenerateParams, Generator, chat, complete, get_generator, register_generator
from rigging.message import Message, MessageDict, Messages
from rigging.model import Model, attr, element, wrapped
from rigging.tool import Tool
@@ -23,6 +23,7 @@
"complete",
"Completion",
"PendingCompletion",
+ "register_generator",
]
from loguru import logger
diff --git a/rigging/chat.py b/rigging/chat.py
index 7307309..958212f 100644
--- a/rigging/chat.py
+++ b/rigging/chat.py
@@ -125,6 +125,16 @@ def conversation(self) -> str:
"""Returns a string representation of the chat."""
return "\n\n".join([str(m) for m in self.all])
+ @property
+    def message_dicts(self) -> list[MessageDict]:
+        """
+        Returns the chat as a list of minimal message dictionaries
+
+ Returns:
+ The chat as a list of messages with roles and content.
+ """
+ return [m.model_dump(include={"role", "content"}) for m in self.all]
+
def meta(self, **kwargs: t.Any) -> "Chat":
"""
Updates the metadata of the chat with the provided key-value pairs.
@@ -585,14 +595,14 @@ def until(
self,
callback: UntilMessageCallback,
*,
- attempt_recovery: bool = False,
+ attempt_recovery: bool = True,
drop_dialog: bool = True,
max_rounds: int = DEFAULT_MAX_ROUNDS,
) -> "PendingChat":
"""
Registers a callback to participate in validating the generation process.
- ```python
+ ```py
# Takes the next message being generated, and returns whether or not to continue
# generating new messages in addition to a list of messages to append before continuing
@@ -627,8 +637,7 @@ def callback(message: Message) -> tuple[bool, list[Message]]:
def using(
self,
- tool: Tool | t.Sequence[Tool],
- *,
+ *tools: Tool,
force: bool = False,
attempt_recovery: bool = True,
drop_dialog: bool = False,
@@ -639,7 +648,7 @@ def using(
Adds a tool or a sequence of tools to participate in the generation process.
Args:
- tool: The tool or sequence of tools to be added.
+        tools: The tools to be added.
force: Whether to force the use of the tool(s) at least once.
attempt_recovery: Whether to attempt recovery if the tool(s) fail by providing
validation feedback to the model before the next round.
@@ -653,7 +662,7 @@ def using(
The updated PendingChat object.
"""
- self.until_tools += tool if isinstance(tool, t.Sequence) else [tool]
+ self.until_tools += tools
self.inject_tool_prompt = inject_prompt or self.inject_tool_prompt
self.force_tool = force
if next((c for c in self.until_callbacks if c[0] == self._until_tools_callback), None) is None:
@@ -800,6 +809,7 @@ def _until(
# for now with the knowledge that behavior might be a bit
# unpredictable.
def _process(self) -> t.Generator[list[Message], Message, list[Message]]:
+ self._pre_run()
first_response = yield []
new_messages = [first_response]
for callback, reset_between, drop_internal, max_rounds in self.until_callbacks:
@@ -809,11 +819,10 @@ def _process(self) -> t.Generator[list[Message], Message, list[Message]]:
def _post_run(self, chats: list[Chat]) -> list[Chat]:
for callback in self.post_run_callbacks:
- if isinstance(callback, ThenChatCallback):
- chats = [callback(chat) or chat for chat in chats]
- elif isinstance(callback, MapChatCallback):
+ if isinstance(callback, MapChatCallback):
chats = callback(chats)
-
+ elif isinstance(callback, ThenChatCallback):
+ chats = [callback(chat) or chat for chat in chats]
return chats
async def _apost_run(self, chats: list[Chat]) -> list[Chat]:
@@ -823,11 +832,11 @@ async def _apost_run(self, chats: list[Chat]) -> list[Chat]:
raise ValueError("Cannot use async then()/map() callbacks inside a non-async run call")
for callback in self.post_run_callbacks:
- if isinstance(callback, AsyncThenChatCallback):
+ if isinstance(callback, AsyncMapChatCallback):
+ chats = await callback(chats)
+ elif isinstance(callback, AsyncThenChatCallback):
updated = await asyncio.gather(*[callback(chat) for chat in chats])
chats = [updated[i] or chat for i, chat in enumerate(chats)]
- elif isinstance(callback, AsyncMapChatCallback):
- chats = await callback(chats)
return chats
diff --git a/rigging/completion.py b/rigging/completion.py
index 45a82c0..b4da0f4 100644
--- a/rigging/completion.py
+++ b/rigging/completion.py
@@ -309,7 +309,7 @@ def until(
"""
Registers a callback to participate in validating the generation process.
- ```python
+ ```py
# Takes the generated text, and returns whether or not to retry generation.
def callback(text: str) -> bool:
diff --git a/rigging/logging.py b/rigging/logging.py
index 8b7a265..7fd5f73 100644
--- a/rigging/logging.py
+++ b/rigging/logging.py
@@ -1,7 +1,7 @@
"""
-We use loguru for logging. This module provides a function to configure the logging settings.
+We use loguru for logging. This module provides a function to configure logging handlers.
-To enable rigging logging, call `logger.enable("rigging")` after importing the module.
+To simply enable rigging logs to flow, call `logger.enable("rigging")` after importing the module.
"""
import pathlib
@@ -14,26 +14,22 @@
LogLevelList = ["trace", "debug", "info", "success", "warning", "error", "critical"]
LogLevelLiteral = t.Literal["trace", "debug", "info", "success", "warning", "error", "critical"]
+"""Valid logging levels."""
def configure_logging(
- log_level: str,
+ log_level: LogLevelLiteral,
log_file: pathlib.Path | None = None,
log_file_level: LogLevelLiteral = "debug",
) -> None:
"""
- Configures the loguru settings for the rigging module.
-
- This is optional, and calling `logger.enable("rigging")` will enable the logging
- and you can control the formatting and log levels using the loguru API.
+ Configures common loguru handlers.
Args:
- log_level: The desired log level. Valid values are 'TRACE', 'DEBUG', 'INFO',
- 'SUCCESS', 'WARNING', 'ERROR', and 'CRITICAL'.
+ log_level: The desired log level.
log_file: The path to the log file. If None, logging
will only be done to the console.
- log_file_level: The log level for the log file. Valid values
- are 'TRACE', 'DEBUG', 'INFO', 'SUCCESS', 'WARNING', 'ERROR', and 'CRITICAL'.
+ log_file_level: The log level for the log file.
"""
global g_configured
@@ -50,11 +46,6 @@ def configure_logging(
logger.level("ERROR", color="", icon="[!]")
logger.level("CRITICAL", color="", icon="[x]")
- # Default format:
- # "{time:YYYY-MM-DD HH:mm:ss.SSS} | "
- # "{level: <8} | "
- # "{name}:{function}:{line} - {message}",
-
custom_format = "{time:HH:mm:ss.SSS} | {level.icon} {message}"
logger.remove()
diff --git a/rigging/tool.py b/rigging/tool.py
index 7cd0afe..99e381c 100644
--- a/rigging/tool.py
+++ b/rigging/tool.py
@@ -11,6 +11,11 @@
from rigging.model import Model
SUPPORTED_TOOL_ARGUMENT_TYPES = int | float | str | bool
+"""Supported types for tool arguments."""
+
+SUPPORTED_TOOL_ARGUMENT_TYPES_LIST = [int, float, str, bool]
+"""Supported types for tool arguments as a list."""
+
ToolArgumentTypesCast = {
"int": int,
"float": float,
@@ -134,7 +139,7 @@ class Tool:
You should subclass this to define your own tools:
- ```python
+ ```py
def Hammer(Tool):
name = "Hammer"
description = "A tool for hitting things."
@@ -270,8 +275,10 @@ def get_description(self) -> ToolDescription:
f'Parameters must be annotated like Annotated[, ""] ({formatted_name})'
)
- if annotation_args[0] not in SUPPORTED_TOOL_ARGUMENT_TYPES.__args__: # type: ignore
- raise TypeError(f"Parameters must be annotated with supported types ({formatted_name})")
+ if annotation_args[0] not in SUPPORTED_TOOL_ARGUMENT_TYPES_LIST:
+ raise TypeError(
+ f"Parameters must be annotated with one of these types: {SUPPORTED_TOOL_ARGUMENT_TYPES_LIST} ({formatted_name})"
+ )
type_name = annotation_args[0].__name__
description = annotation_args[1]