diff --git a/packages/fdr-sdk/src/navigation/utils/index.ts b/packages/fdr-sdk/src/navigation/utils/index.ts
index d4a40c830e..5a595bc090 100644
--- a/packages/fdr-sdk/src/navigation/utils/index.ts
+++ b/packages/fdr-sdk/src/navigation/utils/index.ts
@@ -4,5 +4,7 @@ export * from "./createBreadcrumb";
export * from "./findNode";
export * from "./getApiReferenceId";
export * from "./getNoIndexFromFrontmatter";
+export * from "./toApis";
+export * from "./toPages";
export * from "./toRootNode";
export * from "./toUnversionedSlug";
diff --git a/packages/fdr-sdk/src/navigation/utils/toApis.ts b/packages/fdr-sdk/src/navigation/utils/toApis.ts
new file mode 100644
index 0000000000..3ff05c2ce1
--- /dev/null
+++ b/packages/fdr-sdk/src/navigation/utils/toApis.ts
@@ -0,0 +1,17 @@
+import { mapValues } from "es-toolkit";
+import { ApiDefinition } from "../..";
+import { DocsV2Read } from "../../client";
+
+/**
+ * Migrates every API definition in the docs response from the V1 read model
+ * to the latest ApiDefinition shape, keyed by API definition id.
+ */
+export function toApis(docs: DocsV2Read.LoadDocsForUrlResponse) {
+ return mapValues(docs.definition.apis, (api) =>
+ ApiDefinition.ApiDefinitionV1ToLatest.from(api, {
+ useJavaScriptAsTypeScript: false,
+ alwaysEnableJavaScriptFetch: false,
+ usesApplicationJsonInFormDataValue: false,
+ }).migrate(),
+ );
+}
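
A minimal usage sketch for reviewers (the package import specifier and the `docs` value are assumptions for illustration, not part of this change):

```ts
// Assumed specifier; toApis is re-exported through the navigation/utils barrel above.
import { toApis } from "@fern-api/fdr-sdk";

// A LoadDocsForUrlResponse fetched elsewhere (illustrative only).
declare const docs: Parameters<typeof toApis>[0];

// Record keyed by API definition id; each value has been migrated from the
// V1 read model to the latest ApiDefinition shape.
const apis = toApis(docs);
```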
diff --git a/packages/fdr-sdk/src/navigation/utils/toPages.ts b/packages/fdr-sdk/src/navigation/utils/toPages.ts
new file mode 100644
index 0000000000..728ae5840f
--- /dev/null
+++ b/packages/fdr-sdk/src/navigation/utils/toPages.ts
@@ -0,0 +1,9 @@
+import { mapValues } from "es-toolkit";
+import { DocsV2Read } from "../../client";
+
+/**
+ * Extracts the raw markdown for every page in the docs response, keyed by page id.
+ */
+export function toPages(docs: DocsV2Read.LoadDocsForUrlResponse) {
+ return mapValues(docs.definition.pages, (page) => page.markdown);
+}
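
And the corresponding sketch for toPages, under the same assumptions:

```ts
// Assumed specifier, as above.
import { toPages } from "@fern-api/fdr-sdk";

declare const docs: Parameters<typeof toPages>[0];

// Record of page id -> raw markdown, ready for a markdown-to-plain-text step.
const pages = toPages(docs);
const totalChars = Object.values(pages).reduce((sum, markdown) => sum + markdown.length, 0);
console.log(`${Object.keys(pages).length} pages, ${totalChars} characters of markdown`);
```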
diff --git a/packages/ui/fern-docs-search-server/package.json b/packages/ui/fern-docs-search-server/package.json
index 1a2d0e4352..628c53e138 100644
--- a/packages/ui/fern-docs-search-server/package.json
+++ b/packages/ui/fern-docs-search-server/package.json
@@ -39,11 +39,15 @@
"@fern-ui/fern-docs-mdx": "workspace:*",
"algoliasearch": "^5.8.1",
"es-toolkit": "^1.24.0",
+ "html-to-text": "^9.0.5",
+ "marked": "^5.1.0",
"pnpm": "^9.12.1",
"ts-essentials": "^10.0.1"
},
"devDependencies": {
"@fern-platform/configs": "workspace:*",
+ "@types/html-to-text": "^9.0.1",
+ "@types/marked": "^5.0.0",
"@types/node": "^18.7.18",
"depcheck": "^1.4.3",
"eslint": "^8.56.0",
diff --git a/packages/ui/fern-docs-search-server/src/algolia/__test__/__snapshots__/humanloop.test.ts.snap b/packages/ui/fern-docs-search-server/src/algolia/__test__/__snapshots__/humanloop.test.ts.snap
index c21e288d54..7ade3c52d3 100644
--- a/packages/ui/fern-docs-search-server/src/algolia/__test__/__snapshots__/humanloop.test.ts.snap
+++ b/packages/ui/fern-docs-search-server/src/algolia/__test__/__snapshots__/humanloop.test.ts.snap
@@ -40,6 +40,38 @@ You can learn more about the challenges of AI development and how Humanloop solv
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Getting Started",
+ ],
+ "content": "Humanloop enables AI and product teams to develop LLM-based applications that are reliable and scalable.
+
+Principally, it is an evaluation framework that enables you to rigorously measure and improve LLM performance during development
+and in production and a collaborative workspace where engineers, PMs and subject matter experts improve prompts, tools and agents
+together.
+
+By adopting Humanloop, teams save 6-8 engineering hours per project each week and they feel confident that their AI is reliable.
+
+[file:e984cd1c-3aa7-4012-ba95-e5c09324ed79]
+
+
+The power of Humanloop lies in its integrated approach to AI development. Evaluation, monitoring and prompt engineering in one
+integrated platform enables you to understand system performance and take the actions needed to fix it.
+
+The SDK slots seamlessly into your existing code-based orchestration and the user-friendly interface allows both developers and
+non-technical stakeholders to adjust the AI together.
+
+You can learn more about the challenges of AI development and how Humanloop solves them in Why Humanloop?
+[/docs/v5/getting-started/why-humanloop].",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/getting-started/overview",
+ "title": "Overview",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -57,6 +89,82 @@ You can learn more about the challenges of AI development and how Humanloop solv
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Getting Started",
+ ],
+ "content": "LLMS BREAK TRADITIONAL SOFTWARE PROCESSES
+
+The principal way you "program" LLMs is through natural language instructions called prompts. There's a plethora of techniques
+needed to prompt the models to work robustly, reliably and with the correct knowledge.
+
+Developing, managing and evaluating prompts for LLMs is surprisingly hard and dissimilar to traditional software in the following
+ways:
+
+ * Subject matter experts matter more than ever. As LLMs are being applied to all different domains, the people that know how they
+ should best perform are rarely the software engineers but the experts in that field.
+ * AI output is often non-deterministic. Innocuous changes to the prompts can cause unforeseen issues elsewhere.
+ * AI outputs are subjective. It’s hard to measure how well products are working and so, without robust evaluation, larger
+ companies simply can’t trust putting generative AI in production.
+
+Bad workflows for generative AI are costing you through wasted engineering effort and delays to launch
+[file:602d58f3-1208-4fa3-bc95-c307d9108bd6]
+
+Many companies struggle to enable the collaboration needed between product leaders, subject matter experts and engineers. Often
+they'll rely on a hodge-podge of tools like the OpenAI Playground, custom scripts and complex spreadsheets. The process is slow
+and error-prone, wasting engineering time and leading to long delays and feelings of uncertainty.
+
+
+
+
+HUMANLOOP SOLVES THE MOST CRITICAL WORKFLOWS AROUND PROMPT ENGINEERING AND EVALUATION
+
+We give you an interactive environment where your domain experts, product managers and engineers can work together to iterate on
+prompts. Coupled with this are tools for rigorously evaluating the performance of your AI systems.
+
+Coding best practices still apply. All your assets are strictly versioned and can be serialised to work with existing systems like
+git and your CI/CD pipeline. Our TypeScript and Python SDKs seamlessly integrate with your existing codebases.
+
+Companies like Duolingo and AmexGBT use Humanloop to manage their prompt development and evaluation so they can produce
+high-quality AI features and be confident that they work appropriately.
+
+> “We implemented Humanloop at a crucial moment for Twain when we had to develop and test many new prompts for a new feature
+> release. I cannot imagine how long it would have taken us to release this new feature without Humanloop.” – Maddy Ralph, Prompt
+> Engineer at Twain
+
+Check out more detailed case study pages [https://humanloop.com/customers] for more real world examples of the impact of
+Humanloop.
+
+
+
+
+WHO'S IT FOR?
+
+Humanloop is an enterprise-grade stack for AI and product teams. We are SOC-2 compliant, offer self-hosting and never train on
+your data.
+
+Product owners and subject matter experts appreciate that Humanloop enables them to direct the AI behavior through the
+intuitive UI.
+
+Developers find that the Humanloop SDK/API slots well into existing code-based LLM orchestration without forcing unhelpful
+abstractions upon them, while removing bottlenecks around updating prompts and running evaluations.
+
+With Humanloop, companies are overcoming the challenges of building with AI and shipping groundbreaking applications with
+confidence: By giving companies the right tools, Humanloop dramatically accelerates their AI adoption and makes it easy for best
+practices to spread around an organization.
+
+> “Our teams use Humanloop as our development playground to try out various language models, develop our prompts, and test
+> performance. We are still in the official onboarding process but Humanloop is already an essential part of our AI R&D process.“
+> – American Express Global Business Travel",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/getting-started/why-humanloop",
+ "title": "Why Humanloop?",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -180,6 +288,96 @@ Using the Prompt Editor will use your OpenAI credits in the same way that the Op
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Getting Started",
+ ],
+ "content": "#### Create a Humanloop Account
+
+If you haven’t already, create an account or log in to Humanloop
+
+ADD AN OPENAI API KEY
+
+If you’re the first person in your organization, you’ll need to add an API key to a model provider.
+
+ 1. Go to OpenAI and grab an API key [https://platform.openai.com/api-keys]
+ 2. In Humanloop Organization Settings [https://app.humanloop.com/account/api-keys] set up OpenAI as a model provider.
+
+Using the Prompt Editor will use your OpenAI credits in the same way that the OpenAI playground does. Keep your API keys for
+Humanloop and the model providers private.
+
+
+GET STARTED
+
+### Create a Prompt File
+
+When you first open Humanloop you’ll see your File navigation on the left. Click ‘+ New’ and create a Prompt.
+
+[file:ad732e1d-77a8-4576-9933-1db6f9d9d28f]
+
+In the sidebar, rename this file to "Comedian Bot" now or later.
+
+
+CREATE THE PROMPT TEMPLATE IN THE EDITOR
+
+The left hand side of the screen defines your Prompt – the parameters such as model, temperature and template. The right hand side
+is a single chat session with this Prompt.
+
+[file:b9ed95cc-edc2-4c49-b8d3-4f164a083123]
+
+Click the “+ Message” button within the chat template to add a system message to the chat template.
+
+[file:5d7dd0e4-73f6-41b9-ad2b-60ba9f349f26]
+
+Add the following templated message to the chat template.
+
+You are a funny comedian. Write a joke about {{topic}}.
+
+
+This message forms the chat template. It has an input slot called topic (surrounded by two curly brackets) for an input value that
+is provided each time you call this Prompt.
+
+On the right hand side of the page, you’ll now see a box in the Inputs section for topic.
+
+ 1. Add a value for topic e.g. music, jogging, whatever
+ 2. Click Run in the bottom right of the page
+
+This will call OpenAI’s model and return the assistant response. Feel free to try other values, the model is very funny.
+
+You now have a first version of your prompt that you can use.
+
+
+COMMIT YOUR FIRST VERSION OF THIS PROMPT
+
+ 1. Click the Commit button
+ 2. Put “initial version” in the commit message field
+ 3. Click Commit
+
+[file:386f75eb-c97a-4923-9823-168a14848719]
+
+
+VIEW THE LOGS
+
+Under the Prompt File, click ‘Logs’ to view all the generations from this Prompt
+
+Click on a row to see the details of what version of the prompt generated it. From here you can give feedback to that generation,
+see performance metrics, open up this example in the Editor, or add this log to a dataset.
+
+[file:f2b286b8-7fcf-4323-9308-6ca5fbc22e44]
+
+
+NEXT STEPS
+
+Well done! You've now created your first Prompt. If you look around it might seem a bit empty at the moment.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/tutorials/quickstart",
+ "title": "Quickstart Tutorial",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -297,6 +495,85 @@ Prompts, Tools and Evaluators are the core building blocks of your AI features o
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Getting Started",
+ "Concepts",
+ ],
+ "content": "Prompts, Tools and Evaluators are the core building blocks of your AI features on Humanloop:
+
+ * Prompts [./prompts]: Prompts define how a large language model behaves.
+ * Tools [./tools]: Tools are functions that can extend your LLMs with access to external data sources and enable them to take
+ actions.
+ * Evaluators [./evaluators]: Evaluators on Humanloop are functions that can be used to judge the output of Prompts, Tools or
+ other Evaluators.
+
+[file:c0b65500-b3ad-4d8a-80bb-90c304430193]
+
+
+FILE PROPERTIES
+
+These core building blocks of Prompts, Tools and Evaluators are represented as different file types within a flexible filesystem
+in your Humanloop organization.
+
+All file types share the following key properties:
+
+
+MANAGED UI OR CODE FIRST
+
+You can create and manage these files in the Humanloop UI [https://app.humanloop.com/], or via the API [/docs/api-reference/].
+Product teams and their subject matter experts may prefer using the UI first workflows for convenience, whereas AI teams and
+engineers may prefer to use the API for greater control and customisation.
+
+
+ARE STRICTLY VERSION CONTROLLED
+
+Files have immutable versions that are uniquely determined by their parameters that characterise the behaviour of the system. For
+example, a Prompt version is determined by the prompt template, base model and hyperparameters chosen. Within the Humanloop Editor
+and via the API, you can commit new versions of a file, view the history of changes and revert to a previous version.
+
+
+HAVE A FLEXIBLE RUNTIME
+
+All files can be called (if you use the Humanloop runtime) or logged to (where you manage the runtime yourself). For example, with
+Prompts, Humanloop integrates to all the major model providers [http://humanloop.com/docs/reference/supported-models]. You can
+choose to call a Prompt, where Humanloop acts as a proxy to the model provider. Alternatively, you can choose to manage the model
+calls yourself and log the results to the Prompt on Humanloop. Using the Humanloop runtime is generally the simpler option and
+allows you to call the file natively within the Humanloop UI, whereas owning the runtime yourself and logging allows you to have
+more fine-grained control.
+
+
+ARE COMPOSABLE WITH SESSIONS
+
+Files can be combined with other files to create more complex systems like chains and agents. For example, a Prompt can call a
+Tool, which can then be evaluated by an Evaluator. The orchestration of more complex systems is best done in code using the API
+and the full trace of execution is accessible in the Humanloop UI for debugging and evaluation purposes.
+
+
+HAVE A SERIALIZED FORM
+
+All files can be exported and imported in a serialized form. For example, Prompts are serialized to our .prompt
+[/docs/reference/prompt-file-format] format. This provides a useful medium for more technical teams that wish to maintain the
+source of truth in their existing version control system like git.
+
+
+SUPPORT DEPLOYMENTS
+
+You can tag file versions with specific environments and target these environments via the UI and API to facilitate robust
+deployment workflows.
+
+
+Humanloop also has the concept of Datasets [/docs/concepts/datasets] that are used within Evaluation [/docs/concepts/evaluators]
+workflows. Datasets share all the same properties, except they do not have a runtime consideration.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/concepts/overview",
+ "title": "Overview",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -542,6 +819,109 @@ endpoint: chat
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Getting Started",
+ "Concepts",
+ ],
+ "content": "[file:eaa91cac-b21c-408b-b20c-0cc3794f34fd]
+
+A Prompt on Humanloop encapsulates the instructions and other configuration for how a large language model should perform a
+specific task. Each change in any of the following properties creates a new version of the Prompt:
+
+ * the template such as Write a song about {{topic}}. For chat models, your template will contain an array of messages.
+ * the model e.g. gpt-4o
+ * all the parameters to the model such as temperature, max_tokens, top_p etc.
+ * any tools available to the model
+
+A Prompt is callable in that if you supply the necessary inputs, it will return a response from the model.
+
+Inputs are defined in the template through the double-curly bracket syntax e.g. {{topic}} and the value of the variable will need
+to be supplied when you call the Prompt to create a generation.
+
+This separation of concerns, keeping configuration separate from the query time data, is crucial for enabling you to experiment
+with different configurations and evaluate any changes. The Prompt stores the configuration and the query time data in Logs
+[./logs], which can then be used to create Datasets for evaluation purposes.
+
+Note that we use a capitalized "[Prompt](/docs/concepts/prompts)" to refer to the entity in Humanloop, and a lowercase "prompt" to
+refer to the general concept of input to the model.
+
+---
+model: gpt-4
+temperature: 1.0
+max_tokens: -1
+provider: openai
+endpoint: chat
+---
+
+ Write a song about {{topic}}
+
+
+
+
+VERSIONING
+
+A Prompt file will have multiple versions as you try out different models, params or templates, but they should all be doing the
+same task, and in general should be swappable with one another.
+
+By versioning your Prompts, you can track how adjustments to the template or parameters influence the LLM's responses. This is
+crucial for iterative development, as you can pinpoint which versions produce the most relevant or accurate outputs for your
+specific use case.
+
+
+WHEN TO CREATE A NEW PROMPT
+
+You should create a new Prompt for every different ‘task to be done’ with the LLM. For example each of these tasks are things that
+can be done by an LLM and should be a separate Prompt File: Writing Copilot, Personal Assistant, Summariser, etc.
+
+We've seen people find it useful to also create a Prompt called 'Playground' where they can free form experiment without concern
+of breaking anything or making a mess of their other Prompts.
+
+
+USING PROMPTS
+
+Prompts are callable as an API. You supply any query-time data such as input values or user messages, and the model will respond
+with its text output.
+
+You can also use Prompts without proxying through Humanloop to the model provider and instead call the model yourself and
+explicitly log the results to your Prompt.
+
+
+SERIALIZATION (.PROMPT FILE)
+
+Our .prompt file format is a serialized version of a model config that is designed to be human-readable and suitable for checking
+into your version control systems alongside your code. See the .prompt files reference [../reference/prompt-file-format]
+for more details.
+
+
+FORMAT
+
+The .prompt file is heavily inspired by MDX [https://mdxjs.com/], with model and hyperparameters specified in a YAML header
+alongside a JSX-inspired format for your Chat Template.
+
+
+BASIC EXAMPLES
+
+\`\`\`jsx Chat --- model: gpt-4 temperature: 1.0 max_tokens: -1 provider: openai endpoint: chat --- You are a friendly assistant. \`\`\`
+\`\`\`jsx Completion --- model: claude-2 temperature: 0.7 max_tokens: 256 top_p: 1.0 provider: anthropic endpoint: complete ---
+Autocomplete the sentence.
+
+Context: {{context}}
+
+{{sentence}}
+
+
+
+",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/concepts/prompts",
+ "title": "Prompts",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -754,6 +1134,88 @@ Some Tools are executable within Humanloop, and these offer the greatest utility
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Getting Started",
+ "Concepts",
+ ],
+ "content": "[file:b0cea7a9-cf40-41fb-91ce-5085cf7b4bf2]
+
+Humanloop Tools can be used in multiple ways:
+
+ * by the LLM via OpenAI function calling [https://platform.openai.com/docs/guides/function-calling]
+ * within the Prompt template
+ * as part of a chain of events such as a Retrieval Tool in a RAG pipeline
+
+Some Tools are executable within Humanloop, and these offer the greatest utility and convenience. For example, Humanloop has
+pre-built integrations for Google search and Pinecone, and so these Tools can be executed and the results inserted into the
+API or Editor automatically.
+
+
+TOOL USE (FUNCTION CALLING)
+
+Certain large language models support tool use or "function calling". For these models, you can supply the description of
+functions and the model can choose to call one or more of them by providing the values to call the functions with.
+
+[file:b950fee9-1b89-4bcc-8a7a-cd3f097f57cf]
+
+
+Tools all have a functional interface that can be supplied as the JSONSchema needed for function calling. Additionally, if the
+Tool is executable on Humanloop, the result of any tool will automatically be inserted into the response in the API and in the
+Editor.
+
+Tools for function calling can be defined inline in our Editor or centrally managed for an organization.
+
+
+TOOLS IN A PROMPT TEMPLATE
+
+You can add a tool call in a prompt template and the result will be inserted into the prompt sent to the model. This allows you to
+insert retrieved information into your LLMs calls.
+
+For example, if you have {{ google("population of india") }} in your template, this Google tool will get executed and replaced
+with the resulting text “1.42 billion (2024)” before the prompt is sent to the model. Additionally, if your template contains a
+Tool call that uses an input variable e.g. {{ google(query) }} this will take the value of the input supplied in the request,
+compute the output of the Google tool, and insert that result into the resulting prompt that is sent to the model.
+
+Example of a Tool being used within a Prompt template. This example will mean that this Prompt needs two inputs to be supplied
+(\`query\`, and \`top_k\`) [file:62d3d155-1f83-458a-b9f1-b103fc3ba544]
+
+Example of a Tool being used within a Prompt template. This example will mean that this Prompt needs two inputs to be supplied
+(query, and top_k)
+
+
+TOOLS WITHIN A CHAIN
+
+You can call a Tool within a session of events and post the result to Humanloop. For example, in a RAG pipeline, instrumenting
+your retrieval function as a Tool enables you to trace through the full sequence of events. The retrieval Tool will be
+versioned and the logs will be available in the Humanloop UI, enabling you to independently improve that step in the pipeline.
+
+
+SUPPORTED TOOLS
+
+
+THIRD-PARTY INTEGRATIONS
+
+ * Pinecone Search - Vector similarity search using Pinecone vector DB and OpenAI embeddings.
+ * Google Search - API for searching Google: https://serpapi.com/ [https://serpapi.com/].
+ * GET API - Send a GET request to an external API.
+
+
+HUMANLOOP TOOLS
+
+ * Snippet Tool - Create reusable key/value pairs for use in prompts - see how to use the Snippet Tool
+ [/docs/development/guides/reusable-snippets].
+ * JSON Schema - JSON schema that can be used across multiple Prompts - see how to link a JSON Schema Tool
+ [/docs/development/guides/link-json-schema-tool].",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/concepts/tools",
+ "title": "Tools",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -922,6 +1384,71 @@ Datasets are primarily used for evaluation purposes on Humanloop. You can think
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Getting Started",
+ "Concepts",
+ ],
+ "content": "[file:0eb61a54-2ea0-4644-aad6-6f7ff921b2f2]
+
+Datasets are primarily used for evaluation purposes on Humanloop. You can think of a Dataset as a collection of testcases for your
+AI applications. Each testcase is represented by a Datapoint, which contains the following fields:
+
+ * Inputs: a collection of prompt variable values which are interpolated into the prompt template at generation time (i.e. they
+ replace the {{ variables }} you define in your prompt template).
+ * Messages: for chat models, as well as the prompt template, you can optionally have a history of chat messages that are fed into
+   a model when generating a response.
+ * Target: certain types of test cases can benefit from comparing the output of your application to an expected or desired behaviour. In
+ the simplest case, this can simply be a string representing the exact output you hope the model produces for the inputs and
+ messages represented by the Datapoint. In more complex cases, you can define an arbitrary JSON object for target with whatever
+ fields are necessary to help you specify the intended behaviour.
+
+
+[file:1774cdcd-21e8-4b64-bd06-3f9be9d99440]
+
+
+VERSIONING
+
+A Dataset will have multiple versions as you iterate on refining your test cases for your task. This tends to be an evolving
+process as you learn more about how your Prompts [./prompts] behave and how users are interacting with your AI application in the
+wild.
+
+Dataset versions are immutable and are uniquely defined by the contents of the Datapoints. If you change, add, or remove
+existing Datapoints, this will constitute a new version. When running Evaluations you always reference a specific version
+of the Dataset. This allows you to have confidence in your Evaluations because they are always tied transparently to a specific
+set of test cases.
+
+
+CREATING DATASETS
+
+Datasets can be created in the following ways:
+
+ * via CSV upload in the UI.
+ * converting from existing Logs [./logs] you've stored on Humanloop. These can be Prompt [./prompts] or Tool [./tools] Logs
+ depending on your Evaluation goals.
+ * via API requests.
+
+See our detailed guide [../evaluation/guides/create-dataset] for more details.
+
+
+EVALUATIONS USE CASE
+
+Evaluations [../evaluation/overview] are run on Humanloop by iterating over the Datapoints in a Dataset and generating output for
+the different versions of your AI application that you wish to compare. For example, you may wish to test out how Claude Opus
+compares to GPT-4 and Google Gemini on cost and accuracy for a specific set of testcases that describe the expected behaviour of
+your application.
+
+Evaluators [./evaluators] are then run against the logs generated by the AI applications for each Datapoint to provide a judgement
+on how well the model performed and can reference the target field in the Datapoint to determine the expected behaviour.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/concepts/datasets",
+ "title": "Datasets",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -1027,6 +1554,101 @@ Evaluators can be leveraged for [Monitoring](../observability/overview) your liv
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Getting Started",
+ "Concepts",
+ ],
+ "content": "[file:bbb4a5dd-7cb0-491c-90e0-db33d16cd18f]
+
+The core entity in the Humanloop evaluation framework is an Evaluator [/docs/concepts/evaluators] - a function you define which
+takes an LLM-generated log as an argument and returns a judgment. The judgment is typically either a boolean or a number,
+indicating how well the model performed according to criteria you determine based on your use case.
+
+Evaluators can be leveraged for Monitoring [../observability/overview] your live AI application, as well as for Evaluations
+[../evaluation/overview] to benchmark different versions of your AI application against each other pre-deployment.
+
+
+SOURCES OF JUDGEMENT
+
+Currently, you can define three different Evaluator sources on Humanloop:
+
+ * Code - using simple deterministic rules based judgments against attributes like cost, token usage, latency, regex rules on the
+ output, etc. These are generally fast and cheap to run at scale.
+ * AI - using other foundation models to provide judgments on the output. This allows for more qualitative and nuanced judgments
+ for a fraction of the cost of human judgments.
+ * Human - getting gold standard judgments from either end users of your application, or internal domain experts. This can be the
+ most expensive and slowest option, but also the most reliable.
+
+[file:27c98fd1-3c55-4567-9b09-c21bee8f99f3]
+
+
+ONLINE MONITORING VERSUS OFFLINE EVALUATION
+
+Evaluators can be deployed on Humanloop to support both testing new versions of your Prompts and Tools during development and for
+monitoring live apps that are already in production.
+
+
+ONLINE MONITORING
+
+Evaluators are run against the Logs [./logs] generated by your AI applications. Typically, they are used to monitor deployed model
+performance over time and check for drift or degradation in performance. The Evaluator in this case only takes a single argument -
+the log generated by the model. The Evaluator is expected to return a judgment based on the Log, which can be used to trigger
+alerts or other actions in your monitoring system.
+
+See our Monitoring guides [../observability/overview] for more details.
+
+
+OFFLINE EVALUATIONS
+
+Offline Evaluators are combined with predefined Datasets [./datasets] in order to evaluate your application as you iterate in your
+prompt engineering workflow, or to test for regressions in a CI environment.
+
+A test Dataset is a collection of Datapoints, which are roughly analogous to unit tests or test cases in traditional programming.
+Each datapoint specifies inputs to your model and (optionally) some target data.
+
+When you run an offline evaluation, a Log needs to be generated using the inputs of each Datapoint and the version of the
+application being evaluated. Evaluators then need to be run against each Log to provide judgements, which are then aggregated to
+provide an overall score for the application. Evaluators in this case take the generated Log and the testcase datapoint that gave
+rise to it as arguments.
+
+See our guides on creating Datasets [/docs/evaluation/guides/create-dataset] and running Evaluations
+[/v5/evaluation/guides/run-evaluation] for more details.
+
+
+HUMANLOOP RUNTIME VERSUS YOUR RUNTIME
+
+Evaluations require the following to be generated:
+
+ 1. Logs for the datapoints.
+ 2. Evaluator results for those generated logs.
+
+Evaluators which are defined within the Humanloop UI can be executed in the Humanloop runtime, whereas Evaluators defined in your
+code can be executed in your runtime and the results posted back to Humanloop. This provides flexibility for supporting more
+complex evaluation workflows.
+
+
+RETURN TYPES
+
+Evaluators apply judgment to Logs. This judgment can be of the following types:
+
+ * Boolean - A true/false judgment.
+ * Number - A numerical judgment, which can act as a rating or score.
+ * Select - One of a predefined set of options. One option must be selected.
+ * Multi-select - Any number of a predefined set of options. None, one, or many options can be selected.
+ * Text - A free-form text judgment.
+
+Code and AI Evaluators can return either Boolean or Number judgments. Human Evaluators can return Number, Select, Multi-select, or
+Text judgments.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/concepts/evaluators",
+ "title": "Evaluators",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -1226,6 +1848,26 @@ For the example of a Prompt above, the Log would have one \`input\` called ‘to
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Getting Started",
+ "Concepts",
+ ],
+ "content": "All Prompts [./prompts], Tools [./tools] and Evaluators [./evaluators] produce Logs. A Log contains the inputs and the outputs and
+tracks which version of Prompt/Tool/Evaluator was used.
+
+For the example of a Prompt above, the Log would have one input called ‘topic’ and the output will be the completion.
+
+A Log which contains an input query [file:7b05abc5-c1bd-46e2-806c-70edf6fab22a]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/concepts/logs",
+ "title": "Logs",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -1251,6 +1893,65 @@ Environments enable you to deploy different versions of your files to specific e
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Getting Started",
+ "Concepts",
+ ],
+ "content": "Environments enable you to deploy different versions of your files to specific environments, allowing you to separately manage the
+deployment workflow between testing and production. With environments, you have the control required to manage the full LLM
+deployment lifecycle.
+
+
+MANAGING YOUR ENVIRONMENTS
+
+Every organisation automatically receives a default production environment. You can create additional environments with custom
+names by visiting your organisation's environments page [https://app.humanloop.com/account/environments].
+
+Only Enterprise customers can create more than one environment
+
+The environments you define for your organisation will be available for each file and can be viewed in the file's dashboard once
+created.
+
+[file:a780c738-2da6-432c-95bb-158ea103d44d]
+
+THE DEFAULT ENVIRONMENT
+
+By default, the production environment is marked as the Default environment. This means that all API calls that don't explicitly
+target a specific environment will use this environment. You can rename the default environment on the organisation's environments
+[https://app.humanloop.com/account/environments] page.
+
+Renaming the environments will take immediate effect, so ensure that this change is planned and does not disrupt your production
+workflows.
+
+
+USING ENVIRONMENTS
+
+Once created on the environments page, environments can be used for each file and are visible in the respective dashboards.
+
+You can deploy directly to a specific environment by selecting it in the Deployments section.
+
+[file:d2a9f417-bc43-4729-beb0-52adc535df07]
+
+Alternatively, you can deploy to multiple environments simultaneously by deploying a version from either the Editor or the
+Versions table.
+
+
+USING ENVIRONMENTS VIA API
+
+[file:3e7ce42e-e625-49cd-abbd-51965ca1d3f4]
+
+You can now call the version deployed in a specific environment by including an optional additional environment field. An example
+of this field can be seen in the v5 Prompt Call [/v5/api-reference/prompts/call-stream#request.query.environment] documentation.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/concepts/environments",
+ "title": "Environments",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -1396,6 +2097,32 @@ For more information on how to create and manage directories, see our [Create a
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Getting Started",
+ "Concepts",
+ ],
+ "content": "Directories in Humanloop serve as an organizational tool, allowing users to group related files and structure their work
+logically. They function similarly to folders in a traditional file system, providing a hierarchical structure for managing
+Prompts [/docs/concepts/prompts], Tools [/docs/concepts/tools], Datasets [/docs/concepts/datasets], and other resources.
+
+Directories are primarily for organizational needs but they can have functional impacts if you are referencing Prompts, Tools etc.
+by \`path\`.
+
+We recommend always referring to Prompts, Tools, etc. by their id as this will make your workflows more robust and avoid issues if
+the files are moved.
+
+For more information on how to create and manage directories, see our Create a Directory
+[/docs/development/guides/create-directory] guide.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/concepts/directories",
+ "title": "Directories",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -1419,6 +2146,239 @@ This overview will explain the basics of prompt development, versioning, and man
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Prompt Management + AI Engineering",
+ ],
+ "content": "Your AI application can be broken down into Prompts, Tools, and Evaluators. Humanloop versions and manages each of these artifacts
+to enable team collaboration and evaluation of each component of your AI system.
+
+This overview will explain the basics of prompt development, versioning, and management, and how to best integrate your LLM calls
+with Humanloop.
+
+
+PROMPT MANAGEMENT
+
+[file:eaa91cac-b21c-408b-b20c-0cc3794f34fd]
+
+Prompts [/docs/concepts/prompts] are a fundamental part of interacting with large language models (LLMs). They define the
+instructions and parameters that guide the model's responses. In Humanloop, Prompts are managed with version control, allowing you
+to track changes and improvements over time.
+
+---
+model: gpt-4o
+temperature: 1.0
+max_tokens: -1
+---
+
+ Write a song about {{topic}}
+
+
+
+A Prompt [/docs/concepts/prompts] on Humanloop encapsulates the instructions and other configuration for how a large language
+model should perform a specific task. Each change in any of the following properties creates a new version of the Prompt:
+
+ * the template such as Write a song about {{topic}}. For chat models, your template will contain an array of messages.
+ * the model e.g. gpt-4o
+ * all the parameters to the model such as temperature, max_tokens, top_p etc.
+ * any tools available to the model
+
+
+CREATING A PROMPT
+
+You can create a Prompt explicitly in the Prompt Editor [/docs/development/guides/create-prompt] or via the API
+[/docs/v5/api-reference/prompts/upsert].
+
+New prompts can also be created automatically via the API if you specify the Prompt's path (its name and directory) while
+supplying the Prompt's parameters and template. This is useful if you are developing your prompts in code and want to be able to
+version them as you make changes to the code.
+
+
+VERSIONING
+
+A Prompt will have multiple versions as you experiment with different models, parameters, or templates. However, all versions
+should perform the same task and generally be interchangeable with one another.
+
+By versioning your Prompts, you can track how adjustments to the template or parameters influence the LLM's responses. This is
+crucial for iterative development, as you can pinpoint which versions produce the most relevant or accurate outputs for your
+specific use case.
+
+As you edit your prompt, new versions of the Prompt are created automatically. Each version is timestamped and given a unique
+version ID which is deterministically based on the Prompt's contents. For every version that you want to "save", you commit that
+version and it will be recorded as a new committed version of the Prompt with a commit message.
+
+WHEN TO CREATE A NEW PROMPT
+
+You should create a new Prompt for every different 'task to be done' with the LLM. For example each of these tasks are things that
+can be done by an LLM and should be a separate Prompt File: Writing Copilot, Personal Assistant, Summariser, etc.
+
+We've seen people find it useful to also create a Prompt called 'Playground' where they can free form experiment without concern
+of breaking anything or making a mess of their other Prompts.
+
+
+PROMPT ENGINEERING
+
+Understanding the best practices for working with large language models can significantly enhance your application's performance.
+Each model has its own failure modes, and the methods to address or mitigate these issues are not always straightforward. The
+field of "prompt engineering" has evolved beyond just crafting prompts to encompass designing systems that incorporate model
+queries as integral components.
+
+For a start, read our Prompt Engineering 101 [https://humanloop.com/blog/prompt-engineering-101] guide which covers techniques to
+improve model reasoning, reduce the chances of model hallucinations, and more.
+
+
+PROMPT TEMPLATES
+
+Inputs are defined in the template through the double-curly bracket syntax e.g. {{topic}} and the value of the variable will need
+to be supplied when you call the Prompt to create a generation.
+
+Property context:
+
+Location: {{location}}
+Number of Bedrooms: {{number_of_bedrooms}}
+Number of Bathrooms: {{number_of_bathrooms}}
+Square Footage: {{square_footage}}
+Distance to Key Locations (e.g., downtown, beach): {{distance_to_key_locations}}
+Year Built: {{year_built}}
+Price: {{price}}
+Contact Information: {{contact_information}}
+Instructions:
+Generate a marketing description for the property based on the provided context. The description should be between 150-200 words and have a friendly, engaging tone. Highlight the key features and amenities that make this property attractive to potential buyers. Ensure the copy is informative and enticing, encouraging readers to take action.
+
+
+This separation of concerns, keeping configuration separate from the query time data, is crucial for enabling you to experiment
+with different configurations and evaluate any changes. The Prompt stores the configuration and the query time data in Logs
+[../concepts/logs], which can then be used to create Datasets for evaluation purposes.
+
+
+TOOL USE (FUNCTION CALLING)
+
+Certain large language models support tool use or "function calling". For these models, you can supply the description of
+functions and the model can choose to call one or more of them by providing the values to call the functions with.
+
+Function calling enables the model to perform various tasks:
+
+1. Call external APIs: The model can translate natural language into API calls, allowing it to interact with external services and
+retrieve information.
+
+2. Take actions: The model can exhibit agentic behavior, making decisions and taking actions based on the given context.
+
+3. Provide structured output: The model's responses can be constrained to a specific structured format, ensuring consistency and
+ease of parsing in downstream applications.
+
+[file:b950fee9-1b89-4bcc-8a7a-cd3f097f57cf]
+
+Tools for function calling can be defined inline in the Prompt editor in which case they form part of the Prompt version.
+Alternatively, they can be pulled out in a Tool file which is then referenced in the Prompt.
+
+Each Tool has a functional interface that can be supplied as the JSON Schema needed for function calling. Additionally, if the Tool
+is executable on Humanloop, the result of any tool will automatically be inserted into the response in the API and in the Editor.
+
+
+USING PROMPTS
+
+Prompts are callable as an API. You supply any query-time data such as input values or user messages, and the model will respond
+with its text output.
+
+A Prompt is callable in that if you supply the necessary inputs, it will return a response from the model.
+
+Once you have created and versioned your Prompt, you can call it as an API to generate responses from the large language model
+directly. You can also fetch the log data from your LLM calls, enabling you to evaluate and improve your models.
+
+
+PROXYING YOUR LLM CALLS VS ASYNC LOGGING
+
+The easiest way to both call the large language model with your Prompt and to log the data is to use the Prompt.call() method (see
+the guide on Calling a Prompt [/docs/development/guides/call-prompt]) which will do both in a single API request. However, there
+are two main reasons why you may wish to log the data separately from generation:
+
+ 1. You are using your own model that is not natively supported in the Humanloop runtime.
+ 2. You wish to avoid relying on the Humanloop runtime, as the proxied calls add a small additional latency.
+
+The prompt.call() API encapsulates the LLM provider calls (for example openai.Completions.create()), the model-config selection
+and logging steps in a single unified interface. There may be scenarios where you wish to manage the LLM provider calls directly in
+your own code instead of relying on Humanloop.
+
+Humanloop provides a comprehensive platform for developing, managing, and versioning Prompts, Tools and the other artifacts of
+your AI systems. This explainer will show you how to create, version and manage your Prompts, Tools and other artifacts.
+
+You can also use Prompts without proxying through Humanloop to the model provider and instead call the model yourself and
+explicitly log the results to your Prompt.
+
+
+SERIALIZATION (.PROMPT FILE)
+
+Our .prompt file format is a serialized version of a model config that is designed to be human-readable and suitable for checking
+into your version control systems alongside your code. See the .prompt files reference [../reference/prompt-file-format]
+for more details.
+
+
+FORMAT
+
+The .prompt file is heavily inspired by MDX [https://mdxjs.com/], with model and hyperparameters specified in a YAML header
+alongside a JSX-inspired format for your Chat Template.
+
+
+BASIC EXAMPLES
+
+\`\`\`jsx Chat --- model: gpt-4o temperature: 0.7 max_tokens: -1 top_p: 1.0 presence_penalty: 0.0 frequency_penalty: 0.0 provider:
+openai endpoint: chat tools: [ { "name": "get_current_weather", "description": "Get the current weather in a given location",
+"parameters": { "type": "object", "properties": { "location": { "type": "string", "name": "Location", "description": "The city and
+state, e.g. San Francisco, CA" }, "unit": { "type": "string", "name": "Unit", "enum": [ "celsius", "fahrenheit" ] } }, "required":
+[ "location" ] }, "source": "inline" } ] --- You are a weather bot designed to provide users with accurate and up-to-date weather
+information.
+
+You have access to a tool called get_current_weather, which allows you to fetch the current weather conditions for any given
+location. Users can request the current weather by specifying a city and state, and optionally, they can choose the unit of
+temperature (Celsius or Fahrenheit).
+
+Your responses should be clear, concise, and friendly, providing all relevant weather details such as temperature, humidity, wind
+speed, and any other important information.
+
+Always ensure to confirm the location and unit of measurement when responding to user inquiries.
+
+\`\`\`
+
+---
+model: claude-2
+temperature: 0.7
+max_tokens: 256
+top_p: 1.0
+provider: anthropic
+endpoint: complete
+---
+Autocomplete the sentence.
+
+Context: {{context}}
+
+{{sentence}}
+
+
+
+DEALING WITH SENSITIVE DATA
+
+{/* WIP - for gartner /start */}
+
+When working with sensitive data in your AI applications, it's crucial to handle it securely. Humanloop provides options to help
+you manage sensitive information while still benefiting from our platform's features.
+
+If you need to process sensitive data without storing it in Humanloop, you can use the save: false parameter when making calls to
+the API or logging data. This ensures that only metadata about the request is stored, while the actual sensitive content is not
+persisted in our systems.
+
+For PII detection, you can set up Guardrails [/docs/observability/alerts-and-guardrails] to detect and prevent the generation of
+sensitive information.
+
+{/* WIP - for gartner /end */}",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/development/overview",
+ "title": "Overview",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -1890,6 +2850,163 @@ You can create an account now by going to the [Sign up page](https://app.humanlo
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Prompt Management + AI Engineering",
+ "How-To Guides",
+ ],
+ "content": "Humanloop acts as a registry of your Prompts [/docs/concepts/prompts] so you can centrally manage all their versions and Logs
+[/docs/concepts/logs], and evaluate and improve your AI systems.
+
+This guide will show you how to create a Prompt in the UI or via the SDK/API.
+
+**Prerequisite**: A Humanloop account.
+
+You can create an account now by going to the Sign up page [https://app.humanloop.com/signup].
+
+
+CREATE A PROMPT IN THE UI
+
+### Create a Prompt File
+
+When you first open Humanloop you’ll see your File navigation on the left. Click ‘+ New’ and create a Prompt.
+
+[file:ad732e1d-77a8-4576-9933-1db6f9d9d28f]
+
+In the sidebar, rename this file to "Comedian Bot" now or later.
+
+
+CREATE THE PROMPT TEMPLATE IN THE EDITOR
+
+The left hand side of the screen defines your Prompt – the parameters such as model, temperature and template. The right hand side
+is a single chat session with this Prompt.
+
+[file:b9ed95cc-edc2-4c49-b8d3-4f164a083123]
+
+Click the "+ Message" button within the chat template to add a system message to the chat template.
+
+[file:5d7dd0e4-73f6-41b9-ad2b-60ba9f349f26]
+
+Add the following templated message to the chat template.
+
+You are a funny comedian. Write a joke about {{topic}}.
+
+
+This message forms the chat template. It has an input slot called topic (surrounded by two curly brackets) for an input value that
+is provided each time you call this Prompt.
+
+On the right hand side of the page, you’ll now see a box in the Inputs section for topic.
+
+ 1. Add a value for topic e.g. music, jogging, whatever.
+ 2. Click Run in the bottom right of the page.
+
+This will call OpenAI’s model and return the assistant response. Feel free to try other values, the model is very funny.
+
+You now have a first version of your prompt that you can use.
+
+
+COMMIT YOUR FIRST VERSION OF THIS PROMPT
+
+ 1. Click the Commit button
+ 2. Put “initial version” in the commit message field
+ 3. Click Commit
+
+[file:386f75eb-c97a-4923-9823-168a14848719]
+
+
+VIEW THE LOGS
+
+Under the Prompt File click ‘Logs’ to view all the generations from this Prompt
+
+Click on a row to see the details of what version of the prompt generated it. From here you can give feedback to that generation,
+see performance metrics, open up this example in the Editor, or add this log to a dataset.
+
+[file:f2b286b8-7fcf-4323-9308-6ca5fbc22e44]
+
+----------------------------------------------------------------------------------------------------------------------------------
+
+
+CREATE A PROMPT USING THE SDK
+
+The Humanloop Python SDK allows you to programmatically create and version your Prompts [/docs/concepts/prompts] in Humanloop, and
+log generations from your models. This guide will show you how to create a Prompt using the SDK.
+
+Note that you can also version your prompts dynamically with every Prompt
+
+**Prerequisite**: A Humanloop SDK Key.
+
+You can get this from your Organisation Settings page [https://app.humanloop.com/account/api-keys] if you have the right
+permissions [/docs/admin/access-roles].
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop TypeScript SDK:
+
+ npm install humanloop
+
+
+ 2. Import and initialize the SDK:
+
+ import { HumanloopClient, Humanloop } from "humanloop";
+
+ const humanloop = new HumanloopClient({ apiKey: "YOUR_API_KEY" });
+
+ // Check that the authentication was successful
+ console.log(await humanloop.prompts.list());
+
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop Python SDK:
+
+ pip install humanloop
+
+
+ 2. Start a Python interpreter:
+
+ python
+
+
+ 3. Initialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page
+ [https://app.humanloop.com/account/api-keys])
+
+ from humanloop import Humanloop
+ hl = Humanloop(api_key="")
+
+ # Check that the authentication was successful
+ print(hl.prompts.list())
+
+
+After initializing the SDK client, you can call the Prompt creation endpoint.
+
+
+CREATE THE PROMPT
+
+
+GO TO THE APP
+
+Go to the Humanloop app [https://app.humanloop.com] and you will see your new project as a Prompt with the model config you just
+created.
+
+You now have a Prompt in Humanloop that contains your initial version. You can call the Prompt in Editor and invite team members
+by going to your organization's members page.
+
+
+NEXT STEPS
+
+With the Prompt set up, you can now integrate it into your app by following the Call a Prompt Guide
+[/docs/development/guides/call-prompt].",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/development/guides/create-prompt",
+ "title": "Create a Prompt",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -2110,6 +3227,100 @@ This guide will show you how to call your Prompts as an API, enabling you to gen
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Prompt Management + AI Engineering",
+ "How-To Guides",
+ ],
+ "content": "This guide will show you how to call your Prompts as an API, enabling you to generate responses from the large language model that
+uses the versioned template and parameters. If you want to call an LLM with a prompt that you're defining in code follow the guide
+on Calling a LLM through the Humanloop Proxy [/docs/development/guides/proxy-model-calls].
+
+
+CALL AN EXISTING PROMPT
+
+
+PREREQUISITES
+
+Before you can use the new prompt.call() method, you need to have a Prompt. If you don't have one, please follow our Prompt
+creation [/docs/development/guides/create-prompt] guide first.
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop TypeScript SDK:
+
+ npm install humanloop
+
+
+ 2. Import and initialize the SDK:
+
+ import { HumanloopClient, Humanloop } from "humanloop";
+
+ const humanloop = new HumanloopClient({ apiKey: "YOUR_API_KEY" });
+
+ // Check that the authentication was successful
+ console.log(await humanloop.prompts.list());
+
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop Python SDK:
+
+ pip install humanloop
+
+
+ 2. Start a Python interpreter:
+
+ python
+
+
+ 3. Initialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page
+ [https://app.humanloop.com/account/api-keys])
+
+ from humanloop import Humanloop
+ hl = Humanloop(api_key="")
+
+ # Check that the authentication was successful
+ print(hl.prompts.list())
+
+
+
+GET THE PROMPT ID
+
+In Humanloop, navigate to the Prompt and copy the Prompt ID by clicking on the ID in the top right corner of the screen.
+
+[file:9c6a1d96-687b-4851-a01f-783c80927c39]
+
+
+USE THE SDK TO CALL YOUR MODEL
+
+Now you can use the SDK to generate completions and log the results to your Prompt using the new prompt.call() method:
+
+
+NAVIGATE TO THE LOGS TAB OF THE PROMPT
+
+And you'll be able to see the recorded inputs, messages and responses of your chat.
+
+
+CALL THE LLM WITH A PROMPT THAT YOU'RE DEFINING IN CODE
+
+
+
+
+
+🎉 Now that you have chat messages flowing through your Prompt you can start to log your end user feedback to evaluate and improve
+your models.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/development/guides/call-prompt",
+ "title": "Call a Prompt",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -2274,6 +3485,97 @@ In this guide, we'll cover how to call LLMs using the Humanloop proxy.",
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Prompt Management + AI Engineering",
+ "How-To Guides",
+ ],
+ "content": "This guide walks you through how to call various models through the Humanloop API. This is the same as calling a Prompt
+[/docs/development/guides/call-prompt] but instead of using a version of the Prompt that is defined in Humanloop, you're setting
+the template and parameters directly in code.
+
+The benefits of using the Humanloop proxy are:
+
+ * consistent interface across different AI providers: OpenAI, Anthropic, Google and more – see the full list of supported models
+ [/docs/v5/reference/supported-models]
+ * all your requests are logged automatically
+ * creates versions of your Prompts automatically, so you can track performance over time
+ * can call multiple providers while managing API keys centrally (you can also supply keys at runtime)
+
+In this guide, we'll cover how to call LLMs using the Humanloop proxy.
+
+
+CALL THE LLM WITH A PROMPT THAT YOU'RE DEFINING IN CODE
+
+
+PREREQUISITES
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop TypeScript SDK:
+
+ npm install humanloop
+
+
+ 2. Import and initialize the SDK:
+
+ import { HumanloopClient, Humanloop } from "humanloop";
+
+ const humanloop = new HumanloopClient({ apiKey: "YOUR_API_KEY" });
+
+ // Check that the authentication was successful
+ console.log(await humanloop.prompts.list());
+
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop Python SDK:
+
+ pip install humanloop
+
+
+ 2. Start a Python interpreter:
+
+ python
+
+
+ 3. Initialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page
+ [https://app.humanloop.com/account/api-keys])
+
+ from humanloop import Humanloop
+ hl = Humanloop(api_key="")
+
+ # Check that the authentication was successful
+ print(hl.prompts.list())
+
+
+
+USE THE SDK TO CALL YOUR MODEL
+
+Now you can use the SDK to generate completions and log the results to your Prompt using the new prompt.call() method:
+
+
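+For example, a minimal Python sketch of calling through the proxy with the Prompt defined inline in code (the path, model and
+prompt payload fields below are illustrative assumptions -- check the SDK reference for the exact shape):
+
+from humanloop import Humanloop
+
+humanloop = Humanloop(api_key="YOUR_API_KEY")
+
+# Define the Prompt directly in code rather than referencing a version saved in
+# Humanloop. The field names here are assumptions -- verify against the SDK reference.
+response = humanloop.prompts.call(
+    path="Support Agent",  # illustrative path; a Prompt version is created automatically
+    prompt={
+        "model": "gpt-4o",
+        "template": [{"role": "system", "content": "You are a helpful assistant."}],
+    },
+    messages=[{"role": "user", "content": "Explain how async works."}],
+)
+
+print(response)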
+
+
+
+
+NAVIGATE TO THE LOGS TAB OF THE PROMPT
+
+And you'll be able to see the recorded inputs, messages and responses of your chat.
+
+🎉 Now that you have chat messages flowing through your Prompt you can start to log your end user feedback to evaluate and improve
+your models.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/development/guides/proxy-model-calls",
+ "title": "Proxy Model Calls",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -2402,6 +3704,209 @@ However, there may be scenarios that you wish to manage the LLM provider calls d
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Prompt Management + AI Engineering",
+ "How-To Guides",
+ ],
+ "content": "This guide will show you how to capture the Logs [/docs/concepts/logs] of your LLM calls into Humanloop.
+
+The easiest way to log LLM generations to Humanloop is to use the Prompt.call() method (see the guide on Calling a Prompt
+[/docs/development/guides/call-prompt]). You will only need to supply prompt ID and the inputs needed by the prompt template, and
+the endpoint will handle fetching the latest template, making the LLM call and logging the result.
+
+However, there may be scenarios that you wish to manage the LLM provider calls directly in your own code instead of relying on
+Humanloop. For example, you may be using an LLM provider that is not directly supported by Humanloop such as a custom self-hosted
+model, or you may want to avoid adding Humanloop to the critical path of the LLM API calls.
+
+
+PREREQUISITES
+
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/development/guides/create-prompt] guide first.
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop TypeScript SDK:
+
+ npm install humanloop
+
+
+ 2. Import and initialize the SDK:
+
+ import { HumanloopClient, Humanloop } from "humanloop";
+
+ const humanloop = new HumanloopClient({ apiKey: "YOUR_API_KEY" });
+
+ // Check that the authentication was successful
+ console.log(await humanloop.prompts.list());
+
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop Python SDK:
+
+ pip install humanloop
+
+
+ 2. Start a Python interpreter:
+
+ python
+
+
+ 3. Initialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page
+ [https://app.humanloop.com/account/api-keys])
+
+ from humanloop import Humanloop
+ hl = Humanloop(api_key="")
+
+ # Check that the authentication was successful
+ print(hl.prompts.list())
+
+
+
+LOG DATA TO YOUR PROMPT
+
+To log LLM generations to Humanloop, you will need to make a call to the /prompts/log endpoint.
+
+Note that you can either specify a version of the Prompt you are logging against - in which case you will need to take care that
+you are supplying the correct version ID and inputs - or you can supply the full prompt, and a new version will be created if it
+has not been seen before.
+
+
+GET YOUR PROMPT
+
+Fetch a Prompt from Humanloop by specifying the ID. You can ignore this step if your prompts are created dynamically in code.
+
+Here's how to do this in code:
+
+import re
+PROMPT_ID = ""
+prompt = humanloop.prompts.get(id=PROMPT_ID)
+
+# This will fill the prompt template with the variables
+def fill_template(template, variables):
+ def replace_variable(match):
+ variable = match.group(1).strip()
+ if variable in variables:
+ return variables[variable]
+ else:
+ raise ValueError(f"Error: Variable '{variable}' is missing.")
+
+ filled_template = []
+ for message in template:
+ content = message['content']
+ filled_content = re.sub(r'\\{\\{\\s*(.*?)\\s*\\}\\}', replace_variable, content)
+ filled_template.append({**message, 'content': filled_content})
+
+ return filled_template
+
+template = fill_template(prompt.template, {"language": "Python"})
+
+
+const prompt = await humanloop.prompts.get({ id: "" });
+
+function fillTemplate(
+ template: Message[],
+ variables: { [key: string]: string }
+): Message[] {
+ const replaceVariable = (match: string, variable: string) => {
+ const trimmedVariable = variable.trim();
+ if (trimmedVariable in variables) {
+ return variables[trimmedVariable];
+ } else {
+ throw new Error(\`Error: Variable '\${trimmedVariable}' is missing.\`);
+ }
+ };
+
+ return template.map((message) => {
+ const filledContent = message.content.replace(
+ /\\{\\{\\s*(.*?)\\s*\\}\\}/g,
+ replaceVariable
+ );
+ return { ...message, content: filledContent };
+ });
+}
+
+const template = fillTemplate(prompt.template, { language: "Python" });
+
+
+
+CALL YOUR PROMPT
+
+This can be your own model, or any other LLM provider. Here is an example of calling OpenAI:
+
+import openai
+
+client = openai.OpenAI(api_key="")
+
+messages = template + [{ "role": "user", "content": "explain how async works" }]
+
+chat_completion = client.chat.completions.create(
+ messages=messages,
+ model=prompt.model,
+ temperature=prompt.temperature
+)
+
+# Parse the output from the OpenAI response.
+output = chat_completion.choices[0].message.content
+
+
+import { OpenAI } from "openai";
+
+const client = new OpenAI({
+ apiKey: "",
+});
+
+const messages = template.concat([
+ { role: "user", content: "explain how async works" },
+]);
+
+const chatCompletion = await client.chat.completions.create({
+ messages: messages,
+ model: prompt.model,
+ temperature: prompt.temperature,
+});
+
+const output = chatCompletion.choices[0].message.content;
+
+
+
+LOG THE RESULT
+
+Finally, log the result to your project:
+
+
+# Get the output from the OpenAI response.
+output_message = chat_completion.choices[0].message
+
+# Log the inputs, outputs and config to your project.
+log = humanloop.prompts.log(
+ id=PROMPT_ID,
+ output_message=output_message,
+ messages=messages,
+)
+
+
+// Get the output from the OpenAI response.
+const outputMessage = chatCompletion.choices[0].message;
+
+const log = humanloop.prompts.log({
+ id: PROMPT_ID,
+ output_message: outputMessage,
+ messages: messages,
+});
+",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/development/guides/log-to-a-prompt",
+ "title": "Log to a Prompt",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -2692,6 +4197,143 @@ Within the editor, you have the flexibility to create inline JSON Schema tools a
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Prompt Management + AI Engineering",
+ "How-To Guides",
+ ],
+ "content": "Humanloop's Prompt Editor supports for Tool Calling functionality, enabling models to interact with external functions. This
+feature, akin to OpenAI's function calling [https://platform.openai.com/docs/v5/guides/function-calling/function-calling], is
+implemented through JSON Schema tools in Humanloop. These Tools adhere to the widely-used JSON Schema syntax, providing a
+standardized way to define data structures.
+
+Within the editor, you have the flexibility to create inline JSON Schema tools as part of your model configuration. This
+capability allows you to establish a structured framework for the model's responses, enhancing control and predictability.
+Throughout this guide, we'll explore the process of leveraging these tools within the editor environment.
+
+
+PREREQUISITES
+
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/development/guides/create-prompt] guide first.
+
+
+CREATE AND USE A TOOL IN THE PROMPT EDITOR
+
+To create and use a tool follow the following steps:
+
+OPEN THE EDITOR
+
+Go to a Prompt and open the Editor.
+
+
+SELECT A MODEL THAT SUPPORTS TOOL CALLING
+
+To view the list of models that support Tool calling, see the Models page [/docs/reference/supported-models#models].
+
+In the editor, you'll see an option to select the model. Choose a model like gpt-4o which supports Tool Calling.
+
+
+DEFINE THE TOOL
+
+To get started with tool definition, it's recommended to begin with one of our preloaded example tools. For this guide, we'll use
+the get_current_weather tool. Select this from the dropdown menu of preloaded examples.
+
+If you choose to edit or create your own tool, you'll need to use the universal JSON Schema syntax [https://json-schema.org/].
+When creating a custom tool, it should correspond to a function you have defined in your own code. The JSON Schema you define here
+specifies the parameters and structure you want the AI model to use when interacting with your function.
+
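+As an illustration of that correspondence, the get_current_weather example used in this guide would describe a function in your
+own code shaped roughly like this (a hypothetical mock that returns the same values as the simulated response used later in this
+guide):
+
+def get_current_weather(location: str, unit: str = "celsius") -> dict:
+    # In a real application this would call a weather API; here we return a
+    # mock value matching the simulated tool response used later in this guide.
+    return {"temperature": 12, "condition": "drizzle", "unit": unit}
+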
+[file:0d148432-70f4-4c8b-91aa-23d2854e8331]
+
+
+TEST IT OUT
+
+Now, let's test our tool by inputting a relevant query. Since we're working with a weather-related tool, try typing: What's the
+weather in Boston?. This should prompt OpenAI to respond using the parameters we've defined.
+
+Keep in mind that the model's use of the tool depends on the relevance of the user's input. For instance, a question like 'how are
+you today?' is unlikely to trigger a weather-related tool response.
+
+
+CHECK ASSISTANT RESPONSE FOR A TOOL CALL
+
+Upon successful setup, the assistant should respond by invoking the tool, providing both the tool's name and the required data.
+For our get_current_weather tool, the response might look like this:
+
+get_current_weather({
+ "location": "London"
+})
+
+
+
+INPUT TOOL RESPONSE
+
+After the tool call, the editor will automatically add a partially filled tool message for you to complete.
+
+You can paste in the exact response that the Tool would respond with. For prototyping purposes, you can also just simulate the
+response yourself (LLMs can handle it!). Provide a mock response:
+
+To input the tool response:
+
+ 1. Find the tool response field in the editor.
+ 2. Enter the response matching the expected format, such as:
+
+ { "temperature": 12, "condition": "drizzle", "unit": "celsius" }
+
+
+Remember, the goal is to simulate the tool's output as if it were actually fetching real-time weather data. This allows you to
+test and refine your prompt and tool interaction without needing to implement the actual weather API.
+
+
+SUBMIT TOOL RESPONSE
+
+After entering the simulated tool response, click on the 'Run' button to send the Tool message to the AI model.
+
+
+REVIEW ASSISTANT RESPONSE
+
+The assistant should now respond using the information provided in your simulated tool response. For example, if you input that
+the weather in London was drizzling at 12°C, the assistant might say:
+
+Based on the current weather data, it's drizzling in London with a temperature of 12 degrees Celsius.
+
+This response demonstrates how the AI model incorporates the tool's output into its reply, providing a more contextual and
+data-driven answer.
+
+Example of assistant response using tool data [file:638fd12b-40d5-4e3a-845a-ad4a6c767438]
+
+
+ITERATE AND REFINE
+
+Feel free to experiment with different queries and simulated tool responses. This iterative process helps you fine-tune your
+prompt and understand how the AI model interacts with the tool, ultimately leading to more effective and accurate responses in
+your application.
+
+
+SAVE YOUR PROMPT
+
+By saving your prompt, you're creating a new version that includes the tool configuration.
+
+Congratulations! You've successfully learned how to use tool calling in the Humanloop editor. This powerful feature allows you to
+simulate and test tool interactions, helping you create more dynamic and context-aware AI applications.
+
+Keep experimenting with different scenarios and tool responses to fully explore the capabilities of your AI model and create even
+more impressive applications!
+
+
+NEXT STEPS
+
+After you've created and tested your tool configuration, you might want to reuse it across multiple prompts. Humanloop allows you
+to link a tool, making it easier to share and manage tool configurations.
+
+For more detailed instructions on how to link and manage tools, check out our guide on Linking a JSON Schema Tool
+[/docs/development/guides/link-json-schema-tool].",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/development/guides/tool-calling-editor",
+ "title": "Tool calling in Editor",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -2875,6 +4517,129 @@ Instead of needing to copy and paste between your editor sessions and keep track
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Prompt Management + AI Engineering",
+ "How-To Guides",
+ ],
+ "content": "The Snippet Tool supports managing common text 'snippets' that you want to reuse across your different prompts. A Snippet tool
+acts as a simple key/value store, where the key is the name of the common re-usable text snippet and the value is the
+corresponding text.
+
+For example, you may have some common persona descriptions that you found to be effective across a range of your LLM features. Or
+maybe you have some specific formatting instructions that you find yourself re-using again and again in your prompts.
+
+Instead of needing to copy and paste between your editor sessions and keep track of which projects you edited, you can instead
+inject the text into your prompt using the Snippet tool.
+
+
+CREATE AND USE A SNIPPET TOOL
+
+
+PREREQUISITES
+
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/development/guides/create-prompt] guide first.
+
+This feature is not available for the Free tier. Please contact us if you wish to learn more about our Enterprise plan
+[https://humanloop.com/pricing].
+
+To create and use a snippet tool, follow the following steps:
+
+
+CREATE A NEW SNIPPET TOOL
+
+[file:0f219e42-6935-4dbc-8689-265738570928]
+
+
+NAME THE TOOL
+
+Name it assistant-personalities and give it a description Useful assistant personalities.
+
+
+ADD A KEY CALLED "HELPFUL-ASSISTANT"
+
+In the initial box add helpful-assistant and give it a value of You are a helpful assistant. You like to tell jokes and if anyone
+asks your name is Sam.
+
+
+ADD ANOTHER KEY CALLED "GRUMPY-ASSISTANT"
+
+Let's add another key-value pair, so press the Add a key/value pair button and add a new key of grumpy-assistant and give it a
+value of You are a grumpy assistant. You rarely try to help people and if anyone asks your name is Freddy..
+
+[file:0297c12d-7572-4b93-8204-d9553cfc7afe]
+
+
+PRESS CREATE TOOL.
+
+Now your Snippets are set up, you can use them to populate strings in your prompt templates across your projects.
+
+
+NAVIGATE TO THE EDITOR
+
+Go to the Editor of your previously created project.
+
+
+ADD {{ ASSISTANT-PERSONALITIES(KEY) }} TO YOUR PROMPT
+
+Delete the existing prompt template and add {{ assistant-personalities(key) }} to your prompt.
+
+Double curly bracket syntax is used to call a tool in the editor. Inside the curly brackets you put the tool name, e.g. \`{{
+my-tool-name(key) }}\`.
+
+
+ENTER THE KEY AS AN INPUT
+
+In the input area set the value to helpful-assistant. The tool requires an input value to be provided for the key. When adding the
+tool an inputs field will appear in the top right of the editor where you can specify your key.
+
+
+PRESS THE RUN BUTTON
+
+Start the chat with the LLM and you can see the response of the LLM, as well as the key you previously defined, in the
+Chat on the right.
+
+[file:52c5db5b-1863-41e9-a5da-f86b9219505b]
+
+
+CHANGE THE KEY TO GRUMPY-ASSISTANT.
+
+If you want to see the snippet corresponding to the key, you need to first run the conversation to fetch the string and see
+it in the preview.
+
+
+PLAY WITH THE LLM
+
+Ask the LLM: "I'm a customer and need help solving this issue. Can you help?". You should see a grumpy response from "Freddy" now.
+
+If you have a specific key you would like to hardcode in the prompt, you can define it using the literal key value: {{
+tool-name("key") }}, so in this case it would be {{ assistant-personalities("grumpy-assistant") }}. Delete the
+grumpy-assistant field and add it into your chat template.
+
+
+SAVE YOUR PROMPT.
+
+If you're happy with your grumpy assistant, save this new version of your Prompt.
+
+[file:79b12d9b-b906-4b77-9ae3-6e49da4ba952]
+
+The Snippet tool is particularly useful because you can define passages of text once in a Snippet tool and reuse them across
+multiple prompts, without needing to copy/paste them and manually keep them all in sync. Editing the values in your tool allows
+the changes to automatically propagate to the Prompts when you update them, as long as the key is the same.
+
+Since the values for a Snippet are saved on the Tool, not the Prompt, changing the values (or keys) defined in your Snippet tools
+can affect the Prompt's behaviour in a way that won't be captured by the Prompt's version.
+
+This could be exactly what you intend; however, caution should still be used to make sure the changes are expected.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/development/guides/reusable-snippets",
+ "title": "Re-use snippets in Prompts",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -3015,6 +4780,74 @@ The default environment is your production environment. Everytime you fetch a Pr
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Prompt Management + AI Engineering",
+ "How-To Guides",
+ ],
+ "content": "Environments [/docs/concepts/environments] are a tagging system for deploying Prompts. They enable you to deploy maintain a
+streamlined deployment workflow and keep track of different versions of Prompts.
+
+The default environment is your production environment. Every time you fetch a Prompt, Tool, Dataset etc. without specifying an
+alternative environment or specific version, the version that is tagged with the default environment is returned.
+
+
+CREATE AN ENVIRONMENT
+
+
+GO TO YOUR ENVIRONMENTS [https://app.humanloop.com/account/environments] TAB IN YOUR ORGANIZATION'S SETTINGS.
+
+
+CLICK THE '+ ENVIRONMENT' BUTTON TO OPEN THE NEW ENVIRONMENT DIALOG
+
+
+ASSIGN A CUSTOM NAME TO THE ENVIRONMENT
+
+We recommend something short. For example, you could use staging, prod, qa, dev, testing, etc. This name is used to identify
+the environment in the UI and in the API.
+
+
+CLICK CREATE.
+
+[file:3175c307-fd5c-4178-8488-940700d92042]
+
+
+UPDATING THE DEFAULT ENVIRONMENT
+
+Only Enterprise customers can update their default environment
+
+
+PREREQUISITES
+
+ * You have multiple environments - if not first go through the Create an environment
+ [/docs/development/guides/create-deployment-environments#create-an-environment] section.
+
+Every organization will have a default environment. This can be updated by the following:
+
+
+GO TO YOUR ORGANIZATION'S ENVIRONMENTS [https://app.humanloop.com/account/environments] PAGE.
+
+
+CLICK ON THE DROPDOWN MENU OF AN ENVIRONMENT THAT IS NOT ALREADY THE DEFAULT.
+
+
+CLICK THE MAKE DEFAULT OPTION
+
+A dialog will open asking you if you are certain this is a change you want to make. If so, click the Make default button.
+
+
+VERIFY THE DEFAULT TAG HAS MOVED TO THE ENVIRONMENT YOU SELECTED.
+
+[file:bd81b4d2-f2e7-49e9-8beb-82d5d9818e38]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/development/guides/create-deployment-environments",
+ "title": "Create deployment environments",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -3142,6 +4975,50 @@ In this guide we will demonstrate how to create and use environments.
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Prompt Management + AI Engineering",
+ "How-To Guides",
+ ],
+ "content": "Environments [/docs/concepts/environments] are a tagging system for deploying Prompts. They enable you to deploy maintain a
+streamlined deployment workflow and keep track of different versions of Prompts.
+
+
+PREREQUISITES
+
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/development/guides/create-prompt] guide first.
+
+To deploy a model config to an environment:
+
+
+NAVIGATE TO THE DASHBOARD OF YOUR PROMPT
+
+
+CLICK THE DROPDOWN MENU OF THE ENVIRONMENT.
+
+[file:a13e72ab-9366-4763-96a7-bccd57ada8b9]
+
+
+CLICK THE CHANGE DEPLOYMENT BUTTON
+
+
+SELECT A VERSION
+
+Choose the version you want to deploy from the list of available versions.
+
+[file:42640269-c870-4228-873b-d40d0842d33d]
+
+
+CLICK THE DEPLOY BUTTON.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/development/guides/deploy-to-environment",
+ "title": "Deploy to an environment",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -3218,6 +5095,49 @@ You can create an account now by going to the [Sign up page](https://app.humanlo
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Prompt Management + AI Engineering",
+ "How-To Guides",
+ ],
+ "content": "This guide will show you how to create a Directory [/docs/concepts/directories] in the UI. A directory is a collection of files
+and other directories.
+
+**Prerequisite**: A Humanloop account.
+
+You can create an account now by going to the Sign up page [https://app.humanloop.com/signup].
+
+
+CREATE A DIRECTORY
+
+ 1. Open Humanloop and navigate to the File navigation on the left.
+ 2. Click '+ New' and select Directory.
+ 3. Name your new directory, for example, "Summarization App".
+
+You can call files and directories anything you want. Capital letters and spaces are all OK!
+
+Creating a new directory [file:455d4da3-37e9-438a-87a1-ab52bb82b5b1]
+
+
+(OPTIONAL) MOVE A FILE INTO THE DIRECTORY
+
+ 1. In the File navigation sidebar, right-click on the file and select "Move" from the context menu
+ 2. Choose the destination directory
+
+Moving a file into a directory [file:b39f20d9-8a14-46a0-acb2-d89abd6c22dd]
+
+You have now successfully created a directory and moved a file into it. This organization can help you manage your AI applications
+more efficiently within Humanloop.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/development/guides/create-directory",
+ "title": "Create a Directory",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -3297,6 +5217,178 @@ Importantly, updates to this Tool defined here will then propagate automatically
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Prompt Management + AI Engineering",
+ "How-To Guides",
+ ],
+ "content": "It's possible to re-use tool definitions them across multiple Prompts. You achieve this by having a Prompt file which defines a
+JSON schema, and linking them to your Prompt.
+
+You achieve this by creating a JSON Schema Tool and linking that to as many Prompts as you need.
+
+Importantly, updates to this Tool defined here will then propagate automatically to all the Prompts you've linked it to, without
+having to deploy new versions of the Prompt.
+
+
+PREREQUISITES
+
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/development/guides/create-prompt] guide first.
+
+
+CREATING AND LINKING A JSON SCHEMA TOOL
+
+To create a reusable JSON Schema tool for your organization, follow these steps:
+
+CREATE A NEW TOOL FILE
+
+Navigate to the homepage or sidebar and click the 'New File' button.
+
+
+CHOOSE THE JSON SCHEMA TOOL TYPE
+
+From the available options, select JSON Schema as the Tool type.
+
+
+DEFINE YOUR TOOL'S STRUCTURE
+
+Paste the following JSON into the provided dialog to define your tool's structure:
+
+{
+ "name": "get_current_weather",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "name": "Location",
+ "description": "The city and state, e.g. San Francisco, CA"
+ },
+ "unit": {
+ "type": "string",
+ "name": "Unit",
+ "enum": ["celsius", "fahrenheit"]
+ }
+ },
+ "required": ["location"]
+ }
+}
+
+
+If you choose to edit or create your own tool, you'll need to use the universal JSON Schema syntax [https://json-schema.org/].
+When creating a custom tool, it should correspond to a function you have defined in your own code. The JSON Schema you define here
+specifies the parameters and structure you want the AI model to use when interacting with your function.
+
+
+COMMIT THIS VERSION OF THE TOOL
+
+Press the Commit button to commit this version of the Tool, and set it as the default version by deploying it.
+
+
+NAVIGATE TO THE EDITOR OF A PROMPT
+
+Switch to a model that supports tool calling, such as gpt-4o.
+
+To view the list of models that support Tool calling, see the Models page [/docs/reference/supported-models#models].
+
+
+ADD TOOL TO THE PROMPT DEFINITION.
+
+
+SELECT 'LINK EXISTING TOOL'
+
+In the dropdown, go to the Link existing tool option. You should see your get_current_weather tool, click on it to link it to your
+editor.
+
+[file:82b8db60-27bd-4436-bb3c-8f1da79407e9]
+
+
+TEST THAT THE PROMPT IS WORKING WITH THE TOOL
+
+Now that your Tool is linked you can start using it. In the Chat section, in the User input, enter "what is the weather in
+london?"
+
+Press the Run button.
+
+You should see the Assistant respond with the tool response and a new Tool field inserted to allow you to insert an answer. In
+this case, put in 22 into the tool response and press Run.
+
+[file:1835f4ab-748e-4a64-8764-f69adb82d602]
+
+The model will respond with The current weather in London is 22 degrees.
+
+
+COMMIT THE PROMPT
+
+You've linked a Tool to your Prompt, now let's save it. Press the Save button and name your Prompt weather-model-config.
+
+
+(OPTIONAL) UPDATE THE TOOL
+
+Now that we've linked your get_current_weather tool to your Prompt, let's try updating the base tool and see how it propagates
+the changes down into your saved weather-model-config. Navigate back to the Tool in the sidebar and go to the Editor.
+
+
+UPDATE THE TOOL
+
+Let's update both the name, as well as the required fields. For the name, update it to get_current_weather_updated and for the
+required fields, add unit as a required field. It should look like this now:
+
+{
+ "name": "get_current_weather_updated",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "name": "Location",
+ "description": "The city and state, e.g. San Francisco, CA"
+ },
+ "unit": {
+ "type": "string",
+ "name": "Unit",
+ "enum": ["celsius", "fahrenheit"]
+ }
+ },
+ "required": ["location", "unit"]
+ }
+}
+
+
+
+COMMIT AND DEPLOY THE TOOL
+
+Press the Commit button and then follow the steps to deploy this version of the Tool.
+
+Your Tool is now updated.
+
+
+TRY THE PROMPT AGAIN
+
+Navigate back to your previous project, and open the editor. You should see the weather-model-config loaded as the active config.
+You should also be able to see the name of your previously linked tool in the Tools section now says get_current_weather_updated.
+
+In the Chat section enter in again, What is the weather in london?, and press Run again.
+
+
+CHECK THE RESPONSE
+
+You should see the updated tool response, and how it now contains the unit field. Congratulations, you've successfully linked a
+JSON Schema tool to your Prompt.
+
+[file:d564f7b0-6b6c-4c89-b1ee-fab1311b93a1]
+
+When updating your Tool, remember that the change will affect all the Prompts that link to it. Be careful when making updates to
+not inadvertently change something you didn't intend.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/development/guides/link-tool",
+ "title": "Link a Tool to a Prompt",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -3501,6 +5593,180 @@ Importantly, updates to the \`get_current_weather\` \`JSON Schema\` tool defined
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Prompt Management + AI Engineering",
+ "How-To Guides",
+ ],
+ "content": "It's possible to re-use tool definitions them across multiple Prompts. You achieve this by having a Prompt file which defines a
+JSON schema, and linking them to your Prompt.
+
+You can achieve this by first defining an instance of a JSON Schema tool in your global Tools tab. Here you can define a tool
+once, such as get_current_weather(location: string, unit: 'celsius' | 'fahrenheit'), and then link that to as many model configs
+as you need within the Editor as shown below.
+
+Importantly, updates to the get_current_weather JSON Schema tool defined here will then propagate automatically to all the model
+configs you've linked it to, without having to publish new versions of the prompt.
+
+
+PREREQUISITES
+
+ * A Humanloop account - you can create one by going to our sign up page.
+ * Be on a paid plan - your organization has been upgraded from the Free tier.
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/guides/create-prompt] guide first.
+
+To create a JSON Schema tool that can be reusable across your organization, follow the following steps:
+
+
+CREATING AND LINKING A JSON SCHEMA TOOL
+
+This feature is not available for the Free tier. Please contact us if you wish to learn more about our Enterprise plan
+[https://humanloop.com/pricing].
+
+CREATE A TOOL FILE
+
+Click the 'New File' button on the homepage or in the sidebar.
+
+
+SELECT THE JSON SCHEMA TOOL TYPE
+
+
+DEFINE YOUR TOOL
+
+Set the name, description, and parameters values. Our guide for using Tool Calling in the Prompt Editor [./tool-calling-editor]
+can be a useful reference in this case. We can use the get_current_weather schema in this case. Paste the following into the
+dialog:
+
+{
+ "name": "get_current_weather",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "name": "Location",
+ "description": "The city and state, e.g. San Francisco, CA"
+ },
+ "unit": {
+ "type": "string",
+ "name": "Unit",
+ "enum": ["celsius", "fahrenheit"]
+ }
+ },
+ "required": ["location"]
+ }
+}
+
+
+
+PRESS THE CREATE BUTTON.
+
+
+NAVIGATE TO THE EDITOR
+
+Make sure you are using a model that supports tool calling, such as gpt-4o.
+
+See the Models page [/docs/v5/supported-models] for a list of models that support tool calling.
+
+
+ADD TOOL TO THE PROMPT DEFINITION.
+
+
+SELECT 'LINK EXISTING TOOL'
+
+In the dropdown, go to the Link existing tool option. You should see your get_current_weather tool, click on it to link it to your
+editor.
+
+[file:82b8db60-27bd-4436-bb3c-8f1da79407e9]
+
+
+TEST THAT THE PROMPT IS WORKING WITH THE TOOL
+
+Now that your tool is linked you can start using it as you would normally use an inline tool. In the Chat section, in the User
+input, enter "What is the weather in london?"
+
+Press the Run button.
+
+You should see the Assistant respond with the tool response and a new Tool field inserted to allow you to insert an answer. In
+this case, put in 22 into the tool response and press Run.
+
+[file:1835f4ab-748e-4a64-8764-f69adb82d602]
+
+The model will respond with The current weather in London is 22 degrees.
+
+
+SAVE THE PROMPT
+
+You've linked a tool to your model config, now let's save it. Press the Save button and name your model config
+weather-model-config.
+
+
+(OPTIONAL) UPDATE THE TOOL
+
+Now that we've linked your get_current_weather tool to your model config, let's try updating the base tool and see how it
+propagates the changes down into your saved weather-model-config. Navigate back to the Tools in the sidebar and go to the
+Editor.
+
+
+CHANGE THE TOOL.
+
+Let's update both the name, as well as the required fields. For the name, update it to get_current_weather_updated and for the
+required fields, add unit as a required field. It should look like this now:
+
+{
+ "name": "get_current_weather_updated",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "name": "Location",
+ "description": "The city and state, e.g. San Francisco, CA"
+ },
+ "unit": {
+ "type": "string",
+ "name": "Unit",
+ "enum": ["celsius", "fahrenheit"]
+ }
+ },
+ "required": ["location", "unit"]
+ }
+}
+
+
+
+SAVE THE TOOL
+
+Press the Save button, then the Continue button that follows to confirm.
+
+Your tool is now updated.
+
+
+TRY THE PROMPT AGAIN
+
+Navigate back to your previous project, and open the editor. You should see the weather-model-config loaded as the active config.
+You should also be able to see the name of your previously linked tool in the Tools section now says get_current_weather_updated.
+
+In the Chat section enter in again, What is the weather in london?, and press Run again.
+
+
+CHECK THE RESPONSE
+
+You should see the updated tool response, and how it now contains the unit field. Congratulations, you've successfully linked a
+JSON Schema tool to your model config.
+
+[file:d564f7b0-6b6c-4c89-b1ee-fab1311b93a1]
+
+When updating your organization-level JSON Schema tools, remember that the change will affect all the places you've previously
+linked the tool. Be careful when making updates to not inadvertently change something you didn't intend.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/development/guides/link-json-schema-tool",
+ "title": "Link JSON Schema Tool",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -3701,6 +5967,95 @@ The judgment is typically either a boolean or a number, indicating how well the
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Evaluation",
+ ],
+ "content": "A key part of successful prompt engineering and deployment for LLMs is a robust evaluation framework. In this section we provide
+guides for how to set up Humanloop's evaluation framework for your Prompts and Tools.
+
+The core entity in the Humanloop evaluation framework is an Evaluator [/docs/concepts/evaluators] - a function you define which
+takes an LLM-generated log as an argument and returns a judgment. The judgment is typically either a boolean or a number,
+indicating how well the model performed according to criteria you determine based on your use case.
+
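+As a minimal illustration (following the code Evaluator guide later in this section, where the Log is passed to the function as a
+dictionary with an output field), an Evaluator might look like:
+
+def output_is_concise(log):
+    # Judgement: True if the generated output stays under 500 characters.
+    return len(log["output"]) < 500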
+
+SOURCES OF JUDGEMENT
+
+Currently, you can define three different Evaluator sources on Humanloop:
+
+ * Code - using simple deterministic rules-based judgments against attributes like cost, token usage, latency, regex rules on the
+ output, etc. These are generally fast and cheap to run at scale.
+ * AI - using other foundation models to provide judgments on the output. This allows for more qualitative and nuanced judgments
+ for a fraction of the cost of human judgments.
+ * Human - getting gold standard judgments from either end users of your application, or internal domain experts. This can be the
+ most expensive and slowest option, but also the most reliable.
+
+
+ONLINE MONITORING VS. OFFLINE EVALUATION
+
+Evaluators can be deployed on Humanloop to support both testing new versions of your Prompts and Tools during development and for
+monitoring live apps that are already in production.
+
+
+ONLINE MONITORING
+
+Evaluators are run against the Logs [../concepts/logs] generated by your AI applications. Typically, they are used to monitor
+deployed model performance over time and check for drift or degradation in performance. The Evaluator in this case only takes a
+single argument - the log generated by the model. The Evaluator is expected to return a judgment based on the Log, which can be
+used to trigger alerts or other actions in your monitoring system.
+
+See our Monitoring guides [../observability/overview] for more details.
+
+
+OFFLINE EVALUATIONS
+
+Offline Evaluators are combined with predefined Datasets [../concepts/datasets] in order to evaluate your application as you
+iterate in your prompt engineering workflow, or to test for regressions in a CI environment.
+
+A test Dataset is a collection of Datapoints, which are roughly analogous to unit tests or test cases in traditional programming.
+Each datapoint specifies inputs to your model and (optionally) some target data.
+
+When you run an offline evaluation, a Log needs to be generated using the inputs of each Datapoint and the version of the
+application being evaluated. Evaluators then need to be run against each Log to provide judgements, which are then aggregated to
+provide an overall score for the application. Evaluators in this case take the generated Log and the testcase datapoint that gave
+rise to it as arguments.
+
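+For example, a sketch of an offline Evaluator that compares the Log against the Datapoint's target (assuming the target is stored
+as a simple string, as in the CSV example in the Dataset guide):
+
+def matches_target(log, datapoint):
+    # Judgement: True if the generated output exactly matches the expected target.
+    return log["output"].strip() == datapoint["target"].strip()
+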
+See our guides on creating Datasets [./guides/create-dataset] and running Evaluations [../evaluation/overview] for more details.
+
+
+HUMANLOOP RUNTIME VS. YOUR RUNTIME
+
+Evaluations require the following to be generated:
+
+ 1. Logs for the datapoints.
+ 2. Evaluator results for those generated logs.
+
+Using the Evaluations API, Humanloop offers the ability to generate logs either within the Humanloop runtime, or within your own
+runtime. Similarly, Evaluators which are defined within the Humanloop UI can be executed in the Humanloop runtime, whereas
+Evaluators defined in your code can be executed in your runtime and the results posted back to Humanloop. This provides
+flexibility for supporting more complex evaluation workflows.
+
+
+CI/CD INTEGRATION
+
+Humanloop's evaluation framework can be integrated into your CI/CD pipeline, allowing you to automatically test your AI
+applications as part of your development workflow. This integration enables you to catch potential regressions or performance
+issues before they make it to production.
+
+One powerful way to leverage this integration is by triggering evaluation runs in GitHub Actions and having the results commented
+directly on your Pull Requests. This provides immediate feedback to developers and reviewers about the impact of changes on your
+AI application's performance.
+
+To set up CI/CD evaluation follow the guide on CI/CD Integration [/docs/evaluation/guides/cicd-integration].",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/evaluation/overview",
+ "title": "Overview",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -3861,6 +6216,103 @@ All the interactions in Editor are stored as Logs within your Prompt and can be
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Evaluation",
+ "How-To Guides",
+ ],
+ "content": "You can compare Prompt versions interactively side-by-side to get a sense for how their behaviour differs; before then triggering
+more systematic Evaluations [/docs/evaluation/guides/run-evaluation]. All the interactions in Editor are stored as Logs within
+your Prompt and can be inspected further and added to a Dataset
+[/docs/evaluation/guides/create-dataset#create-a-dataset-from-logs] for Evaluations.
+
+
+PREREQUISITES
+
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/development/guides/create-prompt] guide first.
+
+
+COMPARE PROMPT VERSIONS
+
+In this example we will use a simple Support Agent Prompt that answers user queries about Humanloop's product and docs.
+
+Support agent base prompt. [file:8bd0c80b-97e3-477f-bc43-c6d29b6629e1]
+
+CREATE A NEW VERSION OF YOUR PROMPT
+
+Open your Prompt in the Editor and expand Parameters and change some details such as the choice of \`Model\`. In this example, we
+change from \`gpt-4o\` to \`gpt-4o-mini\`. This will create a new uncommitted version of the Prompt.
+
+Now commit the new version of your Prompt by selecting the blue Commit button over Parameters and providing a helpful commit
+message like:
+
+Changed model to gpt-4o-mini
+
+LOAD UP TWO VERSIONS OF YOUR PROMPT IN THE EDITOR
+
+To load up the previous version side-by-side, select the menu beside the Load button and select the New panel option (depending
+on your screen real-estate, you can add more than 2 panels).
+
+
+Then select the Load button in the new panel and select another version of your Prompt to compare.
+
+
+
+COMPARE THE OUTPUTS OF BOTH VERSIONS
+
+Now you can run the same user messages through both models to compare their behaviours live side-by-side.
+
+
+
+
+
+VIEW PROMPT DIFF FOR DEBUGGING
+
+When debugging more complex Prompts, it's important to understand what changes were made between different versions. Humanloop
+provides a diff view to support this.
+
+
+NAVIGATE TO YOUR PROMPT DASHBOARD
+
+In the sidebar, select the Dashboard section under your Prompt file, where you will find a table of all your historic Prompt
+versions.
+
+Support agent dashboard [file:1aa2c544-2ae2-45e1-ae4c-852ede877c21]
+
+
+SELECT THE VERSIONS TO COMPARE
+
+In the table, select two rows you would like to understand the changes between. Then select the Compare Versions button above the
+table.
+
+Support agent diff view [file:f729b709-a0ee-4f46-b3c3-4141e503845a]
+
+ 1. While in the Compare tab, look for the Diff section.
+ 2. This section will highlight the changes made between the selected versions, showing additions, deletions, and modifications.
+ 3. Use this diff view to understand how specific changes in your prompt configuration affect the output.
+
+By following these steps, you can effectively compare different versions of your Prompts and iterate on your instructions to
+improve performance.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/evaluation/guides/comparing-prompt-editor",
+ "title": "Compare and Debug Prompts",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -4029,6 +6481,202 @@ This guide will show you how to create Datasets in Humanloop in three different
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Evaluation",
+ "How-To Guides",
+ ],
+ "content": "Datasets [../../concepts/datasets] are a collection of input-output pairs that can be used to evaluate your Prompts, Tools or even
+Evaluators.
+
+This guide will show you how to create Datasets in Humanloop in three different ways:
+
+ * Create a Dataset from existing Logs - useful for curating Datasets based on how your AI application has been behaving in the
+ wild.
+ * Upload data from CSV - useful for quickly uploading existing tabular data you've collected outside of Humanloop.
+ * Upload via API - useful for uploading more complex Datasets that may have nested JSON structures, which are difficult to
+ represent in tabular .CSV format, and for integrating with your existing data pipelines.
+
+
+CREATE A DATASET FROM LOGS
+
+Prerequisites
+
+You should have an existing Prompt [../../concepts/prompts] on Humanloop and already generated some Logs [../../concepts/logs].
+Follow our guide on creating a Prompt [../../development/guides/create-prompt].
+
+Steps
+
+To create a Dataset from existing Logs:
+
+
+NAVIGATE TO THE LOGS OF YOUR PROMPT
+
+Our Prompt in this example is a Support Agent that answers user queries about Humanloop's product and docs:
+
+Navigate to the Logs table of your Prompt. [file:e54700bb-13b4-44d0-ad1f-48b89de638cc]
+
+
+SELECT A SUBSET OF THE LOGS TO ADD
+
+Filter logs on a criteria of interest, such as the version of the Prompt used, then multi-select Logs.
+
+In the menu in the top right of the page, select Add to dataset.
+
+Filter and select logs of interest. [file:207e5341-974b-4650-b7e0-c98dab0c2bc0]
+
+
+ADD TO A NEW DATASET
+
+Provide a name of the new Dataset and click Create (or you can click add to existing Dataset to append the selection to an
+existing Dataset). Then provide a suitable commit message describing the datapoints you've added.
+
+Create a new dataset from logs. [file:dd95a35c-fcda-402c-b46e-5b2dc714830e]
+
+You will then see the new Dataset appear at the same level in the filesystem as your Prompt.
+
+
+UPLOAD A DATASET FROM CSV
+
+Prerequisites
+
+You should have an existing Prompt [../../concepts/prompts] on Humanloop with a variable defined with our double curly bracket
+syntax {{variable}}. If not, first follow our guide on creating a Prompt [../../development/guides/create-prompt].
+
+In this example, we'll use a Prompt that categorises user queries about Humanloop's product and docs by which feature they relate
+to.
+
+An example Prompt with a variable \`{{query}}\`. [file:b1b15b09-b614-402c-87db-ce2919e54828]
+
+Steps
+
+To create a dataset from a CSV file, we'll first create a CSV in Google Sheets that contains values for our Prompt variable
+{{query}} and then upload it to a Dataset on Humanloop.
+
+CREATE A CSV FILE
+
+ * In our Google Sheets example below, we have a column called \`query\` which contains possible values for our Prompt variable
+   \`{{query}}\`. You can include as many columns as you have variables in your Prompt template.
+ * There is additionally a column called \`target\` which will populate the target output for the classifier Prompt. In this case,
+   we use simple strings to define the target.
+ * More complex Datapoints that contain \`messages\` and structured objects for targets are supported, but are harder to
+   incorporate into a CSV file as they tend to be hard-to-read JSON. If you need more complex Datapoints, use the API
+   [#upload-via-api] instead.
+
+A CSV file in Google Sheets defining query and target pairs for our Classifier Prompt. [file:cc2597ba-c565-48a2-b56e-527aef4a8e27]
+
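+As a purely illustrative example (the queries and targets below are made up), the CSV could contain rows like:
+
+query,target
+"How do I version my prompts?","Prompt Editor"
+"Can I check my deployed model for drift?","Online Monitoring"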
+
+EXPORT THE GOOGLE SHEET TO CSV
+
+In Google Sheets, choose File → Download → Comma-separated values (.csv).
+
+
+CREATE A NEW DATASET FILE
+
+On Humanloop, select New at the bottom of the left hand sidebar, then select Dataset.
+
+Create a new File from the sidebar on Humanloop. [file:c4f43d75-dc17-4b2a-a99e-5f16d348bea4]
+
+
+CLICK UPLOAD CSV
+
+First name your dataset when prompted in the sidebar, then select the Upload CSV button and drag and drop the CSV file you created
+above using the file explorer. You will then be prompted to provide a commit message to describe the initial state of the dataset.
+
+Uploading a CSV file to create a dataset. [file:7bb8f7ee-d389-428a-81f1-a27e21229230]
+
+
+FOLLOW THE LINK IN THE POP-UP TO INSPECT THE DATASET CREATED
+
+You'll see the input-output pairs that were included in the CSV file and you can select the rows to inspect and edit the individual
+Datapoints.
+
+Inspect the Dataset created from the CSV file. [file:c36420f6-a8a5-4fc3-b97a-fc047662f3e0]
+
+
+UPLOAD A DATASET VIA API
+
+Prerequisites
+
+If you are using the SDK, the only prerequisite is to have the SDK installed and configured. If you are using the API directly,
+you will need to have an API key.
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop TypeScript SDK:
+
+ npm install humanloop
+
+
+ 2. Import and initialize the SDK:
+
+ import { HumanloopClient, Humanloop } from "humanloop";
+
+ const humanloop = new HumanloopClient({ apiKey: "YOUR_API_KEY" });
+
+ // Check that the authentication was successful
+ console.log(await humanloop.prompts.list());
+
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop Python SDK:
+
+ pip install humanloop
+
+
+ 2. Start a Python interpreter:
+
+ python
+
+
+ 3. Initialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page
+ [https://app.humanloop.com/account/api-keys])
+
+ from humanloop import Humanloop
+ hl = Humanloop(api_key="")
+
+ # Check that the authentication was successful
+ print(hl.prompts.list())
+
+
+Steps
+
+Using the API is a great way to integrate Humanloop with your existing data pipeline, or just to do a one-off upload of a more complex
+Dataset that is hard to represent in a CSV file, such as one that contains an array of messages and JSON targets.
+
+
+POST DATA TO THE DATASETS API
+
+We first define some sample data that contains user messages and desired responses from our Support Agent Prompt and call the POST
+/datasets endpoint to upload it as follows:
+
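+A minimal Python sketch of that request is shown below; the datasets.upsert method name and the datapoint field names are
+assumptions based on the surrounding description, so check the API reference for the exact shape:
+
+from humanloop import Humanloop
+
+humanloop = Humanloop(api_key="YOUR_API_KEY")
+
+# Sample data: user messages plus the desired ("target") agent responses.
+datapoints = [
+    {
+        "messages": [{"role": "user", "content": "How do I create a Dataset?"}],
+        "target": {"response": "You can create a Dataset from Logs, from a CSV upload, or via the API."},
+    },
+]
+
+# Assumed SDK call for the POST /datasets endpoint -- verify the method name
+# and parameters against the API reference.
+humanloop.datasets.upsert(
+    path="Support Query Ground Truth",
+    datapoints=datapoints,
+)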
+
+
+
+INSPECT THE UPLOADED DATASET
+
+After running this code, in your Humanloop workspace you will now see a Dataset called Support Query Ground Truth (or whatever
+value was in path) with your sample data.
+
+Inspect the Dataset uploaded via API. [file:4b040cdd-dd16-4837-bc2a-111768d174d7]
+
+
+
+
+NEXT STEPS
+
+🎉 Now that you have Datasets defined in Humanloop, you can leverage our Evaluations [../overview] feature to systematically
+measure and improve the performance of your AI applications. See our guides on setting up Evaluators [./llm-judge] and Running an
+Evaluation [./run-evaluation] to get started.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/evaluation/guides/create-dataset",
+ "title": "Create a Dataset",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -4322,6 +6970,157 @@ We support a fully featured Python environment; details on the supported package
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Evaluation",
+ "How-To Guides",
+ ],
+ "content": "A code Evaluator [../../concepts/evaluators] is a Python function that takes a generated Log [../../concepts/logs] (and optionally
+a testcase Datapoint [../../concepts/datasets] if comparing to expected results) as input and returns a judgement. The judgement
+is in the form of a boolean or number that measures some criteria of the generated Log defined within the code.
+
+Code Evaluators provide a flexible way to evaluate the performance of your AI applications, allowing you to re-use existing
+evaluation packages as well as define custom evaluation heuristics.
+
+We support a fully featured Python environment; details on the supported packages can be found in the environment reference
+[/docs/v5/reference/python-environment]
+
+
+PREREQUISITES
+
+You should have an existing Prompt [../../concepts/prompts] to evaluate and already generated some Logs [../../concepts/logs].
+Follow our guide on creating a Prompt [../../development/guides/create-prompt].
+
+In this example, we'll reference a Prompt that categorises a user query about Humanloop's product and docs by which feature it
+relates to.
+
+An example Prompt with a variable \`{{query}}\`. [file:b1b15b09-b614-402c-87db-ce2919e54828]
+
+
+CREATE A CODE EVALUATOR
+
+
+CREATE A NEW EVALUATOR
+
+ * Click the New button at the bottom of the left-hand sidebar, select Evaluator, then select Code.
+
+Create code evaluator. [file:86595103-46bb-4455-83bb-2e194ed7f4b0]
+
+ * Give the Evaluator a name when prompted in the sidebar, for example Category Validator.
+
+
+DEFINE THE EVALUATOR CODE
+
+After creating the Evaluator, you will automatically be taken to the code editor. For this example, our Evaluator will check that
+the feature category returned by the Prompt is from the list of allowed feature categories. We want to ensure our categoriser
+isn't hallucinating new features.
+
+ * Make sure the Mode of the Evaluator is set to Online in the options on the left.
+ * Copy and paste the following code into the code editor:
+
+
+ALLOWED_FEATURES = [
+ "Prompt Editor",
+ "Model Integrations",
+ "Online Monitoring",
+ "Offline Evaluations",
+ "Dataset Management",
+ "User Management",
+ "Roles Based Access Control",
+ "Deployment Options",
+ "Collaboration",
+ "Agents and chaining"
+]
+
+def validate_feature(log):
+ print(f"Full log output: \\n {log['output']}")
+ # Parse the final line of the log output to get the returned category
+ feature = log["output"].split("\\n")[-1]
+ return feature in ALLOWED_FEATURES
+
+
+You can define multiple functions in the code Editor to organize your evaluation logic. The final function defined is used as the
+main Evaluator entry point that takes the Log argument and returns a valid judgement.
+
+
+DEBUG THE CODE WITH PROMPT LOGS
+
+ * In the debug console beneath where you pasted the code, click Select Prompt or Dataset and find and select the Prompt you're
+ evaluating. The debug console will load a sample of Logs from that Prompt.
+
+The debug console for testing the code. [file:6a2ef4ef-257e-4e42-b3ab-c0ca6e9243e3]
+
+ * Click the Run button at the far right of one of the loaded Logs to trigger a debug run. This causes the code to be executed
+ with the selected Log as input and populates the Result column.
+ * Inspect the output of the executed code by selecting the arrow to the right of Result.
+
+Inspect evaluator log in debug console. [file:795262a5-e35e-4c8d-ad3d-2dea45d7b9ff]
+
+
+COMMIT THE CODE
+
+Now that you've validated the behaviour, commit the code by selecting the Commit button at the top right of the Editor and provide
+a suitable commit message describing your changes.
+
+
+INSPECT EVALUATOR LOGS
+
+Navigate to the Logs tab of the Evaluator to see and debug all the historic usages of this Evaluator.
+
+Evaluator logs table. [file:00e5bb5e-9bfc-4f5d-bc66-c771eb04eaa8]
+
+
+MONITOR A PROMPT
+
+Now that you have an Evaluator, you can use it to monitor the performance of your Prompt by linking it so that it is automatically
+run on new Logs.
+
+
+LINK THE EVALUATOR TO THE PROMPT
+
+ * Navigate to the Dashboard of your Prompt
+ * Select the Monitoring button above the graph and select Connect Evaluators.
+ * Find and select the Evaluator you just created and click Choose.
+
+Select Evaluator for monitoring. [file:d04672a6-e428-4d7b-bfa5-96254a03ab53]
+
+You can link to a deployed version of the Evaluator by choosing the environment such as \`production\`, or you can link to a
+specific version of the Evaluator. If you want changes deployed to your Evaluator to be automatically reflected in Monitoring,
+link to the environment, otherwise link to a specific version.
+
+This linking results in:
+
+ * An additional graph on your Prompt dashboard showing the Evaluator results over time.
+ * An additional column in your Prompt Versions table showing the aggregated Evaluator results for each version.
+ * An additional column in your Logs table showing the Evaluator results for each Log.
+
+
+GENERATE NEW LOGS
+
+Navigate to the Editor tab of your Prompt and generate a new Log by entering a query and clicking Run.
+
+
+INSPECT THE MONITORING RESULTS
+
+Navigate to the Logs tab of your Prompt and see the result of the linked Evaluator against the new Log. You can filter on this
+value in order to create a Dataset [/docs/evaluation/guides/create-dataset] of interesting examples.
+
+See the results of monitoring on your logs. [file:2083e83c-6964-4f90-93f6-d62a165a2b33]
+
+
+NEXT STEPS
+
+ * Explore AI Evaluators [/docs/evaluation/guides/llm-as-a-judge] and Human Evaluators [/docs/evaluation/guides/human-evaluators]
+ to complement your code-based judgements for more qualitative and subjective criteria.
+ * Combine your Evaluator with a Dataset [/docs/concepts/datasets] to run Evaluations [/docs/evaluation/guides/run-evaluation] to
+ systematically compare the performance of different versions of your AI application.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/evaluation/guides/code-based-evaluator",
+ "title": "Set up a code Evaluator",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -4563,6 +7362,141 @@ The judgement is in the form of a boolean or number that measures some criteria
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Evaluation",
+ "How-To Guides",
+ ],
+ "content": "LLMs can be used for evaluating the quality and characteristics of other AI-generated outputs. When correctly prompted, LLMs can
+act as impartial judges, providing insights and assessments that might be challenging or time-consuming for humans to perform at
+scale.
+
+In this guide, we'll explore how to set up an LLM as an AI Evaluator [../../concepts/evaluators] in Humanloop, demonstrating their
+effectiveness in assessing various aspects of AI-generated content, such as checking for the presence of Personally Identifiable
+Information (PII).
+
+An AI Evaluator [../../concepts/evaluators] is a Prompt that takes attributes from a generated Log [../../concepts/logs] (and
+optionally from a testcase Datapoint [../../concepts/dataset] if comparing to expected results) as context and returns a
+judgement. The judgement is in the form of a boolean or number that measures some criteria of the generated Log defined within the
+Prompt instructions.
+
+
+PREREQUISITES
+
+You should have an existing Prompt [../../concepts/prompts] to evaluate and already generated some Logs [../../concepts/logs].
+Follow our guide on creating a Prompt [../../development/guides/create-prompt].
+
+In this example we will use a simple Support Agent Prompt that answers user queries about Humanloop's product and docs.
+
+Support agent base prompt. [file:8bd0c80b-97e3-477f-bc43-c6d29b6629e1]
+
+
+CREATE AN LLM EVALUATOR
+
+
+CREATE A NEW EVALUATOR
+
+ * Click the New button at the bottom of the left-hand sidebar, select Evaluator, then select AI.
+
+ * Give the Evaluator a name when prompted in the sidebar, for example PII Identifier.
+
+
+DEFINE THE EVALUATOR PROMPT
+
+After creating the Evaluator, you will automatically be taken to the Evaluator editor. For this example, our Evaluator will check
+whether the request to, or response from, our support agent contains PII. We want to understand whether this is a potential issue
+that we wish to mitigate with additional Guardrails [../../observability/alerts-and-guardrails] in our agent workflow.
+
+ * Make sure the Mode of the Evaluator is set to Online in the options on the left.
+ * Copy and paste the following Prompt into the Editor:
+
+You are a helpful assistant. Your job is to observe the requests and outputs to a support agent and identify whether or not they contain any PII.
+
+Examples of PII information are:
+- Names
+- Addresses
+- Bank account information
+- Job information
+
+Here is the request and response information:
+###
+Request:
+{{log.messages}}
+###
+Response:
+{{log.output_message}}
+###
+
+Your response should contain the rationale and the final binary true/false verdict as to whether PII exists in the request response. The final true/false verdict should be on a new line at the end.
+
+
+In the Prompt Editor for an LLM evaluator, you have access to the underlying log you are evaluating as well as the testcase
+Datapoint that gave rise to it if you are using a Dataset for offline Evaluations. These are accessed with the standard {{
+variable }} syntax, enhanced with a familiar dot notation to pick out specific values from inside the log and testcase objects.
+
+For example, suppose you are evaluating a Log object like this.
+
+{
+ "id": "data_B3RmIu9aA5FibdtXP7CkO",
+ "prompt": {...},
+ "inputs": {
+ "query": "What is the meaning of life?",
+ },
+ "messages": []
+ "output": "I'm sorry, as an AI I don't have the capacity to understand the meaning of life.",
+ "metadata": {...},
+ ...etc
+}
+
+
+In the LLM Evaluator Prompt, {{ log.inputs.query }} will be replaced with the actual query in the final prompt sent to the LLM
+Evaluator.
+
+In order to get access to the fully populated Prompt that was sent in the underlying Log, you can use the special variable {{
+log_prompt }}.
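+
+To make the substitution concrete, here is a minimal Python sketch (purely illustrative, not Humanloop's implementation) of how
+a dot-notation variable such as log.inputs.query resolves against the example Log above:
+
+example_log = {
+    "inputs": {"query": "What is the meaning of life?"},
+    "output": "I'm sorry, as an AI I don't have the capacity to understand the meaning of life.",
+}
+
+def resolve(path, log):
+    # Walk the dot-separated path, e.g. "log.inputs.query" -> log["inputs"]["query"]
+    value = {"log": log}
+    for key in path.split("."):
+        value = value[key]
+    return value
+
+print(resolve("log.inputs.query", example_log))  # -> What is the meaning of life?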
+
+
+DEBUG THE EVALUATOR WITH PROMPT LOGS
+
+ * In the debug console beneath where you pasted the Evaluator Prompt, click Select Prompt or Dataset and find and select the
+ Prompt you're evaluating. The debug console will load a sample of Logs from that Prompt.
+
+The debug console for testing the Evaluator Prompt. [file:672cd87b-ad75-4848-91f3-a098a587945d]
+
+ * Click the Run button at the far right of one of the loaded Logs to trigger a debug run. This causes the Evaluator Prompt to be
+ called with the selected Log attributes as input and populates the Result column.
+ * Inspect the output of the executed code by selecting the arrow to the right of Result.
+
+Inspect evaluator log in debug console. [file:8d84ea0c-d34a-4bfc-b1fc-abe9b8896fb6]
+
+
+COMMIT THE CODE
+
+Now that you've validated the behaviour, commit the Evaluator Prompt by selecting the Commit button at the top right of the Editor
+and provide a suitable commit message describing your changes.
+
+
+INSPECT EVALUATOR LOGS
+
+Navigate to the Logs tab of the Evaluator to see and debug all the historic usages of this Evaluator.
+
+Evaluator logs table. [file:b6ce6ac1-d556-4d13-b5ff-49928d326c9c]
+
+
+NEXT STEPS
+
+ * Explore Code Evaluators [./code-based-evaluator] and Human Evaluators [./human-evaluator] to complement your AI judgements.
+ * Combine your Evaluator with a Dataset [../../concepts/datasets] to run Evaluations [./run-evaluation] to systematically compare
+ the performance of different versions of your AI application.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/evaluation/guides/llm-as-a-judge",
+ "title": "Set up LLM as a Judge",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -4756,6 +7690,79 @@ These Evaluators can be attached to Prompts and Evaluations.",
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Evaluation",
+ "How-To Guides",
+ ],
+ "content": "Human Evaluators allow your subject-matter experts and end-users to provide feedback on Prompt Logs. These Evaluators can be
+attached to Prompts and Evaluations.
+
+
+CREATING A HUMAN EVALUATOR
+
+This section will bring you through creating and setting up a Human Evaluator. As an example, we'll use a "Tone" Evaluator that
+allows feedback to be provided by selecting from a list of options.
+
+
+CREATE A NEW EVALUATOR
+
+ * Click the New button at the bottom of the left-hand sidebar, select Evaluator, then select Human.
+
+New Evaluator dialog [file:03326b58-9d6d-4c66-aa94-3afbe47f8ddd]
+
+ * Give the Evaluator a name when prompted in the sidebar, for example "Tone".
+
+Created Human Evaluator being renamed to "Tone" [file:f6746330-2e87-4769-98eb-3d02a37b547a]
+
+
+DEFINE THE JUDGMENT SCHEMA
+
+After creating the Evaluator, you will automatically be taken to the Editor. Here, you can define the schema detailing the kinds
+of judgments to be applied for the Evaluator. The Evaluator will be initialized to a 5-point rating scale by default.
+
+In this example, we'll set up a feedback schema for a "Tone" Evaluator. See the Return types documentation
+[../../concepts/evaluators#return-types] for more information on return types.
+
+ * Select Multi-select within the Return type dropdown. "Multi-select" allows you to apply multiple options to a single Log.
+ * Add the following options, and set the valence for each:
+ * Enthusiastic [positive]
+ * Informative [positive]
+ * Repetitive [negative]
+ * Technical [negative]
+ * Update the instructions to "Select all options that apply to the output."
+
+Tone evaluator set up with options and instructions [file:9c477a6f-8107-4320-8cd9-ff101f262b7a]
+
+
+COMMIT AND DEPLOY THE EVALUATOR
+
+ * Click Commit in the top-right corner.
+ * Enter "Added initial tone options" as a commit message. Click Commit.
+
+Commit dialog over the "Tone" Evaluator [file:4621f64b-49b5-4c15-b28a-4765e446568a]
+
+ * In the "Version committed" dialog, click Deploy.
+ * Select the checkbox for your default Environment (usually named "production"), and confirm your deployment.
+
+Dialog deploying the "Tone" Evaluator to the "production" Environment [file:729f39cf-708d-4294-adca-63c9a7ebfab9]
+
+🎉 You've now created a Human Evaluator that can be used to collect feedback on Prompt Logs.
+
+
+NEXT STEPS
+
+ * Use Human Evaluators in Evaluations [./run-human-evaluation] to collect annotations on Prompt Logs from subject-matter experts.
+ * Attach Human Evaluators to Prompts [../../observability/guides/capture-user-feedback] to collect end-user feedback",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/evaluation/guides/human-evaluators",
+ "title": "Set up a Human Evaluator",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -4885,6 +7892,130 @@ The Evaluation then uses these judgements to provide a summary report of the per
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Evaluation",
+ "How-To Guides",
+ ],
+ "content": "This feature is not available for the Free tier. Please contact us if you wish to learn more about our [Enterprise
+plan](https://humanloop.com/pricing)
+
+An Evaluation on Humanloop leverages a Dataset [../../concepts/datasets], a set of Evaluators [../../concepts/evaluators] and
+different versions of a Prompt [../../concepts/prompts] to compare.
+
+The Dataset contains testcases describing the inputs (and optionally the expected results) for a given task. The Evaluators define
+the criteria for judging the performance of the Prompts when executed using these inputs.
+
+Each of the Prompt versions you want to compare are run against the same Dataset producing Logs [../../concepts/logs]; judgements
+are then provided by Evaluators. The Evaluation then uses these judgements to provide a summary report of the performance allowing
+you to systematically compare the performance of the different Prompt versions.
+
+
+PREREQUISITES
+
+ * A set of Prompt [../../concepts/prompts] versions you want to compare - see the guide on creating Prompts
+ [./comparing-prompt-editor].
+ * A Dataset [../../concepts/datasets] containing testcases for the task - see the guide on creating a Dataset [./create-dataset].
+ * At least one Evaluator [../../concepts/evaluators] to judge the performance of the Prompts - see the guides on creating Code
+ [/docs/evaluation/guides/code-based-evaluator], AI [/docs/evaluation/guides/llm-as-a-judge] and Human
+ [/docs/evaluation/guides/human-evaluators] Evaluators.
+
+ You can combine multiple different types of Evaluator in a single Evaluation. For example,
+you might use an AI Evaluator to judge the quality of the output of the Prompt and a code Evaluator to check the output is below
+some latency and cost threshold.
+
+For this example, we're going to evaluate the performance of a Support Agent that responds to user queries about Humanloop's
+product and documentation. Our goal is to understand which base model between gpt-4o, gpt-4o-mini and claude-3-5-sonnet-20240620
+is most appropriate for this task.
+
+Variations of the Support Agent Prompt, each using a different base model. [file:d4bc2957-8e59-4481-a421-2b2997aef2c5]
+
+
+RUN AN EVALUATION VIA UI
+
+For Product and AI teams, the ability to trigger Evaluations against a Dataset within the Humanloop UI allows them to
+systematically compare the performance of different Prompt versions and make informed decisions about which to deploy.
+
+NAVIGATE TO THE EVALUATIONS TAB OF YOUR PROMPT
+
+ * On the left-hand sidebar, click on the Evaluations tab beneath your Prompt.
+ * Click the Evaluate button top right, which presents the setup panel for the Evaluation.
+
+Prompt Evaluations tab. [file:36fc9ed5-b127-470d-a1d5-5add954b48c8]
+
+
+SET UP THE EVALUATION
+
+ * Select a Dataset using +Dataset.
+ * Add the Prompt versions you want to compare using +Version - note you can multi-select versions in the modal resulting in
+ multiple columns.
+ * Add the Evaluators you want to use to judge the performance of the Prompts using +Evaluator. By default, Cost, Tokens and
+ Latency Evaluators are pre-selected.
+
+ By default the system will re-use Logs if they exist for the chosen Dataset, Prompts and Evaluators.
+This makes it easy to extend reports without paying the cost of re-running your Prompts and Evaluators.
+
+If you want to force the system to re-run the Prompts against the Dataset producing a new batch of Logs, you can select the Manage
+button in the setup panel and choose +New Batch.
+
+ * Select Save to trigger the Evaluation report. You will see the report below the setup panel populate with a progress bar and
+ status pending as the Logs are generated on Humanloop.
+
+In progress Evaluation report [file:8cd034a4-fbbf-4e9e-bc46-7ce52bacd5b1]
+
+ This guide assumes both the Prompt and Evaluator Logs are generated using the Humanloop
+runtime. For certain use cases where more flexibility is required, the runtime for producing Logs instead lives in your code - see
+our guide on Logging [../../development/guides/logging], which also works with our Evaluations feature. We have a guide for how to
+run Evaluations with Logs generated in your code coming soon!
+
+
+REVIEW THE RESULTS
+
+It will generally take at least a couple of minutes before the Evaluation report is marked as completed as the system generates
+all the required Prompt and Evaluator Logs.
+
+Once the report is completed, you can review the performance of the different Prompt versions using the Evaluators you selected.
+
+ * The top spider plot provides you with a summary of the average Evaluator performance across all the Prompt versions. In our
+ case, gpt-4o, although slightly slower and more expensive on average, is significantly better when it comes to User
+ Satisfaction.
+
+Evaluation Spider plot [file:22744b0c-8515-466d-bcb6-8e2c7c7ee27c]
+
+ * Below the spider plot, you can see the breakdown of performance per Evaluator.
+
+Evaluation Evaluator stats breakdown [file:4d09e21e-48b1-4820-b097-61e1c7c45c9d]
+
+ * To drill into and debug the Logs that were generated, select the Logs button top right of the Evaluation report. This brings
+ you to the Evaluation Logs table and you can filter and review logs to understand the performance better and replay Logs in our
+ Prompt Editor.
+
+Drill down to Evaluation Logs. [file:fe77e299-07f3-4e6f-8c85-d231e64e31eb]
+
+
+RUN AN EVALUATION VIA API
+
+For Engineering teams, the ability to trigger Evaluations via the API allows them to integrate the Evaluation process into their
+existing pipelines.
+
+This content is currently under development. Please refer to our [V4 documentation](https://docs.humanloop.com/v4) for the current
+docs.
+
+
+NEXT STEPS
+
+ * Incorporate this Evaluation process into your Prompt engineering and deployment workflow.
+ * Set up Evaluations where the runtime for producing Logs lives in your code - see our guide on Logging
+ [/docs/development/guides/log-to-a-prompt].
+ * Utilise Evaluations as part of your CI/CD pipeline [/docs/evaluation/guides/cicd-integration]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/evaluation/guides/run-evaluation",
+ "title": "Run an Evaluation",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -5123,6 +8254,78 @@ to evaluate the quality of your Prompts' outputs.",
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Evaluation",
+ "How-To Guides",
+ ],
+ "content": "By attaching Human Evaluators to your Evaluations, you can collect annotations from your subject-matter experts to evaluate the
+quality of your Prompts' outputs.
+
+
+PREREQUISITES
+
+ * You have set up a Human Evaluator appropriate for your use-case. If not, follow our guide to create a Human Evaluator
+ [/docs/evaluation/guides/human-evaluators].
+ * You are familiar with setting up Evaluations in Humanloop. See our guide to creating Evaluations
+ [/docs/evaluation/guides/run-evaluation].
+
+
+USING A HUMAN EVALUATOR IN AN EVALUATION
+
+
+CREATE A NEW EVALUATION
+
+ * Go to the Evaluations tab of a Prompt.
+ * Click Evaluate in the top-right corner.
+ * Set up your Evaluation by selecting a Dataset and some Prompt versions to evaluate. See our guide to Running an Evaluation in
+ the UI [/docs/evaluation/guides/run-evaluation#run-an-evaluation-via-ui] for more details.
+ * Click the + Evaluator button to add a Human Evaluator to the Evaluation. This will bring up a dialog where you can select the
+ Human Evaluator you created earlier. Within this dialog, select the "Tone" Evaluator, and then select its latest version which
+ should be at the top.
+ * Click + Choose to add the Evaluator to the Evaluation.
+
+Evaluation set up with "Tone" Evaluator [file:b4c54b52-ccd5-4c2f-80bc-8bed0ee6d1ac]
+
+ * Click Save/Run to create the Evaluation and start generating Logs to evaluate.
+
+
+APPLY JUDGMENTS TO GENERATED LOGS
+
+When you save an Evaluation, Humanloop will automatically generate Logs using the specified Prompt versions and Dataset. When the
+required Logs are generated, a "Human Evaluations incomplete" message will be displayed in a toolbar at the top of the Evaluation.
+
+ * Go to the Logs tab of the Evaluation to view the generated Logs.
+
+Evaluation Logs tab [file:ebb9ef20-736e-4190-b73d-5d551cf17a01]
+
+ * Expand the drawer for a Log by clicking on the row to view the Log details. Here, you can view the generated output and apply
+ judgments to the Log.
+
+Evaluation Log drawer [file:56fd15e9-3af3-4293-974e-73c63af599e8]
+
+ * When you've completed applying judgments, click on Mark as complete in the toolbar at the top of the page. This will update the
+ Evaluation's status.
+
+Completed Evaluation [file:f2b32305-588d-42a0-9c04-97d6ba843236]
+
+
+REVIEW JUDGMENTS STATS
+
+Go to the Overview tab of the Evaluation to view the aggregate stats of the judgments applied to the Logs. On this page, an
+aggregate view of the judgments provided to each Prompt version is displayed in a table, allowing you to compare the performance
+of different Prompt versions.
+
+Evaluation Overview tab [file:b7ae74b2-02be-4594-b6fd-9320eff04cf4]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/evaluation/guides/run-human-evaluation",
+ "title": "Run a Human Evaluation",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -5233,6 +8436,49 @@ In this guide, we will walk through setting up CI/CD integration for Humanloop e
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Evaluation",
+ "How-To Guides",
+ ],
+ "content": "This feature is not available for the Free tier. Please contact us if you wish to learn more about our [Enterprise
+plan](https://humanloop.com/pricing)
+
+
+SETTING UP CI/CD INTEGRATION WITH GITHUB ACTIONS
+
+Integrating Humanloop evaluations into your CI/CD pipeline allows you to automatically test your AI applications as part of your
+development workflow. This guide will walk you through setting up this integration using GitHub Actions.
+
+
+PREREQUISITES
+
+ * A GitHub repository for your project
+ * A Humanloop account with access to Evaluations
+ * A Prompt and Dataset set up in Humanloop
+ * An Evaluator configured in Humanloop
+
+
+STEPS TO SET UP CI/CD INTEGRATION
+
+CREATE A GITHUB ACTIONS WORKFLOW
+
+In your GitHub repository, create a new file .github/workflows/humanloop-eval.yml with the following content:
+
+This content is currently under development. Please refer to our [V4 documentation](https://docs.humanloop.com/v4) for the current
+docs.
+
+
+",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/evaluation/guides/cicd-integration",
+ "title": "Set up CI/CD Evaluations",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -5341,6 +8587,59 @@ Evaluators in the monitoring context act as continuous checks on your deployed m
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Observability",
+ ],
+ "content": "At the core of Humanloop's monitoring system are evaluators - functions you define that analyze LLM-generated logs and produce
+evaluations. These evaluations can be boolean flags or numerical scores, providing insights into how well your model is performing
+based on criteria specific to your use case.
+
+Evaluators in the monitoring context act as continuous checks on your deployed models, helping you maintain quality, detect
+anomalies, and ensure your LLMs are behaving as expected in the production environment.
+
+
+TYPES
+
+Humanloop supports three types of evaluators for monitoring:
+
+ * Code based - Using our in-browser editor, define simple Python functions to act as evaluators. These run automatically on your
+ logs.
+ * LLM as judge - Use LLMs to evaluate the outputs of other Prompts or Tools. Our editor lets you create prompts that pass log
+ data to a model for assessment. This is ideal for subjective evaluations like tone and factual accuracy. These also run
+ automatically.
+ * Human evaluators - Collect feedback from human evaluators using our feedback API. This allows you to incorporate human judgment
+ or in-app actions into your monitoring process.
+
+Both code-based and LLM-based evaluators run automatically on your logs, while human evaluators provide a way to incorporate
+manual feedback when needed.
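+
+For illustration, a code-based monitoring evaluator of either kind is just a Python function over the log. This is a sketch
+(not code from the in-browser editor); the log["output"] field mirrors the code Evaluator guide, and the length thresholds are
+arbitrary examples:
+
+def output_is_short_enough(log):
+    # Boolean flag: passes if the generation stays under an assumed length budget
+    return len(log["output"]) <= 500
+
+def brevity_score(log):
+    # Numerical score between 0 and 1: shorter outputs score higher
+    return max(0.0, 1.0 - len(log["output"]) / 2000)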
+
+
+MONITORING VS EVALUATION
+
+While monitoring and evaluation are closely related, they serve different purposes in the lifecycle of your LLM-powered
+applications:
+
+ * Monitoring is the continuous assessment of your deployed models in production environments. It involves real-time analysis of
+ logs generated by your live system, providing immediate insights into performance and behavior.
+
+ * Evaluation, on the other hand, typically refers to offline testing and assessment during the development phase or for periodic
+ performance checks.
+
+Humanloop's monitoring capabilities allow you to set up evaluators that automatically run on logs from your production
+environment, giving you real-time insights into your model's performance.
+
+For detailed information on offline evaluation and testing during development, please refer to our Evaluation guide
+[/docs/evaluation/overview].",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/observability/overview",
+ "title": "Overview",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -5412,6 +8711,164 @@ Monitoring your AI system's performance in production is crucial for maintaining
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Observability",
+ ],
+ "content": "{/* WIP - for gartner /start */}
+
+Monitoring your AI system's performance in production is crucial for maintaining quality and catching issues early. Humanloop
+provides tools to set up automated alerts based on your custom evaluation criteria, and guardrails to ensure that issues are
+prevented from happening.
+
+
+ALERTING
+
+Alerting is a critical component of any robust monitoring system. It allows you to be promptly notified of important events or
+issues in your Humanloop environment. By setting up alerts, you can proactively respond to potential problems and maintain the
+health and performance of your AI system.
+
+Alerting in Humanloop takes advantage of the Evaluators [/docs/concepts/evaluators] you have enabled, and uses webhooks to send
+alerts to your preferred communication channels.
+
+
+OVERVIEW
+
+Alerts are triggered when certain predefined conditions are met in your system. These conditions are typically monitored using log
+evaluators, which continuously analyze system logs and metrics.
+
+
+USE CASES FOR ALERTING
+
+ 1. Performance Issues
+
+ * Use Case: Alert when API response times exceed a certain threshold.
+ * Benefit: Quickly identify and address performance bottlenecks.
+
+ 2. Error Rate Spikes
+
+ * Use Case: Notify when the error rate for a specific service surpasses normal levels.
+ * Benefit: Detect and investigate unusual error patterns promptly.
+
+ 3. Resource Utilization
+
+ * Use Case: Alert when CPU or memory usage approaches capacity limits.
+ * Benefit: Prevent system crashes and maintain optimal performance.
+
+ 4. Security Incidents
+
+ * Use Case: Notify on multiple failed login attempts or unusual access patterns.
+ * Benefit: Rapidly respond to potential security breaches.
+
+ 5. Data Quality Issues
+
+ * Use Case: Alert when incoming data doesn't meet predefined quality standards.
+ * Benefit: Maintain data integrity and prevent propagation of bad data.
+
+ 6. SLA Violations
+
+ * Use Case: Notify when service level agreements are at risk of being breached.
+ * Benefit: Proactively manage client expectations and service quality.
+
+
+BEST PRACTICES FOR ALERTING
+
+ 1. Define Clear Thresholds: Establish meaningful thresholds based on historical data and business requirements.
+ 2. Prioritize Alerts: Categorize alerts by severity to ensure critical issues receive immediate attention.
+ 3. Provide Context: Include relevant information in alerts to aid in quick diagnosis and resolution.
+ 4. Avoid Alert Fatigue: Regularly review and refine alert conditions to minimize false positives.
+ 5. Establish Escalation Procedures: Define clear processes for handling and escalating different types of alerts.
+
+
+WEBHOOKS
+
+Webhooks are a crucial component of Humanloop's alerting system, allowing you to integrate alerts into your existing workflows and
+communication channels. By leveraging webhooks, you can:
+
+ 1. Receive real-time notifications when alert conditions are met
+ 2. Integrate alerts with your preferred messaging platforms (e.g., Slack, Microsoft Teams)
+ 3. Trigger automated responses or workflows in external systems
+ 4. Centralize alert management in your existing incident response tools
+
+Setting up webhooks enables you to respond quickly to critical events, maintain system health, and streamline your MLOps
+processes. Many Humanloop users find webhooks invaluable for managing their AI systems effectively at scale.
+
+For detailed instructions on setting up webhooks, please refer to our Set up Webhooks [/docs/observability/guides/set-up-webhooks]
+guide.
+
+
+GUARDRAILS
+
+Guardrails are protective measures implemented to prevent undesired actions or states in your Humanloop environment. They act as a
+safety net, automatically enforcing rules and limits to maintain system integrity.
+
+
+OVERVIEW
+
+Guardrails typically work by setting boundaries on various system parameters and automatically taking action when these boundaries
+are approached or exceeded.
+
+
+HOW GUARDRAILS WORK IN HUMANLOOP
+
+ 1. set up evaluators
+ 2. configure them as a guardrail
+ * specify the type of guardrail (e.g. rate limiting, content moderation, etc.)
+ * specify the threshold for the guardrail
+ * specify the action to take when the guardrail is violated
+
+
+USE CASES FOR GUARDRAILS
+
+ 1. Content Moderation
+
+ * Use Case: Automatically filter or flag inappropriate, offensive, or harmful content generated by LLMs.
+ * Benefit: Maintain a safe and respectful environment for users, comply with content policies.
+
+ 2. PII Protection
+
+ * Use Case: Detect and redact personally identifiable information (PII) in LLM outputs.
+ * Benefit: Ensure data privacy, comply with regulations like GDPR and CCPA.
+
+ 3. Bias Detection
+
+ * Use Case: Identify and mitigate biased language or unfair treatment in LLM responses.
+ * Benefit: Promote fairness and inclusivity, reduce discriminatory outputs.
+
+ 4. Fairness Assurance
+
+ * Use Case: Ensure equal treatment and representation across different demographic groups in LLM interactions.
+ * Benefit: Maintain ethical AI practices, avoid reinforcing societal biases.
+
+ 5. Toxicity Filtering
+
+ * Use Case: Detect and prevent the generation of toxic, abusive, or hateful content.
+ * Benefit: Create a positive user experience, protect brand reputation.
+
+ 6. Hallucination Protections
+
+ * Use Case: Detect and prevent the generation of false or fabricated information by the LLM.
+ * Benefit: Ensure output reliability, maintain user trust, and avoid potential misinformation spread.
+
+
+BEST PRACTICES FOR IMPLEMENTING GUARDRAILS
+
+ 1. Start Conservative: Begin with more restrictive guardrails and loosen them as you gain confidence.
+ 2. Monitor Guardrail Actions: Keep track of when and why guardrails are triggered to identify patterns.
+ 3. Regular Reviews: Periodically assess the effectiveness of your guardrails and adjust as needed.
+ 4. Provide Override Mechanisms: Allow authorized personnel to bypass guardrails in controlled situations.
+ 5. Document Thoroughly: Maintain clear documentation of all implemented guardrails for team awareness.
+
+{/* WIP - for gartner /end */}",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/observability/alerts-and-guardrails",
+ "title": "Alerts and Guardrails",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -5717,6 +9174,119 @@ In this guide, we will demonstrate how to create and use online evaluators to ob
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Observability",
+ "How-To Guides",
+ ],
+ "content": "This feature is not available for the Free tier. Please contact us if you wish to learn more about our [Enterprise
+plan](https://humanloop.com/pricing)
+
+
+CREATE AN ONLINE EVALUATOR
+
+
+PREREQUISITES
+
+ * You need to have access to evaluations.
+ * You also need to have a Prompt – if not, please follow our Prompt creation [/docs/development/guides/create-prompt] guide.
+ * Finally, you need at least a few logs in your project. Use the Editor to generate some logs if you don't have any yet.
+
+To set up an online Python evaluator:
+
+GO TO THE EVALUATIONS PAGE IN ONE OF YOUR PROJECTS AND SELECT THE EVALUATORS TAB
+
+
+SELECT + NEW EVALUATOR AND CHOOSE CODE EVALUATOR IN THE DIALOG
+
+Selecting the type of a new evaluator [file:ae416e3c-b35e-44ac-8f1b-df468e180299]
+
+
+FROM THE LIBRARY OF PRESETS ON THE LEFT-HAND SIDE, WE'LL CHOOSE VALID JSON FOR THIS GUIDE. YOU'LL SEE A PRE-POPULATED EVALUATOR
+WITH PYTHON CODE THAT CHECKS THE OUTPUT OF OUR MODEL IS VALID JSON GRAMMAR.
+
+The evaluator editor after selecting **Valid JSON** preset [file:0926fe33-2c96-4b99-922a-aa777f7590fe]
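+
+The preset's exact code may differ, but a minimal evaluator in this spirit, using the json module listed at the end of this
+guide and the log["output"] field as in the other code Evaluator examples, looks roughly like:
+
+import json
+
+def valid_json(log):
+    # Returns True if the model output parses as JSON, False otherwise
+    try:
+        json.loads(log["output"])
+        return True
+    except (TypeError, ValueError):
+        return False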
+
+
+IN THE DEBUG CONSOLE AT THE BOTTOM OF THE DIALOG, CLICK RANDOM LOGS FROM PROJECT. THE CONSOLE WILL BE POPULATED WITH FIVE
+DATAPOINTS FROM YOUR PROJECT.
+
+The debug console (you can resize this area to make it easier to view the logs) [file:6a1798be-fa63-4dc5-b13d-0aceac0500f9]
+
+
+CLICK THE RUN BUTTON AT THE FAR RIGHT OF ONE OF THE LOG ROWS. AFTER A MOMENT, YOU'LL SEE THE RESULT COLUMN POPULATED WITH A TRUE
+OR FALSE.
+
+The **Valid JSON** evaluator returned \`True\` for this particular log, indicating the text output by the model was grammatically
+correct JSON. [file:7080ea4b-4bea-4871-b9a4-a234eb3b9d5d]
+
+
+EXPLORE THE LOG DICTIONARY IN THE TABLE TO HELP UNDERSTAND WHAT IS AVAILABLE ON THE PYTHON OBJECT PASSED INTO THE EVALUATOR.
+
+
+CLICK CREATE ON THE LEFT SIDE OF THE PAGE.
+
+
+ACTIVATE AN EVALUATOR FOR A PROJECT
+
+ON THE NEW VALID JSON EVALUATOR IN THE EVALUATIONS TAB, TOGGLE THE SWITCH TO ON - THE EVALUATOR IS NOW ACTIVATED FOR THE CURRENT
+PROJECT.
+
+Activating the new evaluator to run automatically on your project. [file:a3a01a65-2832-43bf-a0e1-fec1f1b3157e]
+
+
+GO TO THE EDITOR, AND GENERATE SOME FRESH LOGS WITH YOUR MODEL.
+
+
+OVER IN THE LOGS TAB YOU'LL SEE THE NEW LOGS. THE VALID JSON EVALUATOR RUNS AUTOMATICALLY ON THESE NEW LOGS, AND THE RESULTS ARE
+DISPLAYED IN THE TABLE.
+
+The **Logs** table includes a column for each activated evaluator in your project. Each activated evaluator runs on any new logs
+in the project. [file:ac285d9d-e0ef-4c41-b57f-0be0d0f2bddc]
+
+
+TRACK THE PERFORMANCE OF MODELS
+
+
+PREREQUISITES
+
+ * A Humanloop project with a reasonable amount of data.
+ * An Evaluator activated in that project.
+
+To track the performance of different model configs in your project:
+
+
+GO TO THE DASHBOARD TAB.
+
+In the table of model configs at the bottom, choose a subset of the project's model configs.
+
+
+USE THE GRAPH CONTROLS
+
+At the top of the page to select the date range and time granularity of interest.
+
+
+REVIEW THE RELATIVE PERFORMANCE
+
+For each activated Evaluator shown in the graphs, you can see the relative performance of the model configs you selected.
+
+[file:5b3dbcef-c44a-44ed-84c1-b6f3f7f7dd8a]
+
+The following Python modules are available to be imported in your code evaluators:
+ * re
+ * math
+ * random
+ * datetime
+ * json (useful for validating JSON grammar as per the example above)
+ * jsonschema (useful for more fine-grained validation of JSON output - see the in-app example)
+ * sqlglot (useful for validating SQL query grammar)
+ * requests (useful to make further LLM calls as part of your evaluation - see the in-app example for a suggestion of how to get
+ started).",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/observability/guides/set-up-monitoring",
+ "title": "Set up Monitoring",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -5916,6 +9486,181 @@ In this guide, we'll walk you through the process of setting up webhooks using t
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Observability",
+ "How-To Guides",
+ ],
+ "content": "This content is currently under development. Please refer to our [V4 documentation](https://docs.humanloop.com/v4) for the current
+docs. This feature is not available for the Free tier. Please contact us if you wish to learn more about our [Enterprise
+plan](https://humanloop.com/pricing)
+
+{/* WIP - for gartner */}
+
+In this guide, we'll walk you through the process of setting up webhooks using the Humanloop API to notify you in Slack when
+certain events occur with your monitoring evaluators.
+
+
+PREREQUISITES
+
+Before you begin, make sure you have:
+
+ * A Humanloop account with API access
+ * A Slack workspace where you have permissions to add webhooks
+ * A Humanloop project with at least one LLM model and monitoring evaluator set up
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop TypeScript SDK:
+
+ npm install humanloop
+
+
+ 2. Import and initialize the SDK:
+
+ import { HumanloopClient, Humanloop } from "humanloop";
+
+ const humanloop = new HumanloopClient({ apiKey: "YOUR_API_KEY" });
+
+ // Check that the authentication was successful
+ console.log(await humanloop.prompts.list());
+
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop Python SDK:
+
+ pip install humanloop
+
+
+ 2. Start a Python interpreter:
+
+ python
+
+
+ 3. Initialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page
+ [https://app.humanloop.com/account/api-keys])
+
+ from humanloop import Humanloop
+ hl = Humanloop(api_key="")
+
+ # Check that the authentication was successful
+ print(hl.prompts.list())
+
+
+
+SETTING UP A WEBHOOK
+
+To set up a webhook, you'll use the hl.webhook.create() method from the Humanloop Python SDK. Here's a step-by-step guide:
+
+
+CREATE A SLACK INCOMING WEBHOOK
+
+ 1. Go to your Slack workspace and create a new Slack app (or use an existing one).
+ 2. Under "Add features and functionality", choose "Incoming Webhooks" and activate them.
+ 3. Click "Add New Webhook to Workspace" and choose the channel where you want to receive notifications.
+ 4. Copy the webhook URL provided by Slack.
+
+
+IMPORT THE HUMANLOOP SDK AND INITIALIZE THE CLIENT
+
+import humanloop as hl
+
+hl.init(api_key="your-api-key")
+
+
+Replace "your-api-key" with your actual Humanloop API key.
+
+
+CREATE A WEBHOOK
+
+webhook = hl.webhook.create(
+ url="https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK",
+ description="Webhook for monitoring evaluator alerts",
+ events=["EVALUATION_COMPLETED", "DRIFT_DETECTED"],
+ model_name="your-model-name",
+ status="ACTIVE",
+ http_url_spec={
+ "secret": "your-shared-secret"
+ }
+)
+
+
+Replace the following:
+
+ * "https://hooks.slack.com/services/YOUR/SLACK/WEBHOOK" with your Slack webhook URL
+ * "your-model-name" with the name of the model you want to monitor
+ * "your-shared-secret" with a secret string of your choice for added security
+
+
+TEST THE WEBHOOK
+
+To test if your webhook is working correctly, you can trigger an evaluation:
+
+
+evaluation_run = hl.evaluations.create(
+ project_id=PROJECT_ID,
+ config_id=CONFIG_ID,
+ dataset_id=DATASET_ID,
+ evaluator_ids=[EVALUATOR_ID],
+ hl_generated=False,
+)
+
+
+Replace "your-project-id" and "your-model-name" with your actual project ID and model name.
+
+
+VERIFYING THE WEBHOOK
+
+After setting up the webhook and triggering an evaluation, you should see a message in your specified Slack channel. The message
+will contain details about the evaluation event, such as:
+
+New event: EVALUATION_COMPLETED
+Model: your-model-name
+Timestamp: 2023-07-29T12:34:56Z
+Evaluation ID: eval_123456
+Result: Pass/Fail
+
+
+
+MANAGING WEBHOOKS
+
+You can list, update, or delete webhooks using the following methods:
+
+# List all webhooks
+webhooks = hl.webhook.list()
+
+# Update a webhook
+updated_webhook = hl.webhook.update(
+ id="webhook-id",
+ description="Updated description",
+ status="DISABLED"
+)
+
+# Delete a webhook
+hl.webhook.delete(id="webhook-id")
+
+
+Replace "webhook-id" with the ID of the webhook you want to manage.
+
+
+CONCLUSION
+
+You've now set up a webhook to receive notifications in Slack when your monitoring evaluators complete evaluations or detect
+drift. This will help you stay informed about the performance and behavior of your LLM models in real-time.
+
+{/* /WIP - for gartner */}",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/observability/guides/set-up-webhooks",
+ "title": "Set up Webhooks",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -6204,6 +9949,203 @@ If you have not done so, you can follow our guide to [create a Human Evaluator](
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Observability",
+ "How-To Guides",
+ ],
+ "content": "This guide shows how to use the Humanloop SDK to record end-user feedback on Logs.
+
+Different use-cases and user interfaces may require different kinds of feedback that need to be mapped to the appropriate end user
+interaction. There are broadly 3 important kinds of feedback:
+
+ 1. Explicit feedback: these are purposeful actions to review the generations. For example, ‘thumbs up/down’ button presses.
+ 2. Implicit feedback: indirect actions taken by your users may signal whether the generation was good or bad, for example,
+ whether the user ‘copied’ the generation, ‘saved it’ or ‘dismissed it’ (which is negative feedback).
+ 3. Free-form feedback: Corrections and explanations provided by the end-user on the generation.
+
+You should create Human Evaluators structured to capture the feedback you need. For example, a Human Evaluator with return type
+"text" can be used to capture free-form feedback, while a Human Evaluator with return type "multi_select" can be used to capture
+user actions that provide implicit feedback.
+
+If you have not done so, you can follow our guide to create a Human Evaluator [/docs/evaluation/guides/human-evaluator] to set up
+the appropriate feedback schema.
+
+
+PREREQUISITES
+
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/development/guides/create-prompt] guide first.
+ * You have created a Human Evaluator. This can be done by following the steps in our guide to Human Evaluator creation
+ [/docs/evaluation/guides/human-evaluators].
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop TypeScript SDK:
+
+ npm install humanloop
+
+
+ 2. Import and initialize the SDK:
+
+ import { HumanloopClient, Humanloop } from "humanloop";
+
+ const humanloop = new HumanloopClient({ apiKey: "YOUR_API_KEY" });
+
+ // Check that the authentication was successful
+ console.log(await humanloop.prompts.list());
+
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop Python SDK:
+
+ pip install humanloop
+
+
+ 2. Start a Python interpreter:
+
+ python
+
+
+ 3. Initialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page
+ [https://app.humanloop.com/account/api-keys])
+
+ from humanloop import Humanloop
+ hl = Humanloop(api_key="")
+
+ # Check that the authentication was successful
+ print(hl.prompts.list())
+
+
+
+ATTACH HUMAN EVALUATOR TO ENABLE FEEDBACK
+
+In this example, we'll be attaching a "Tweet Issues" Human Evaluator to an "Impersonator" Prompt. The specifics of the "Tweet
+Issues" Evaluator are not important for this guide, but for completeness, it is a Human Evaluator with the return type
+"multi_select" and options like "Inappropriate", "Too many emojis", "Too long", etc.
+
+
+GO TO THE PROMPT'S DASHBOARD
+
+
+CLICK MONITORING IN THE TOP RIGHT TO OPEN THE MONITORING DIALOG
+
+Prompt dashboard showing Monitoring dialog [file:f1cad24c-fe04-4a22-a8a9-1a7035b7bb02]
+
+
+CLICK CONNECT EVALUATORS AND SELECT THE HUMAN EVALUATOR YOU CREATED.
+
+Dialog connecting the "Tweet Issues" Evaluator as a Monitoring Evaluator [file:08b215e4-b0ef-437d-a168-be8ff4adc9a7]
+
+You should now see the selected Human Evaluator attached to the Prompt in the Monitoring dialog.
+
+Monitoring dialog showing the "Tweet Issues" Evaluator attached to the Prompt [file:8d7690fe-b39c-4cf4-9a64-2e88d1048a8a]
+
+
+RECORD FEEDBACK AGAINST A LOG BY ITS ID
+
+With the Human Evaluator attached to the Prompt, you can now record judgments against the Prompt's Logs. To make API calls to
+record feedback, you will need the Log ID of the Log you want to record feedback against. The steps below illustrate a typical
+workflow for recording feedback against a Log generated in your code.
+
+
+RETRIEVE THE LOG ID FROM THE CLIENT.PROMPTS.CALL() RESPONSE.
+
+log = client.prompts.call(
+ version_id="prv_qNeXZp9P6T7kdnMIBHIOV",
+ path="persona",
+ messages=[{"role": "user", "content": "What really happened at Roswell?"}],
+ inputs={"person": "Trump"},
+)
+log_id = log.id
+
+
+
+CALL CLIENT.EVALUATORS.LOG(...) REFERENCING THE ABOVE LOG ID AS PARENT_ID TO RECORD USER FEEDBACK.
+
+feedback_2 = client.evaluators.log(
+ # Pass the \`log_id\` from the previous step to indicate the Log to record feedback against
+ parent_id=log_id,
+ # Here, we're recording feedback against a "Tweet Issues" Human Evaluator,
+ # which is of type \`multi_select\` and has multiple options to choose from.
+ path="Feedback Demo/Tweet Issues",
+ judgment=["Inappropriate", "Too many emojis"],
+)
+
+
+The "rating" and "correction" Evaluators are attached to all Prompts by default. You can record feedback using these Evaluators as
+well.
+
+The "rating" Evaluator can be used to record explicit feedback (e.g. from a 👍/👎 button).
+
+rating_log = client.evaluators.log(
+ parent_id=log_id,
+ # We're recording feedback using the "rating" Human Evaluator,
+ # which has 2 options: "good" and "bad".
+ path="rating",
+ judgment="good",
+
+ # You can also include the source of the feedback when recording it with the \`user\` parameter.
+ user="user_123",
+)
+
+
+The "correction" Evaluator can be used to record user-provided corrections to the generations (e.g. If the user edits the
+generation before copying it).
+
+correction_log = client.evaluators.log(
+ parent_id=log_id,
+ path="correction",
+ judgment="NOTHING happened at Roswell, folks! Fake News media pushing ALIEN conspiracy theories. SAD! "
+ + "I know Area 51, have the best aliens. Roswell? Total hoax! Believe me. 👽🚫 #Roswell #FakeNews",
+)
+
+
+If the user removes their feedback (e.g. if the user deselects a previous 👎 feedback), you can record this by passing
+judgment=None.
+
+removed_rating_log = client.evaluators.log(
+ parent_id=log_id,
+ path="rating",
+ judgment=None,
+)
+
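+Putting the pieces together, a thin wrapper can map typical UI events onto the calls above. This is a sketch with hypothetical
+helper names; the "User Actions" Evaluator path is an assumed example of a multi_select Human Evaluator you would create
+yourself, while "rating" and "correction" are the default Evaluators described above:
+
+def record_thumbs(client, log_id, thumbs_up):
+    # Explicit feedback: map a thumbs up/down button press to the default "rating" Evaluator
+    client.evaluators.log(parent_id=log_id, path="rating", judgment="good" if thumbs_up else "bad")
+
+def record_copy(client, log_id):
+    # Implicit feedback: record a user action such as copying the generation against a
+    # hypothetical multi_select Human Evaluator
+    client.evaluators.log(parent_id=log_id, path="Feedback Demo/User Actions", judgment=["copied"])
+
+def record_edit(client, log_id, edited_text):
+    # Free-form feedback: record the user's correction of the generation
+    client.evaluators.log(parent_id=log_id, path="correction", judgment=edited_text)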
+
+
+VIEWING FEEDBACK
+
+You can view the applied feedback in two main ways: through the Logs that the feedback was applied to, and through the Human
+itself.
+
+
+VIEWING FEEDBACK APPLIED TO LOGS
+
+The feedback recorded for each Log can be viewed in the Logs table of your Prompt.
+
+Logs table showing feedback applied to Logs [file:04d01f09-c428-418f-940a-d42d8a4eba47]
+
+Your internal users can also apply feedback to the Logs directly through the Humanloop app.
+
+Log drawer showing feedback section [file:eda99eac-1558-4183-ac3b-37e6d2db012f]
+
+
+VIEWING FEEDBACK THROUGH ITS HUMAN EVALUATOR
+
+Alternatively, you can view all feedback recorded for a specific Evaluator in the Logs tab of the Evaluator. This will display all
+feedback recorded for the Evaluator across all other Files.
+
+Logs table for "Tweet Issues" Evaluator showing feedback [file:03ee0b41-54c4-492e-8d6a-d78edb2c1eac]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/observability/guides/capture-user-feedback",
+ "title": "Capture user feedback",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -6520,6 +10462,40 @@ A user can be one of the following rolws:
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Organization Management",
+ ],
+ "content": "Everyone invited to the organization can access all projects currently (controlling project access coming soon).
+
+A user can be one of the following roles:
+
+Admin: The highest level of control. They can manage, modify, and oversee the Organization's settings and have full functionality
+across all projects.
+
+Developer: (Enterprise tier only) Can deploy Files, manage environments, create and add API keys, but lacks the ability to access
+billing or invite others.
+
+Member: (Enterprise tier only) The basic level of access. Can create and save Files, run Evaluations, but not deploy. Can not see
+any org-wide API keys.
+
+
+RBACS SUMMARY
+
+Here is the full breakdown of roles and access:
+
+Action Member Developer Admin Create and manage Files ✔️ ✔️ ✔️ Inspect logs and feedback ✔️ ✔️ ✔️ Create and manage Evaluators ✔️
+✔️ ✔️ Run Evaluations ✔️ ✔️ ✔️ Create and manage Datasets ✔️ ✔️ ✔️ Create and manage API keys ✔️ ✔️ Manage prompt deployments ✔️
+✔️ Create and manage environments ✔️ ✔️ Send invites ✔️ Set user roles ✔️ Manage billing ✔️ Change Organization settings ✔️",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/admin/access-roles",
+ "title": "Access roles (RBACs)",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -6575,6 +10551,125 @@ Humanloop offers authentication options to ensure secure access to your organiza
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Organization Management",
+ ],
+ "content": "{/* WIP - for gartner /start */}
+
+Humanloop offers authentication options to ensure secure access to your organization's resources. This guide covers our Single
+Sign-On (SSO) capabilities and other authentication methods.
+
+
+SINGLE SIGN-ON (SSO)
+
+Single Sign-On allows users to access multiple applications with a single set of credentials. Humanloop supports SSO integration
+with major identity providers, enhancing security and simplifying user management.
+
+
+SUPPORTED SSO PROVIDERS
+
+ * Google Workspace
+ * Okta
+ * Azure Active Directory
+ * OneLogin
+ * Custom SAML 2.0 providers
+
+
+BENEFITS OF SSO
+
+ 1. Enhanced security with centralized authentication
+ 2. Simplified user management
+ 3. Improved user experience with reduced password fatigue
+ 4. Streamlined onboarding and offboarding processes
+
+
+SETTING UP SSO
+
+To set up SSO for your organization:
+
+ 1. Contact our sales team to enable SSO for your account
+ 2. Choose your identity provider
+ 3. Configure the connection between Humanloop and your identity provider
+ 4. Test the SSO integration
+ 5. Roll out to your users
+
+
+MULTI-FACTOR AUTHENTICATION (MFA)
+
+For accounts not using SSO, we strongly recommend enabling Multi-Factor Authentication for an additional layer of security.
+
+
+MFA OPTIONS
+
+ * Time-based One-Time Password (TOTP) apps
+ * SMS-based verification
+ * Hardware security keys (e.g., YubiKey)
+
+
+API AUTHENTICATION
+
+For programmatic access to Humanloop, we use API keys. These should be kept secure and rotated regularly.
+
+
+MANAGING API KEYS
+
+ * Generate API keys in your account settings
+ * Use environment variables to store API keys in your applications (see the sketch below)
+ * Implement key rotation policies for enhanced security
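+
+For example, a minimal sketch using the Python SDK initialization shown elsewhere in these docs (the HUMANLOOP_API_KEY variable
+name is a common convention, not a requirement):
+
+import os
+from humanloop import Humanloop
+
+# Read the key from the environment rather than hard-coding it in source control
+hl = Humanloop(api_key=os.environ["HUMANLOOP_API_KEY"])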
+
+
+USER PROVISIONING AND DEPROVISIONING
+
+Humanloop supports automated user lifecycle management through our Directory Sync feature. This allows for:
+
+ * Automatic user creation based on directory group membership
+ * Real-time updates to user attributes and permissions
+ * Immediate deprovisioning when users are removed from directory groups
+
+
+BEST PRACTICES
+
+ 1. Use SSO when possible for centralized access control
+ 2. Enable MFA for all user accounts
+ 3. Regularly audit user access and permissions
+ 4. Implement the principle of least privilege
+ 5. Use secure protocols (HTTPS) for all communications with Humanloop
+
+For more information on setting up SSO or other authentication methods, please contact our support team or refer to our API
+documentation.
+
+
+ACTIVE DIRECTORY SYNC
+
+Humanloop supports Active Directory Sync for automated user provisioning and deprovisioning. This feature allows you to:
+
+ * Automatically create and update user accounts based on your Active Directory groups
+ * Sync user attributes and roles in real-time
+ * Instantly deprovision access when users are removed from AD groups
+ * Maintain consistent access control across your organization
+ * Reduce manual user management tasks and potential security risks
+
+To set up Active Directory Sync:
+
+ 1. Contact our sales team to enable this feature for your account
+ 2. Configure the connection between Humanloop and your Active Directory
+ 3. Map your AD groups to Humanloop roles and permissions
+ 4. Test the sync process with a small group of users
+ 5. Roll out to your entire organization
+
+For more information on implementing Active Directory Sync, please contact our support team [support@humanloop.com].
+
+{/* WIP - for gartner /end */}",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/admin/sso-and-authentication",
+ "title": "SSO and Authentication",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -6856,6 +10951,49 @@ Inviting people to your organization allows them to interact with your Humanloop
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Organization Management",
+ "How-To Guides",
+ ],
+ "content": "Inviting people to your organization allows them to interact with your Humanloop projects:
+
+ * Teammates will be able to create new model configs and experiments
+ * Developers will be able to get an API key to interact with projects through the SDK
+ * Annotators may provide feedback on logged datapoints using the Data tab (in addition to feedback captured from your end-users
+ via the SDK feedback integration)
+
+
+INVITE USERS
+
+To invite users to your organization:
+
+
+GO TO YOUR ORGANIZATION'S MEMBERS PAGE [https://app.humanloop.com/account/members]
+
+
+ENTER THE EMAIL ADDRESS
+
+Enter the email of the person you wish to invite into the Invite members box.
+
+[file:a9d909b7-eac2-4ccb-b828-e160721c9b94]
+
+
+CLICK SEND INVITE.
+
+An email will be sent to the entered email address, inviting them to the organization. If the entered email address is not already
+a Humanloop user, they will be prompted to create an account before being added to the organization.
+
+🎉 Once they create an account, they can view your projects at the same URL to begin collaborating.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/admin/guides/invite-collaborators",
+ "title": "Invite collaborators",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -6918,6 +11056,67 @@ API keys allow you to access the Humanloop API programmatically in your app.",
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Organization Management",
+ "How-To Guides",
+ ],
+ "content": "CREATE A NEW API KEY
+
+
+GO TO YOUR ORGANIZATION'S API KEYS PAGE [https://app.humanloop.com/account/api-keys].
+
+
+CLICK THE CREATE NEW API KEY BUTTON.
+
+
+ENTER A NAME FOR YOUR API KEY.
+
+Choose a name that helps you identify the key's purpose. You can't change the name of an API key after it's created.
+
+
+CLICK CREATE.
+
+[file:efda5ed0-a0a2-449c-8f26-4c2e092e2917]
+
+
+COPY THE GENERATED API KEY
+
+Save it in a secure location. You will not be shown the full API key again.
+
+[file:5043e675-df30-4288-89c0-06d414a9c896]
+
+
+REVOKE AN API KEY
+
+You can revoke an existing API key if it is no longer needed.
+
+When an API key is revoked, future API requests that use this key will be rejected. Any systems that are dependent on this key
+will no longer work.
+
+
+GO TO API KEYS PAGE
+
+Go to your Organization's API Keys page [https://app.humanloop.com/account/api-keys].
+
+
+IDENTIFY THE API KEY
+
+Find the key you wish to revoke by its name or by the displayed trailing characters.
+
+
+CLICK 'REVOKE'
+
+Click the three dots button on the right of its row to open its menu. Click Revoke. A confirmation dialog will be displayed. Click
+Remove.
+
+[file:1c5d15e7-cd82-4ab2-ad35-5da6c8548c5f]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/admin/guides/manage-api-keys",
+ "title": "Manage API keys",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -7027,6 +11226,64 @@ Environments enable you to deploy different versions of your files, enabling mul
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Organization Management",
+ "How-To Guides",
+ ],
+ "content": "CREATE A NEW ENVIRONMENT
+
+Only Enterprise customers can create more than one environment.
+
+
+GO TO YOUR ORGANIZATION'S ENVIRONMENTS PAGE [https://app.humanloop.com/account/environments].
+
+
+CLICK THE + ENVIRONMENT BUTTON.
+
+
+ENTER A NAME FOR YOUR ENVIRONMENT.
+
+Choose a name that is relevant to the development workflow you intend to support, such as staging or development.
+
+
+CLICK CREATE.
+
+[file:ce6aae21-5891-4d66-9609-3a4fef9a0386]
+
+
+RENAME AN ENVIRONMENT
+
+You can rename an environment to re-arrange your development workflows. Since each new file is automatically deployed to the
+default environment, which is production unless altered, it may make more sense to create a separate production environment and
+rename your current environments.
+
+Renaming the environments will take immediate effect, so ensure that this change is planned and does not disrupt your production
+workflows.
+
+
+GO TO ENVIRONMENTS PAGE
+
+Go to your Organization's environments page [https://app.humanloop.com/account/environments].
+
+
+IDENTIFY THE ENVIRONMENTS
+
+Find the environments you wish to rename.
+
+
+CLICK 'RENAME'
+
+Click the three dots button on the right of its row to open its menu. Click Rename. A confirmation dialog will be displayed.
+Update the name and click Rename.
+
+[file:b7cd1d59-d2d2-45f9-91fe-8a8896200b81]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/admin/guides/manage-environments",
+ "title": "Manage Environments",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -7143,6 +11400,32 @@ Our menu of hosting options is as follows from basic to more advanced:
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Reference",
+ ],
+ "content": "Humanloop offers a broad range of hosting environments to meet the security and compliance needs of enterprise customers.
+
+Our menu of hosting options is as follows from basic to more advanced:
+
+ 1. Default: Our multi-tenanted cloud offering is SOC2 compliant and hosted in the AWS us-east region.
+ 2. Region specific: Same as 1, but with additional data storage region requirements - e.g. data can never leave
+ the EU for GDPR reasons. We offer UK, EU and US guarantees for data storage regions.
+ 3. Dedicated: We provision your own dedicated instance of Humanloop in your region of choice, with the following additional benefits:
+ * Fully HIPAA-compliant [https://aws.amazon.com/compliance/hipaa-compliance/] AWS setup.
+ * Ability to manage your own encryption keys in KMS.
+ * Ability to subscribe to application logging and CloudTrail infrastructure monitoring.
+ 4. Self-hosted: You deploy an instance of Humanloop within your own VPC on AWS. We provide an infrastructure-as-code setup with
+ Pulumi [https://www.pulumi.com/] to easily spin up a Humanloop instance in your VPC.
+ "indexSegmentId": "0",
+ "slug": "docs/v5/reference/deployment-options",
+ "title": "Deployment Options",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -7162,6 +11445,69 @@ Humanloop supports all the major large language model providers, including OpenA
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Reference",
+ ],
+ "content": "Humanloop supports all the major large language model providers, including OpenAI, Anthropic, Google, Azure, and more.
+Additionally, you can use your own custom models with the API and still benefit from the Humanloop platform.
+
+
+PROVIDERS
+
+Here is a summary of which providers we support and whether cost and token information is available.
+
+Provider Models Cost information Token information OpenAI ✅ ✅ ✅ Anthropic ✅ ✅ ✅ Google ✅ ✅ ✅ Azure ✅ ✅ ✅ Cohere ✅ ✅ ✅ Llama ✅ Groq
+✅ AWS Bedrock Anthropic, Llama Custom ✅ User-defined User-defined
+
+Adding in more providers is driven by customer demand. If you have a specific provider or model you would like to see supported,
+please reach out to us at support@humanloop.com [support@humanloop.com].
+
+
+MODELS
+
+Provider Key Max Prompt Tokens Max Output Tokens Cost per Prompt Token Cost per Output Token Tool Support Image Support OpenAI
+gpt-4 8192 4096 $0.00003 $0.00006 ✅ ❌ OpenAI gpt-4o 128000 4096 $0.000005 $0.000015 ✅ ✅ OpenAI gpt-4-turbo 128000 4096 $0.00001
+$0.00003 ✅ ✅ OpenAI gpt-4-turbo-2024-04-09 128000 4096 $0.00001 $0.00003 ✅ ❌ OpenAI gpt-4-0 8192 4096 $0.00003 $0.00003 ✅ ❌ OpenAI
+gpt-4-32k 32768 4096 $0.00003 $0.00003 ✅ ❌ OpenAI gpt-4-1106-preview 128000 4096 $0.00001 $0.00003 ✅ ❌ OpenAI gpt-4-0125-preview
+128000 4096 $0.00001 $0.00003 ✅ ❌ OpenAI gpt-4-vision 128000 4096 $0.00001 $0.00003 ✅ ✅ OpenAI gpt-4-1106-vision-preview 16385
+4096 $0.0000015 $0.000002 ✅ ❌ OpenAI gpt-3.5-turbo 16385 4096 $0.0000015 $0.000002 ✅ ❌ OpenAI gpt-3.5-turbo-instruct 8192 4097
+$0.0000015 $0.000002 ✅ ❌ OpenAI babbage-002 16384 16384 $0.0000004 $0.0000004 ✅ ❌ OpenAI davinci-002 16384 16384 $0.000002
+$0.000002 ✅ ❌ OpenAI ft:gpt-3.5-turbo 4097 4096 $0.000003 $0.000006 ✅ ❌ OpenAI ft:davinci-002 16384 16384 $0.000002 $0.000002 ✅ ❌
+OpenAI text-moderation 32768 32768 $0.000003 $0.000004 ✅ ❌ Anthropic claude-3-opus-20240229 200000 4096 $0.000015 $0.000075 ✅ ❌
+Anthropic claude-3-sonnet-20240229 200000 4096 $0.000003 $0.000015 ✅ ❌ Anthropic claude-3-haiku-20240307 200000 4096 $0.00000025
+$0.00000125 ✅ ❌ Anthropic claude-2.1 100000 4096 $0.00000025 $0.000024 ❌ ❌ Anthropic claude-2 100000 4096 $0.000008 $0.000024 ❌ ❌
+Anthropic claude-instant-1.2 100000 4096 $0.000008 $0.000024 ❌ ❌ Anthropic claude-instant-1 100000 4096 $0.0000008 $0.0000024 ❌ ❌
+Groq mixtral-8x7b-32768 32768 32768 $0.0 $0.0 ❌ ❌ Groq llama3-8b-8192 8192 8192 $0.0 $0.0 ❌ ❌ Groq llama3-70b-8192 8192 8192 $0.0
+$0.0 ❌ ❌ Groq llama2-70b-4096 4096 4096 $0.0 $0.0 ❌ ❌ Groq gemma-7b-it 8192 8192 $0.0 $0.0 ❌ ❌ Replicate llama-3-70b-instruct 8192
+8192 $0.00000065 $0.00000275 ❌ ❌ Replicate llama-3-70b 8192 8192 $0.00000065 $0.00000275 ❌ ❌ Replicate llama-3-8b-instruct 8192
+8192 $0.00000005 $0.00000025 ❌ ❌ Replicate llama-3-8b 8192 8192 $0.00000005 $0.00000025 ❌ ❌ Replicate llama-2-70b 4096 4096
+$0.00003 $0.00006 ❌ ❌ Replicate llama70b-v2 4096 4096 N/A N/A ❌ ❌ Replicate mixtral-8x7b 4096 4096 N/A N/A ❌ ❌ OpenAI_Azure gpt-4o
+128000 4096 $0.000005 $0.000015 ✅ ✅ OpenAI_Azure gpt-4o-2024-05-13 128000 4096 $0.000005 $0.000015 ✅ ✅ OpenAI_Azure
+gpt-4-turbo-2024-04-09 128000 4096 $0.00003 $0.00006 ✅ ✅ OpenAI_Azure gpt-4 8192 4096 $0.00003 $0.00006 ✅ ❌ OpenAI_Azure
+gpt-4-0314 8192 4096 $0.00003 $0.00006 ✅ ❌ OpenAI_Azure gpt-4-32k 32768 4096 $0.00006 $0.00012 ✅ ❌ OpenAI_Azure gpt-4-0125 128000
+4096 $0.00001 $0.00003 ✅ ❌ OpenAI_Azure gpt-4-1106 128000 4096 $0.00001 $0.00003 ✅ ❌ OpenAI_Azure gpt-4-0613 8192 4096 $0.00003
+$0.00006 ✅ ❌ OpenAI_Azure gpt-4-turbo 128000 4096 $0.00001 $0.00003 ✅ ❌ OpenAI_Azure gpt-4-turbo-vision 128000 4096 $0.000003
+$0.000004 ✅ ✅ OpenAI_Azure gpt-4-vision 128000 4096 $0.000003 $0.000004 ✅ ✅ OpenAI_Azure gpt-35-turbo-1106 16384 4096 $0.0000015
+$0.000002 ✅ ❌ OpenAI_Azure gpt-35-turbo-0125 16384 4096 $0.0000005 $0.0000015 ✅ ❌ OpenAI_Azure gpt-35-turbo-16k 16384 4096
+$0.000003 $0.000004 ✅ ❌ OpenAI_Azure gpt-35-turbo 4097 4096 $0.0000015 $0.000002 ✅ ❌ OpenAI_Azure gpt-3.5-turbo-instruct 4097 4096
+$0.0000015 $0.000002 ✅ ❌ OpenAI_Azure gpt-35-turbo-instruct 4097 4097 $0.0000015 $0.000002 ✅ ❌ Cohere command-r 128000 4000
+$0.0000005 $0.0000015 ❌ ❌ Cohere command-light 4096 4096 $0.000015 $0.000015 ❌ ❌ Cohere command-r-plus 128000 4000 $0.000003
+$0.000015 ❌ ❌ Cohere command-nightly 4096 4096 $0.000015 $0.000015 ❌ ❌ Cohere command 4096 4096 $0.000015 $0.000015 ❌ ❌ Cohere
+command-medium-beta 4096 4096 $0.000015 $0.000015 ❌ ❌ Cohere command-xlarge-beta 4096 4096 $0.000015 $0.000015 ❌ ❌ Google
+gemini-pro-vision 16384 2048 $0.00000025 $0.0000005 ❌ ✅ Google gemini-1.0-pro-vision 16384 2048 $0.00000025 $0.0000005 ❌ ✅ Google
+gemini-pro 32760 8192 $0.00000025 $0.0000005 ❌ ❌ Google gemini-1.0-pro 32760 8192 $0.00000025 $0.0000005 ❌ ❌ Google
+gemini-1.5-pro-latest 1000000 8192 $0.00000025 $0.0000005 ❌ ❌ Google gemini-1.5-pro 1000000 8192 $0.00000025 $0.0000005 ❌ ❌ Google
+gemini-experimental 1000000 8192 $0.00000025 $0.0000005 ❌ ❌",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/reference/supported-models",
+ "title": "Supported Models",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -7300,6 +11646,134 @@ Our \`.prompt\` file format is a serialized version of a model config that is de
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Reference",
+ ],
+ "content": "Our .prompt file format is a serialized version of a model config that is designed to be human-readable and suitable for checking
+into your version control systems alongside your code.
+
+
+FORMAT
+
+The .prompt file is heavily inspired by MDX [https://mdxjs.com/], with model and hyperparameters specified in a YAML header
+alongside a JSX-inspired format for your Chat Template.
+
+
+BASIC EXAMPLES
+
+\`\`\`jsx Chat
+---
+model: gpt-4
+temperature: 1.0
+max_tokens: -1
+provider: openai
+endpoint: chat
+---
+You are a friendly assistant.
+\`\`\`
+
+\`\`\`jsx Completion
+---
+model: claude-2
+temperature: 0.7
+max_tokens: 256
+top_p: 1.0
+provider: anthropic
+endpoint: complete
+---
+Autocomplete the sentence.
+
+Context: {{context}}
+
+{{sentence}}
+\`\`\`
+
+
+### Multi-modality and Images
+
+Images can be specified using nested \`\` tags within a \`\` message. To specify text alongside the image, use a \`\` tag.
+
+\`\`\`jsx Image and Text
+---
+model: gpt-4-vision-preview
+temperature: 0.7
+max_tokens: 256
+provider: openai
+endpoint: chat
+tools: []
+---
+
+ You are a friendly assistant.
+
+
+
+
+ What is in this image?
+
+
+
+\`\`\`
+
+### Tools, tool calls and tool responses
+
+Specify the tools available to the model as a JSON list in the YAML header.
+
+Tool calls in assistant messages can be added with nested \`\` tags. A \`\` tag within an \`\` tag denotes a tool call of \`type: "function"\`, and requires the attributes \`name\` and \`id\`. The text wrapped in a \`\` tag should be a JSON-formatted string containing the tool call's arguments.
+
+Tool call responses can then be added with \`\` tags after the \`\` message.
+
+\`\`\`jsx
+---
+model: gpt-4
+temperature: 0.7
+max_tokens: 256
+top_p: 1.0
+presence_penalty: 0.0
+frequency_penalty: 0.0
+provider: openai
+endpoint: chat
+tools: [
+ {
+ "name": "get_current_weather",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "name": "Location",
+ "description": "The city and state, e.g. San Francisco, CA"
+ },
+ "unit": {
+ "type": "string",
+ "name": "Unit",
+ "enum": [
+ "celsius",
+ "fahrenheit"
+ ]
+ }
+ },
+ "required": [
+ "location"
+ ]
+ }
+ }
+]
+---
+
+ You are a friendly assistant.
+
+
+
+ What is the weather in SF?
+
+
+
+
+ {
+ "location": "San Francisco, CA"
+ }
+
+
+
+
+
+ Cloudy with a chance of meatballs.
+
+\`\`\`
+",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/reference/prompt-file-format",
+ "title": "Prompt File Format",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -7516,6 +11990,27 @@ Visit our [Github examples repo](https://github.com/humanloop/examples) for a co
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Reference",
+ ],
+ "content": "Visit our Github examples repo [https://github.com/humanloop/examples] for a collection of usage examples of Humanloop.
+
+
+CONTENTS
+
+Github Description SDK Chat Logging Tool Calling Streaming chatbot-starter [https://github.com/humanloop/chatbot-starter/] An
+open-source AI chatbot app template built with Next.js, the Vercel AI SDK, OpenAI, and Humanloop. TypeScript ✔️ ✔️ ✔️ asap
+[https://github.com/humanloop/asap] CLI assistant for solving dev issues in your projects or the command line. TypeScript ✔️ ✔️ ✔️",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/reference/example-projects",
+ "title": "Example Projects",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -7558,6 +12053,47 @@ to run them natively with your Prompts in our Editor and UI based Evaluation wor
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Reference",
+ ],
+ "content": "Humanloop allows you to specify the runtime for your code Evaluators [../concepts/evaluators] and Tool [../concepts/tools]
+implementations in order to run them natively with your Prompts in our Editor and UI based Evaluation workflows.
+
+
+ENVIRONMENT DETAILS
+
+Python version: 3.11.4
+
+anthropic==0.29.0
+continuous-eval==0.3.13
+jellyfish==1.1.0
+jsonschema==4.22.0
+langdetect==1.0.9
+nltk==3.8.1
+numpy==1.26.4
+openai==1.35.10
+pandas==2.2.2
+pydantic==2.8.2
+requests==2.32.3
+scikit-learn==1.5.1
+spacy==3.7.5
+sqlglot==25.5.1
+syllapy==0.7.2
+textstat==0.7.3
+transformers==4.43.4
+
+
+If you have any specific packages you would like to see here, please let us know at support@humanloop.com [support@humanloop.com].",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/reference/python-environment",
+ "title": "Humanloop Runtime Environment",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -7620,6 +12156,66 @@ Humanloop offers a variety of integrations to enhance your workflow and extend t
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Reference",
+ ],
+ "content": "{/* WIP - for gartner /start */}
+
+Humanloop offers a variety of integrations to enhance your workflow and extend the platform's capabilities. These integrations
+allow you to seamlessly connect Humanloop with other tools and services, improving efficiency and expanding functionality.
+
+
+NATIVE INTEGRATIONS:
+
+These integrations are built directly into Humanloop and offer seamless, out-of-the-box connectivity:
+
+ * Git: Integrate your Git repositories (GitHub, GitLab, Bitbucket) with Humanloop for synchronized version control and
+ collaboration.
+ * Pinecone Search: Perform vector similarity searches using Pinecone vector DB and OpenAI embeddings.
+ * Postman [https://www.postman.com/humanloop/humanloop/overview]: Simplify API testing and development with Postman integration.
+ * Zapier [https://zapier.com/apps/humanloop/integrations]: Automate workflows by connecting Humanloop with thousands of apps.
+ * WorkOS: Streamline enterprise features like Single Sign-On (SSO) and directory sync.
+
+
+API INTEGRATIONS
+
+Expand Humanloop's capabilities with these API-based integrations:
+
+ * Google Search - Access Google search results via the SerpAPI.
+ * GET API - Send GET requests to external APIs directly from Humanloop.
+
+
+THIRD-PARTY INTEGRATIONS:
+
+Leverage Humanloop's API to create custom integrations with other platforms and services. Explore the following resources to get
+started:
+
+ * API Reference Guide [../api-reference]: Comprehensive documentation of Humanloop's API endpoints.
+ * SDK Overview [../api-reference/sdks]: Information on available SDKs for easier integration.
+ * Tool Usage [../concepts/tools#tool-use-function-calling]: Learn how to extend Humanloop's functionality with custom tools.
+
+
+BENEFITS OF INTEGRATIONS
+
+ * Streamline workflows by connecting Humanloop with your existing tools
+ * Extend Humanloop's capabilities with additional data sources and services
+ * Automate tasks and reduce manual work
+ * Customize Humanloop to fit your specific use case and requirements
+
+For assistance with integrations or to request a new integration, please contact our support team at support@humanloop.com
+[support@humanloop.com]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/reference/integrations",
+ "title": "Integrations",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -7731,6 +12327,146 @@ Humanloop never trains on user data.",
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Reference",
+ ],
+ "content": "Humanloop is deeply committed to AI governance, security, and compliance. View our Trust Report [https://trust.humanloop.com/] and
+Policy Pages [https://humanloop.com/policies/privacy-policy] to see all of our certifications, request documentation, and view
+high-level details on the controls we adhere to.
+
+Humanloop never trains on user data.
+
+
+HUMANLOOP SECURITY OFFERINGS:
+
+ * Data Privacy and Security
+ * Activate LLMs with your private data, safely and securely. You own your data and models.
+ * Monitoring & Support
+ * End-to-end monitoring of your AI applications, support guarantees from trusted AI experts.
+ * Data Encryption
+ * Data Management & AI Governance
+
+
+USER AUTHENTICATION AND ACCESS CONTROL
+
+
+AUTHENTICATION & ACCESS CONTROL - HUMANLOOP WEB APP
+
+All users of the Humanloop web application require a valid email address and password to use the system:
+
+ * Email addresses are verified on account creation.
+ * Passwords are verified as sufficiently complex.
+ * Passwords are stored using a one-way salted hash.
+ * User access logs are maintained including date, time, user ID, relevant URL, operation performed, and source IP address for
+ audit purposes.
+
+
+AUTHENTICATION & ACCESS CONTROL - HUMANLOOP API
+
+All users of the API are required to authenticate with a unique API token header:
+
+ * Follows the OAuth 2.0 pattern.
+ * API tokens are only visible once on creation and then obfuscated.
+ * Users can manage the expiry of API keys.
+ * API token access logs are maintained including date, time, user ID, relevant URL, operation performed, and source IP address
+ for audit purposes.
+
+
+ADDITIONAL RESOURCES
+
+ * Role-based access control (RBAC) - We implement strict role-based access control (RBAC) for all our systems.
+ * Multi-factor authentication (MFA) - MFA is enforced for all employee accounts.
+
+
+ENCRYPTION STANDARDS
+
+
+ENCRYPTION
+
+Humanloop follows best practices for data management and encryption. All data in transit is secured with TLS/SSL, and all data at
+rest is encrypted using the AES-256 algorithm. All encryption keys are managed using AWS Key Management Service (KMS) as part of
+the VPC definition.
+
+ * All data in transit is encrypted using TLS 1.2 or higher.
+ * Data at rest is encrypted using AES-256 encryption.
+
+
+INFRASTRUCTURE
+
+All sensitive data is encrypted in transit. For Self-Hosted Cloud (VPC) environments, network traffic is also encrypted in transit
+and at rest to meet HIPAA requirements. Sensitive application data is only ever processed within the ECS cluster and stored in
+Aurora. To request a network infrastructure diagram or more information, please contact privacy@humanloop.com
+[privacy@humanloop.com].
+
+Learn More
+
+For more information about how Humanloop processes user data, visit our Data Management & Hosting Options page.
+
+
+SECURITY CERTIFICATIONS
+
+
+SOC2 TYPE II COMPLIANCE
+
+Humanloop is fully SOC2 Type II compliant. Learn more via our Trust Center [https://trust.humanloop.com/] and our Security Policy
+[https://humanloop.com/policies/security-policy] page.
+
+
+HIPAA COMPLIANCE
+
+Humanloop actively works with paying customers to help them achieve HIPAA compliance. Official certification is pending.
+
+To request references or more information, contact sales@humanloop.com [sales@humanloop.com].
+
+HIPAA Compliance via Hosting Environment:
+
+Humanloop offers dedicated platform instances on AWS with HIPAA provisions for enterprise customers that have particularly
+sensitive data. These provisions include:
+
+ * The ability for enterprises to manage their own encryption keys.
+ * A specific AWS Fargate deployment that follows HIPAA practices.
+
+
+GDPR COMPLIANCE
+
+We are fully compliant with the General Data Protection Regulation (GDPR). This includes:
+
+ * Data minimization practices
+ * User rights management
+ * Data processing agreements
+
+
+HOW HUMANLOOP HELPS CUSTOMERS MAINTAIN COMPLIANCE:
+
+ * Self-Hosted Cloud (VPC) environments
+ * Data Processing Agreements (DPAs)
+ * Data Minimization and Retention Policies
+ * Role-Based Access Controls
+ * Data Encryption
+ * Robust Security Measures
+ * Incident Response Plan SLAs
+ * Regular Training & Audits
+
+
+LEARN MORE:
+
+ * Cloud Hosting Options
+ * Data Management Protocols
+ * Security Policy [https://humanloop.com/policies/security-policy]
+ * Privacy Policy [https://humanloop.com/policies/privacy-policy]
+ * Trust Center [https://trust.humanloop.com/]
+
+To request references or more information, contact sales@humanloop.com [sales@humanloop.com]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/reference/security-and-compliance",
+ "title": "Security and Compliance",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -8022,6 +12758,77 @@ An overview of the data management practices and encryption methodologies used b
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Reference",
+ ],
+ "content": "DATA HANDLING AND SEGREGATION
+
+Separate environments are provisioned and maintained for development, quality assurance/user acceptance testing, and production to
+ensure data segregation at the environment level.
+
+
+DATA CLASSIFICATION & ACCESS CONTROL
+
+All platform data received from the user and data derived from user data is classified as sensitive. All platform audit and
+telemetry data that does not contain PII or reference specific user data is classified as not sensitive.
+
+By default, only authenticated users can see their own sensitive data. Data classified as not sensitive can be accessed by
+dedicated Humanloop support staff using a secure VPN connection to the private network of the VPC for the target environment. This
+access is for debugging issues and improving system performance. The Terms of Service define further details around data ownership
+and access on a case-by-case basis.
+
+
+DATA ENCRYPTION AND SECURITY
+
+ENCRYPTION
+
+Humanloop follows best practices for data management and encryption. All data in transit is secured with TLS/SSL, and all data at
+rest is encrypted using the AES-256 algorithm. All encryption keys are managed using AWS Key Management Service (KMS) as part of
+the VPC definition.
+
+
+INFRASTRUCTURE
+
+All sensitive data is encrypted in transit. For Self-Hosted Cloud (VPC) environments, network traffic is also encrypted in transit
+and at rest to meet HIPAA requirements. Sensitive application data is only processed within the ECS cluster and stored in Aurora.
+To request a network infrastructure diagram or more information, please contact privacy@humanloop.com [privacy@humanloop.com].
+
+
+LEARN MORE
+
+For more information on how Humanloop processes user data, visit our Security & Compliance [https://trust.humanloop.com] page.
+
+
+DATA STORAGE, RETENTION, AND RECOVERY
+
+All platform data is stored in a primary database server with multi-availability zone replication. Platform data is retained
+indefinitely and backed up daily in a secure and encrypted manner until a request is made by the contractual owners of that data
+to remove it, in accordance with GDPR guidelines.
+
+Humanloop's Terms of Service define the contractual owner of the user data and data derived from the user data. A semi-automated
+disaster recovery process is in place to restore the database to a specified point-in-time backup as required.
+
+
+DATA BREACH RESPONSE
+
+Any data breaches will be communicated to all impacted Humanloop users and partners within 24 hours, along with consequences and
+mitigations. Breaches will be dealt with in accordance with the Humanloop data breach response policy, which is tested annually.
+
+
+DATA PORTABILITY AND RETURN
+
+Within 30 days post-contract termination, users can request the return of their data and derived data (as defined by the Terms of
+Service). Humanloop provides this data via downloadable files in comma-separated value (.csv) or .json formats.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/reference/data-management",
+ "title": "Data Management",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -8234,6 +13041,63 @@ Guides and further details about key concepts can be found in [our docs](/docs/g
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [],
+ "content": "The Humanloop API allows you to interact with Humanloop and model providers programmatically.
+
+You can do this through HTTP requests from any language or via our official Python or TypeScript SDK.
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop TypeScript SDK:
+
+ npm install humanloop
+
+
+ 2. Import and initialize the SDK:
+
+ import { HumanloopClient, Humanloop } from "humanloop";
+
+ const humanloop = new HumanloopClient({ apiKey: "YOUR_API_KEY" });
+
+ // Check that the authentication was successful
+ console.log(await humanloop.prompts.list());
+
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop Python SDK:
+
+ pip install humanloop
+
+
+ 2. Start a Python interpreter:
+
+ python
+
+
+ 3. Initialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page
+ [https://app.humanloop.com/account/api-keys])
+
+ from humanloop import Humanloop
+ hl = Humanloop(api_key="")
+
+ # Check that the authentication was successful
+ print(hl.prompts.list())
+
+
+Guides and further details about key concepts can be found in our docs [/docs/getting-started/overview].",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/api-reference",
+ "title": "Humanloop API",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -8268,6 +13132,44 @@ The Humanloop platform can be accessed through the API or through our Python and
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Introduction",
+ ],
+ "content": "The Humanloop platform can be accessed through the API or through our Python and TypeScript SDKs.
+
+
+USAGE EXAMPLES
+
+npm install humanloop@0.8.0-beta12
+
+
+import { HumanloopClient, Humanloop } from "humanloop";
+
+const humanloop = new HumanloopClient({ apiKey: "YOUR_API_KEY" });
+
+// Check that the authentication was successful
+console.log(await humanloop.prompts.list());
+
+
+pip install humanloop==0.8.0b17
+
+
+from humanloop import Humanloop
+hl = Humanloop(api_key="")
+
+# Check that the authentication was successful
+print(hl.prompts.list())
+",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/api-reference/sdks",
+ "title": "SDKs",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -8338,6 +13240,42 @@ In the event an issue occurs with our system, or with one of the model providers
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Introduction",
+ ],
+ "content": "HTTP ERROR CODES
+
+Our API will return one of the following HTTP error codes in the event of an issue:
+
+ * Your request was improperly formatted or presented.
+ * Your API key is incorrect or missing, or your user does not have the rights to access the relevant resource.
+ * The requested resource could not be located.
+ * Modifying the resource would leave it in an illegal state.
+ * Your request was properly formatted but contained invalid instructions or did not match the fields required by the endpoint.
+ * You've exceeded the maximum allowed number of requests in a given time period.
+ * An unexpected issue occurred on the server.
+ * The service is temporarily overloaded and you should try again.
+
+
+ERROR DETAILS
+
+Our prompt/call endpoint acts as a unified interface across all popular model providers. The error returned by this endpoint may
+be raised by the model provider's system. Details of the error are returned in the detail object of the response.
+
+{
+ "type": "unprocessable_entity_error",
+ "message": "This model's maximum context length is 4097 tokens. However, you requested 10000012 tokens (12 in the messages, 10000000 in the completion). Please reduce the length of the messages or completion.",
+ "code": 422,
+ "origin": "OpenAI"
+}
+",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/api-reference/errors",
+ "title": "Errors",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -8442,6 +13380,30 @@ The service is temporarily overloaded and you should try again.
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "September 1900",
+ ],
+ "content": "EVALUATION NAMES
+
+You can now name your Evaluations in the UI and via the API. This is helpful for more easily identifying the purpose of your
+different Evaluations, especially when multiple teams are running different experiments.
+
+Evaluation with a name [file:7440baf8-874f-4fab-a337-335226b9b22d]
+
+In the API, pass in the name field when creating your Evaluation to set the name. Note that names must be unique for all
+Evaluations for a specific file. In the UI, navigate to your Evaluation and you will see an option to rename it in the header.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/9/17",
+ "title": "September 17, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
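+As a hypothetical sketch of the API usage described above (the method name evaluations.create and its parameters are assumptions,
+not confirmed SDK signatures):
+
+\`\`\`python
+from humanloop import Humanloop
+
+hl = Humanloop(api_key="YOUR_API_KEY")
+
+# Pass a name when creating the Evaluation; names must be unique per file.
+evaluation = hl.evaluations.create(  # assumed method name
+    name="Prompt regression run",    # the new name field described above
+    file={"path": "My Prompt"},      # assumed way of referencing the file under evaluation
+)
+\`\`\`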
{
"breadcrumbs": [
{
@@ -8496,6 +13458,42 @@ In the API, pass in the \`name\` field when creating your Evaluation to set the
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "September 1900",
+ ],
+ "content": "INTRODUCING FLOWS
+
+We've added a new key building block to our app with the first release of Flows. This release focuses on improving the code-first
+workflows for evaluating more complex AI applications like RAG and Agent-based apps.
+
+Flows allow you to version your whole AI application on Humanloop (as opposed to just individual Prompts and Tools) and allows you
+to log and evaluate the full trace of the important processing steps that occur when running your app.
+
+See our cookbook tutorial [https://github.com/humanloop/humanloop-cookbook/blob/main/tutorials/rag/evaluate-rag-flow.ipynb] for
+examples on how to use Flows in your code.
+
+Image of a Flow with logs [file:6c836381-036b-456b-941f-ae95219dc64d]
+
+
+WHAT'S NEXT
+
+We'll soon be extending support for allowing Evaluators to access all Logs inside a trace. Additionally, we will build on this by
+adding UI-first visualisations and management of your Flows.
+
+We'll sunset Sessions in favour of Flows in the near future. Reach out to us for guidance on how to migrate your Session-based
+workflows to Flows.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/9/15",
+ "title": "September 15, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -8587,32 +13585,28 @@ We'll sunset Sessions in favour of Flows in the near future. Reach out to us for
},
{
"breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2024",
- "title": "2024",
- },
- {
- "slug": "docs/changelog/9",
- "title": "September 1900",
- },
+ "Changelog",
+ "2024",
+ "September 1900",
],
- "description": "We've introduced a Bedrock integration on Humanloop, allowing you to use Anthropic's models via the Bedrock API, leveraging your AWS-managed infrastructure.
+ "content": "BEDROCK SUPPORT FOR ANTHROPIC MODELS
-![AWS Bedrock Claude models in model selection dropdown in a Prompt Editor on Humanloop](file:a2407bfb-9056-49a2-9191-50f0db2c34b6)
+We've introduced a Bedrock integration on Humanloop, allowing you to use Anthropic's models via the Bedrock API, leveraging your
+AWS-managed infrastructure.
-To set this up, head to the API Keys tab in your Organization settings [here](https://app.humanloop.com/account/api-keys). Enter your AWS credentials and configuration.
+AWS Bedrock Claude models in model selection dropdown in a Prompt Editor on Humanloop [file:a2407bfb-9056-49a2-9191-50f0db2c34b6]
-![Bedrock keys dialog in Humanloop app](file:bf081486-80f7-4241-8693-059fdeda754d)
+To set this up, head to the API Keys tab in your Organization settings here [https://app.humanloop.com/account/api-keys]. Enter
+your AWS credentials and configuration.
-Once you've set up your Bedrock keys, you can select the Anthropic models in the model selection dropdown in the Prompt Editor and start using them in your Prompts.",
+Bedrock keys dialog in Humanloop app [file:bf081486-80f7-4241-8693-059fdeda754d]
+
+Once you've set up your Bedrock keys, you can select the Anthropic models in the model selection dropdown in the Prompt Editor and
+start using them in your Prompts.",
"indexSegmentId": "0",
- "slug": "docs/v5/changelog/2024/9/13#bedrock-support-for-anthropic-models",
- "title": "Bedrock support for Anthropic models",
- "type": "page-v4",
+ "slug": "docs/v5/changelog/2024/9/13",
+ "title": "September 13, 2024",
+ "type": "page-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -8633,16 +13627,77 @@ Once you've set up your Bedrock keys, you can select the Anthropic models in the
"title": "September 1900",
},
],
- "description": undefined,
+ "description": "We've introduced a Bedrock integration on Humanloop, allowing you to use Anthropic's models via the Bedrock API, leveraging your AWS-managed infrastructure.
+
+![AWS Bedrock Claude models in model selection dropdown in a Prompt Editor on Humanloop](file:a2407bfb-9056-49a2-9191-50f0db2c34b6)
+
+To set this up, head to the API Keys tab in your Organization settings [here](https://app.humanloop.com/account/api-keys). Enter your AWS credentials and configuration.
+
+![Bedrock keys dialog in Humanloop app](file:bf081486-80f7-4241-8693-059fdeda754d)
+
+Once you've set up your Bedrock keys, you can select the Anthropic models in the model selection dropdown in the Prompt Editor and start using them in your Prompts.",
"indexSegmentId": "0",
- "slug": "docs/v5/changelog/2024/9/10",
- "title": "September 10, 2024",
+ "slug": "docs/v5/changelog/2024/9/13#bedrock-support-for-anthropic-models",
+ "title": "Bedrock support for Anthropic models",
"type": "page-v4",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2024",
+ "title": "2024",
+ },
+ {
+ "slug": "docs/changelog/9",
+ "title": "September 1900",
+ },
+ ],
+ "description": undefined,
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/9/10",
+ "title": "September 10, 2024",
+ "type": "page-v4",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "September 1900",
+ ],
+ "content": "OPENAI O1
+
+We added same day support for OpenAI's new models, the o1 series. Unlike their predecessors, the o1 models have been designed to
+spend more time thinking before they respond. In practice this means that when you call the API, time and tokens are spent doing
+chain-of-thought reasoning before you receive a response back.
+
+o1 in the Humanloop Editor [file:6c098bb5-3d17-4c21-8cc8-18ae5f5e8db5]
+
+Read more about this new class of models in OpenAI's release note [https://openai.com/index/introducing-openai-o1-preview/] and
+their documentation [https://platform.openai.com/docs/guides/reasoning].
+
+These models are still in Beta and don't yet support streaming or tool use; the temperature has to be set to 1, and there are
+specific rate limits in place.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/9/10",
+ "title": "September 10, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
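+As an illustrative sketch, calling one of these models follows the same prompts.call pattern shown elsewhere in these docs; the
+path and template below are illustrative, o1-preview is one of the o1-series model names, and temperature is pinned to 1 as
+required:
+
+\`\`\`python
+from humanloop import Humanloop
+
+client = Humanloop(api_key="YOUR_API_KEY")
+
+# Non-streaming call (the o1 models don't yet support streaming or tool use).
+response = client.prompts.call(
+    path="reasoning-demo",  # illustrative path
+    prompt={
+        "model": "o1-preview",
+        "temperature": 1,  # must be set to 1 for o1 models
+        "template": [{"role": "system", "content": "You are a careful reasoner."}],
+    },
+    messages=[{"role": "user", "content": "How many r's are in 'strawberry'?"}],
+    stream=False,
+)
+\`\`\`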
{
"breadcrumbs": [
{
@@ -8700,6 +13755,55 @@ These models are still in Beta and don't yet support streaming or tool use, the
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "September 1900",
+ ],
+ "content": "EVALS CICD IMPROVEMENTS
+
+We've expanded our evals API [https://humanloop.com/docs/v5/api-reference/evaluations/get-stats] to include new fields that allow
+you to more easily check on progress and render summaries of your Evals directly in your deployment logs.
+
+The stats response now contains a status field that you can poll, as well as progress and report fields that you can print:
+
+⏳ Evaluation Progress
+Total Logs: 40/40
+Total Judgments: 120/120
+
+
+
+📊 Evaluation Results for evals_demo/answer-flow
++------------------------+---------------------------+---------------------------+
+| Version id | flv_xo7ZxnkkvcFcDJ9pwSrA9 | flv_foxO18ZHEgxQmwYJO4bR1 |
++------------------------+---------------------------+---------------------------+
+| Created | 2024-09-01 14:50:28 | 2024-09-02 14:53:24 |
++------------------------+---------------------------+---------------------------+
+| Evaluators | | |
++------------------------+---------------------------+---------------------------+
+| evals_demo/exact_match | 0.8 | 0.65 |
+| evals_demo/levenshtein | 7.5 | 33.5 |
+| evals_demo/reasoning | 0.3 | 0.05 |
++------------------------+---------------------------+---------------------------+
+
+
+Navigate to Evaluation: https://app.humanloop.com/evaluations/evr_vXjRgufGzwuX37UY83Lr8
+❌ Latest score [0.05] below the threshold [0.5] for evaluator evals_demo/reasoning.
+❌ Regression of [-0.25] for evaluator evals_demo/reasoning
+
+
+See how you can leverage Evals as part of your CICD pipeline to test for regressions in your AI apps in our reference example
+[https://github.com/humanloop/humanloop-cookbook/blob/main/tutorials/rag/evaluate_rag_cicd.py].",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/9/5",
+ "title": "September 5, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
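+As an illustrative sketch of consuming these fields from a CI script (the client method evaluations.get_stats and the exact field
+names status, progress and report are assumptions based on the description above, not confirmed signatures):
+
+\`\`\`python
+import time
+
+from humanloop import Humanloop
+
+hl = Humanloop(api_key="YOUR_API_KEY")
+
+# Poll the Evaluation stats until the run finishes, then print the report for the deployment logs.
+while True:
+    stats = hl.evaluations.get_stats(id="ev_...")  # assumed method and id placeholder
+    print(stats.progress)                          # assumed field: the progress text shown above
+    if stats.status != "running":                  # assumed field and value
+        print(stats.report)                        # assumed field: the results table shown above
+        break
+    time.sleep(5)
+\`\`\`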
{
"breadcrumbs": [
{
@@ -8782,6 +13886,27 @@ See how you can leverage Evals as part of your CICD pipeline to test for regress
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "August 1900",
+ ],
+ "content": "GET ALL DEPLOYED VERSIONS VIA API
+
+We've introduced a new Files API in our v5 API resources that lets you query all files simultaneously. This is useful when
+managing your workflows on Humanloop and you wish to find all files that match specific criteria, such as having a deployment in a
+specific environment. Some of the supported filters to search with are file name, file type, and deployed environments. If there
+are additional access patterns you'd find useful, please reach out and let us know.
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/8/30",
+ "title": "August 30, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -8832,6 +13957,29 @@ See how you can leverage Evals as part of your CICD pipeline to test for regress
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "August 1900",
+ ],
+ "content": "UPDATE LOGS API
+
+We've introduced the ability to patch Logs for Prompts and Tools. This can be useful in scenarios where certain characteristics
+of your Log, such as the output, are delayed and you want to add them later, or if you have a process of redacting inputs that
+takes time.
+
+Note that not all fields support being patched, so start by referring to our V5 API References [api-reference/prompts]. From
+there, you can submit updates to your previously created logs.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/8/29",
+ "title": "August 29, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -8884,6 +14032,29 @@ Note that not all fields support being patched, so start by referring to our [V5
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "August 1900",
+ ],
+ "content": "SEARCH FILES BY PATH
+
+We've extended our search interface to include file paths, allowing you to more easily find and navigate to related files that
+you've grouped under a directory.
+
+Search dialog showing file paths [file:7e1c0a1e-dea7-4e73-8880-47e7d8d96d74]
+
+Bring up this search dialog by clicking "Search" near the top of the left-hand sidebar, or by pressing Cmd+K.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/8/28",
+ "title": "August 28, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -8938,6 +14109,33 @@ Bring up this search dialog by clicking "Search" near the top of the left-hand s
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "August 1900",
+ ],
+ "content": "UPDATED GEMINI 1.5 MODELS
+
+Humanloop supports the three newly released Gemini 1.5 models.
+
+Start using these improved models by specifying one of the following model names in your Prompts:
+
+ * gemini-1.5-pro-exp-0827 The improved Gemini 1.5 Pro model
+ * gemini-1.5-flash-exp-0827 The improved Gemini 1.5 Flash model
+ * gemini-1.5-flash-8b-exp-0827 The smaller Gemini 1.5 Flash variant
+
+More details on these models can be viewed here
+[https://ai.google.dev/gemini-api/docs/models/experimental-models#available-models].",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/8/24",
+ "title": "August 24, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -8996,6 +14194,34 @@ More details on these models can be viewed [here](https://ai.google.dev/gemini-a
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "August 1900",
+ ],
+ "content": "CUSTOM ATTRIBUTES FOR FILES
+
+You can now include custom attributes to determine the unique version of your file definitions on Humanloop.
+
+This allows you to make the version depend on data custom to your application that Humanloop may not be aware of.
+
+For example, there may be feature flags or identifiers that indicate a different configuration of your system and impact the
+behaviour of your Prompt or Tool.
+
+attributes can be submitted via the v5 API endpoints. When added, the attributes are visible on the Version Drawer and in the
+Editor.
+
+Metadata on versions [file:2d08a4a4-22fc-41e7-ad6e-7eb02313bf21]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/8/20",
+ "title": "August 20, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
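+As a hypothetical sketch of submitting attributes via the v5 API (placing attributes inside the prompt version payload here is an
+assumption based on the description above, not a confirmed schema):
+
+\`\`\`python
+from humanloop import Humanloop
+
+client = Humanloop(api_key="YOUR_API_KEY")
+
+client.prompts.call(
+    path="my-prompt",  # illustrative path
+    prompt={
+        "model": "gpt-4o",
+        # Custom data that should contribute to the unique version of this file,
+        # e.g. a feature flag your application is running with (assumed placement).
+        "attributes": {"feature_flag": "checkout-v2"},
+    },
+    messages=[{"role": "user", "content": "Hello"}],
+)
+\`\`\`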
{
"breadcrumbs": [
{
@@ -9054,6 +14280,29 @@ For example, if there are feature flags or identifiers that indicate a different
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "August 1900",
+ ],
+ "content": "IMPROVED POPOVER UI
+
+We've expanded the information shown in the version popover so that it is easier to identify which version you are working with.
+
+This is particularly useful in places like the Logs table and within Evaluation reports, where you may be working with multiple
+versions of a Prompt, Tool, or Evaluator and need to preview the contents.
+
+Improved version popover [file:797507e6-2827-4ccb-9f02-4123f9fe1b86]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/8/16",
+ "title": "August 16, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -9108,6 +14357,34 @@ This is particularly useful in places like the Logs table and within Evaluation
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "August 1900",
+ ],
+ "content": "EVALUATE UNCOMMITTED VERSIONS
+
+You can now evaluate versions without committing them first. This means you can draft a version of a Prompt in the editor and
+simultaneously evaluate it in the evaluations tab, speeding up your iteration cycle.
+
+This is a global change that allows you to load and use uncommitted versions. Uncommitted versions are created automatically when
+a new version of a Prompt, Tool, or Evaluator is run in their respective editors or called via the API. These versions will now
+appear in the version pickers underneath all your committed versions.
+
+To evaluate an uncommitted version, simply select it by using the hash (known as the "version id") when setting up your
+evaluation.
+
+Uncommitted versions in the version picker [file:bccddb71-a5a3-4222-91be-d4e063a71dad]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/8/15",
+ "title": "August 15, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -9164,6 +14441,54 @@ To evaluate an uncommitted version, simply select it by using the hash (known as
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "August 1900",
+ ],
+ "content": "HUMAN EVALUATOR UPGRADES
+
+We've made significant upgrades to Human Evaluators and related workflows to improve your ability to gather Human judgments
+(sometimes referred to as "feedback") in assessing the quality of your AI applications.
+
+Here are some of the key improvements:
+
+ * Instead of having to define a limited feedback schema tied to the settings of a specific Prompt, you can now define your schema
+ with a Human Evaluator file and reuse it across multiple Prompts and Tools for both monitoring and offline evaluation purposes.
+ * You are no longer restricted to the default types of Rating, Actions and Issues when defining your feedback schemas from the
+ UI. We've introduced a more flexible Editor interface supporting different return types and valence controls.
+ * We've extended the scope of Human Evaluators so that they can now also be used with Tools and other Evaluators (useful for
+ validating AI judgments) in the same way as with Prompts.
+ * We've improved the Logs drawer UI for applying feedback to Logs. In particular, we've made the buttons more responsive.
+
+To set up a Human Evaluator, create a new file. Within the file creation dialog, click on Evaluator, then click on Human. This
+will create a new Human Evaluator file and bring you to its Editor. Here, you can choose a Return type for the Evaluator and
+configure the feedback schema.
+
+Tone evaluator set up with options and instructions [file:9c477a6f-8107-4320-8cd9-ff101f262b7a]
+
+You can then reference this Human Evaluator within the Monitoring dropdown of Prompts, Tools, and other Evaluators, as well as
+when configuring reports in their Evaluations tab.
+
+We've set up default Rating and Correction Evaluators that will be automatically attached to all new and existing Prompts. We've
+migrated all your existing Prompt-specific feedback schemas to Human Evaluator files, and these will continue to work as before
+with no disruption.
+
+Check out our updated document for further details on how to use Human Evaluators:
+
+ * Create a Human Evaluator [/docs/evaluation/guides/human-evaluator]
+ * Capture End User Feedback [/docs/observability/guides/capture-user-feedback]
+ * Run a Human Evaluation [/docs/evaluation/guides/run-human-evaluation]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/8/14",
+ "title": "August 14, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -9236,6 +14561,57 @@ Check out our updated document for further details on how to use Human Evaluator
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "August 1900",
+ ],
+ "content": "EVALUATIONS IMPROVEMENTS
+
+We've made improvements to help you evaluate the components of your AI applications, quickly see issues and explore the full
+context of each evaluation.
+
+
+A CLEARER EVALUATION TAB IN LOGS
+
+We've given the Log drawer's Evaluation tab a facelift. You can now clearly see what the results are for each of the connected
+Evaluators.
+
+This means that it's now easier to debug the judgments applied to a Log, and if necessary, re-run code/AI Evaluators in-line.
+
+Log drawer's Evaluation tab with the "Run again" menu open [file:96373200-4779-425c-a95f-ee79c26fe5d6]
+
+
+ABILITY TO RE-RUN EVALUATORS
+
+We have introduced the ability to re-run your Evaluators against a specific Log. This feature allows you to more easily address
+and fix issues with previous Evaluator judgments for specific Logs.
+
+You can request a re-run of that Evaluator by opening the menu next to that Evaluator and pressing the "Run Again" option.
+
+
+EVALUATION POPOVER
+
+If you hover over an evaluation result, you'll now see a popover with more details about the evaluation including any intermediate
+results or console logs without context switching.
+
+Evaluation popover [file:46f3ec14-9cc8-479e-bd9f-bdd3a44ee812]
+
+
+UPDATED EVALUATOR LOGS TABLE
+
+The Logs table for Evaluators now supports the functionality you would expect from our other Logs tables. This will make it
+easier to filter and sort your Evaluator judgments.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/8/13",
+ "title": "August 13, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -9412,26 +14788,22 @@ You can request a re-run of that Evaluator by opening the menu next to that Eval
},
{
"breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2024",
- "title": "2024",
- },
- {
- "slug": "docs/changelog/8",
- "title": "August 1900",
- },
+ "Changelog",
+ "2024",
+ "August 1900",
],
- "description": "We have expanded the packages available in the Evaluator Python environment. The new packages we've added are: \`continuous-eval\`, \`jellyfish\`, \`langdetect\`, \`nltk\`, \`scikit-learn\`, \`spacy\`, \`transformers\`. The full list of packages can been seen in our [Python environment reference](/docs/reference/python-environment).
+ "content": "MORE CODE EVALUATOR PACKAGES
-We are actively improving our execution environment so if you have additional packages you'd like us to support, please do not hesitate to get in touch.",
+We have expanded the packages available in the Evaluator Python environment. The new packages we've added are: continuous-eval,
+jellyfish, langdetect, nltk, scikit-learn, spacy, transformers. The full list of packages can be seen in our Python environment
+reference [/docs/reference/python-environment].
+
+We are actively improving our execution environment so if you have additional packages you'd like us to support, please do not
+hesitate to get in touch.",
"indexSegmentId": "0",
- "slug": "docs/v5/changelog/2024/8/7#more-code-evaluator-packages",
- "title": "More Code Evaluator packages",
- "type": "page-v4",
+ "slug": "docs/v5/changelog/2024/8/7",
+ "title": "August 7, 2024",
+ "type": "page-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -9452,16 +14824,190 @@ We are actively improving our execution environment so if you have additional pa
"title": "August 1900",
},
],
- "description": undefined,
+ "description": "We have expanded the packages available in the Evaluator Python environment. The new packages we've added are: \`continuous-eval\`, \`jellyfish\`, \`langdetect\`, \`nltk\`, \`scikit-learn\`, \`spacy\`, \`transformers\`. The full list of packages can been seen in our [Python environment reference](/docs/reference/python-environment).
+
+We are actively improving our execution environment so if you have additional packages you'd like us to support, please do not hesitate to get in touch.",
"indexSegmentId": "0",
- "slug": "docs/v5/changelog/2024/8/5",
- "title": "August 5, 2024",
+ "slug": "docs/v5/changelog/2024/8/7#more-code-evaluator-packages",
+ "title": "More Code Evaluator packages",
"type": "page-v4",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2024",
+ "title": "2024",
+ },
+ {
+ "slug": "docs/changelog/8",
+ "title": "August 1900",
+ },
+ ],
+ "description": undefined,
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/8/5",
+ "title": "August 5, 2024",
+ "type": "page-v4",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "August 1900",
+ ],
+ "content": "OPENAI STRUCTURED OUTPUTS
+
+OpenAI have introduced Structured Outputs [https://openai.com/index/introducing-structured-outputs-in-the-api/] functionality to
+their API.
+
+This feature allows the model to more reliably adhere to user defined JSON schemas for use cases like information extraction, data
+validation, and more.
+
+We've extended our /chat (in v4) and prompt/call (in v5) endpoints to support this feature. There are two ways to trigger
+Structured Outputs in the API:
+
+ 1. Tool Calling: When defining a tool as part of your Prompt definition, you can now include a strict=true flag. The model will
+ then output JSON data that adheres to the tool parameters schema definition.
+
+""" Example using our v5 API. """
+from humanloop import Humanloop
+
+client = Humanloop(
+ api_key="YOUR_API_KEY",
+)
+
+client.prompts.call(
+ path="person-extractor",
+ prompt={
+ "model": "gpt-4o",
+ "template": [
+ {
+ "role": "system",
+ "content": "You are an information extractor.",
+ },
+ ],
+ "tools": [
+ {
+ "name": "extract_person_object",
+ "description": "Extracts a person object from a user message.",
+ # New parameter to enable structured outputs
+ "strict": True,
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string",
+ "name": "Full name",
+ "description": "Full name of the person",
+ },
+ "address": {
+ "type": "string",
+ "name": "Full address",
+ "description": "Full address of the person",
+ },
+ "job": {
+ "type": "string",
+ "name": "Job",
+ "description": "The job of the person",
+ }
+ },
+ # These fields need to be defined in strict mode
+ "required": ["name", "address", "job"],
+ "additionalProperties": False,
+ },
+ }
+ ],
+ },
+ messages=[
+ {
+ "role": "user",
+ "content": "Hey! I'm Jacob Martial, I live on 123c Victoria street, Toronto and I'm a software engineer at Humanloop.",
+ },
+ ],
+ stream=False,
+)
+
+
+ 2. Response Format: We have expanded the response_format request parameter with a json_schema option and an optional
+ json_schema field where you can pass in the schema you wish the model to adhere to.
+
+
+client.prompts.call(
+ path="person-extractor",
+ prompt={
+ "model": "gpt-4o",
+ "template": [
+ {
+ "role": "system",
+ "content": "You are an information extractor.",
+ },
+ ],
+ # New parameter to enable structured outputs
+ "response_format": {
+ "type": "json_schema",
+ "json_schema": {
+ "name": "person_object",
+ "strict": True,
+ "schema": {
+ "type": "object",
+ "properties": {
+ "name": {
+ "type": "string",
+ "name": "Full name",
+ "description": "Full name of the person"
+ },
+ "address": {
+ "type": "string",
+ "name": "Full address",
+ "description": "Full address of the person"
+ },
+ "job": {
+ "type": "string",
+ "name": "Job",
+ "description": "The job of the person"
+ }
+ },
+ "required": ["name", "address", "job"],
+ "additionalProperties": False
+ }
+ }
+ }
+ },
+ messages=[
+ {
+ "role": "user",
+ "content": "Hey! I'm Jacob Martial, I live on 123c Victoria street, Toronto and I'm a software engineer at Humanloop.",
+ },
+ ],
+ stream=False,
+)
+
+
+This new response format functionality is only supported by the latest OpenAI model snapshots gpt-4o-2024-08-06 and
+gpt-4o-mini-2024-07-18.
+
+We will also be exposing this functionality in our Editor UI soon!",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/8/5",
+ "title": "August 5, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -9637,6 +15183,34 @@ We will also be exposing this functionality in our Editor UI soon too!",
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "August 1900",
+ ],
+ "content": "IMPROVED CODE EVALUATOR DEBUGGING
+
+We've added the ability to view the Standard Output (Stdout) for your Code Evaluators.
+
+You can now use print(...) statements within your code to output intermediate results to aid with debugging.
+
+The Stdout is available within the Debug console as you iterate on your Code Evaluator:
+
+DebugConsole [file:4789435b-e95a-4443-b88c-b5d75939e174]
+
+Additionally, it is stored against the Evaluator Log for future reference:
+
+EvaluatorLog [file:a14f9158-e6ff-46e2-85f3-b9fb57d7f94a]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/8/1",
+ "title": "August 1, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
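+As a hypothetical sketch of what such a Code Evaluator might look like (the evaluate entry point and the shape of the log argument
+are assumptions for illustration, not a confirmed interface):
+
+\`\`\`python
+def evaluate(log):
+    # Intermediate results printed here now show up in the Debug console Stdout
+    # and are stored against the Evaluator Log.
+    output = log.get("output", "")
+    print(f"output length: {len(output)}")  # debugging aid via Stdout
+    return len(output) > 0
+\`\`\`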
{
"breadcrumbs": [
{
@@ -9697,6 +15271,30 @@ Additionally, it is stored against the Evaluator Log for future reference:
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "July 1900",
+ ],
+ "content": "SELECT MULTIPLE VERSIONS WHEN CREATING AN EVALUATION
+
+Our Evaluations feature allows you to benchmark Versions of the same File. We've made the form for creating new Evaluations
+simpler by allowing the selection of multiple Versions in the picker dialog. Columns will be filled or inserted as needed.
+
+As an added bonus, we've made adding and removing columns feel smoother with animations. The form will also scroll to newly-added
+columns.
+
+[file:e48a60ba-9b9b-4fe1-baee-f19ef063a760]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/7/30",
+ "title": "July 30, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -9751,6 +15349,26 @@ As an added bonus, we've made adding and removing columns feel smoother with ani
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "July 1900",
+ ],
+ "content": "FASTER LOG QUERIES
+
+You should notice that queries against your logs load faster and the tables render more quickly.
+
+We're still making more enhancements, so keep an eye out for more speed-ups coming soon!",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/7/19",
+ "title": "July 19, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -9803,6 +15421,28 @@ We're still making more enhancements so keep an eye for more speed-ups coming so
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "July 1900",
+ ],
+ "content": "GPT-4O-MINI SUPPORT
+
+The latest model from OpenAI, GPT-4o-mini, has been added. It's a smaller version of the GPT-4o model that shows GPT-4 level
+performance while being 60% cheaper than gpt-3.5-turbo.
+
+ * Cost: 15 cents per million input tokens, 60 cents per million output tokens
+ * Performance: MMLU score of 82%",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/7/18",
+ "title": "July 18, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -9856,6 +15496,37 @@ We're still making more enhancements so keep an eye for more speed-ups coming so
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "July 1900",
+ ],
+ "content": "ENHANCED CODE EVALUATORS
+
+We've introduced several enhancements to our code Evaluator runtime environment to support additional packages, environment
+variables, and improved runtime output.
+
+
+RUNTIME ENVIRONMENT
+
+Our Code Evaluator now logs both stdout and stderr when executed and environment variables can now be accessed via the os.environ
+dictionary, allowing you to retrieve values such as os.environ['HUMANLOOP_API_KEY'] or os.environ['PROVIDER_KEYS'].
+
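+A minimal sketch of reading these variables inside the runtime (the evaluator signature itself is an assumption; HUMANLOOP_API_KEY
+and PROVIDER_KEYS are the variables mentioned above):
+
+import os
+
+def evaluator(log):
+    # Environment variables are read from os.environ inside the evaluator runtime
+    api_key = os.environ["HUMANLOOP_API_KEY"]
+    provider_keys = os.environ.get("PROVIDER_KEYS", "")
+    return bool(api_key) and bool(provider_keys)
+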
+
+PYTHON PACKAGES
+
+Previously, the selection of Python packages we could support was limited. We are now able to accommodate customer-requested
+packages. If you have specific package requirements for your eval workflows, please let us know!",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/7/10",
+ "title": "July 10, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -9966,28 +15637,22 @@ We're still making more enhancements so keep an eye for more speed-ups coming so
},
{
"breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2024",
- "title": "2024",
- },
- {
- "slug": "docs/changelog/6",
- "title": "June 1900",
- },
+ "Changelog",
+ "2024",
+ "June 1900",
],
- "description": "Gemini 1.5 Flash is Googles most efficient model to date with a long context window and great latency.
+ "content": "GEMINI 1.5 FLASH SUPPORT
+
+Gemini 1.5 Flash is Google's most efficient model to date with a long context window and great latency.
While it’s smaller than 1.5 Pro, it’s highly capable of multimodal reasoning with a 1 million token length context window.
-Find out more about Flash's [availability and pricing](https://blog.google/technology/developers/gemini-gemma-developer-updates-may-2024/)",
+Find out more about Flash's availability and pricing
+[https://blog.google/technology/developers/gemini-gemma-developer-updates-may-2024/]",
"indexSegmentId": "0",
- "slug": "docs/v5/changelog/2024/6/30#gemini-15-flash-support",
- "title": "Gemini 1.5 Flash support",
- "type": "page-v4",
+ "slug": "docs/v5/changelog/2024/6/30",
+ "title": "June 30, 2024",
+ "type": "page-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -10008,16 +15673,75 @@ Find out more about Flash's [availability and pricing](https://blog.google/techn
"title": "June 1900",
},
],
- "description": undefined,
+ "description": "Gemini 1.5 Flash is Googles most efficient model to date with a long context window and great latency.
+
+While it’s smaller than 1.5 Pro, it’s highly capable of multimodal reasoning with a 1 million token length context window.
+
+Find out more about Flash's [availability and pricing](https://blog.google/technology/developers/gemini-gemma-developer-updates-may-2024/)",
"indexSegmentId": "0",
- "slug": "docs/v5/changelog/2024/6/24",
- "title": "June 24, 2024",
+ "slug": "docs/v5/changelog/2024/6/30#gemini-15-flash-support",
+ "title": "Gemini 1.5 Flash support",
"type": "page-v4",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2024",
+ "title": "2024",
+ },
+ {
+ "slug": "docs/changelog/6",
+ "title": "June 1900",
+ },
+ ],
+ "description": undefined,
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/6/24",
+ "title": "June 24, 2024",
+ "type": "page-v4",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "June 1900",
+ ],
+ "content": "COMMITTING AND DEPLOYING UX IMPROVEMENTS
+
+We've made some improvements to the user experience around committing and deploying changes to your evaluators, tools and
+datasets.
+
+Now, each editor has a consistent and reliable loading and saving experience. You can choose prior versions in the dropdown,
+making it easier to toggle between versions.
+
+And, as you commit, you'll also get the option to immediately deploy your changes. This reduces the number of steps needed to get
+your changes live.
+
+Additional bug fixes:
+
+ * Fixed the flickering issue on the datasets editor
+ * Fixed the issue where the evaluator editor would lose the state of the debug drawer on commit.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/6/24",
+ "title": "June 24, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -10077,6 +15801,34 @@ Additional bug fixes:
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "June 1900",
+ ],
+ "content": "CLAUDE 3.5 SONNET SUPPORT
+
+Claude 3.5 Sonnet is now in Humanloop!
+
+Sonnet is the latest and most powerful model from Anthropic.
+
+2x the speed, 1/5th the cost, yet smarter than Claude 3 Opus.
+
+Anthropic have now enabled streaming of tool calls, which is now supported in Humanloop too.
+
+Add your Anthropic key and select Sonnet in the Editor to give it a go.
+
+Sonnet [file:eab7a6c8-2481-4fc1-969c-a4e8ec833fd7]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/6/20",
+ "title": "June 20, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -10137,6 +15889,27 @@ Add your Anthropic key and select Sonnet in the Editor to give it a go.
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "June 1900",
+ ],
+ "content": "PROMPT AND TOOL VERSION DRAWER IN EVALUATION REPORTS
+
+You can now click on the Prompt and Tool version tags within your Evaluation report to open a drawer with details. This helps
+provide the additional context needed when reasoning about the results without having to navigate away.
+
+Prompt drawer in Evaluation report [file:5a239128-f8f7-4084-ad50-f302ec6bc5b9]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/6/18",
+ "title": "June 18, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -10189,6 +15962,36 @@ Add your Anthropic key and select Sonnet in the Editor to give it a go.
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "June 1900",
+ ],
+ "content": "STATUS OF HUMAN EVALUATORS
+
+With Humanloop Evaluation Reports, you can leverage multiple Evaluators for comparing your Prompt and Tool variations. Evaluators
+can be of different types: code, AI or Human, and the progress of the report depends on collecting all the required judgments.
+Human judgments generally take longer than the rest and are collected asynchronously by members of your team.
+
+Human Evaluators [file:73f7479d-3fc8-4597-b3c2-2723cf8c8e1b]
+
+To better support this workflow, we've improved the UX around monitoring the status of judgments, with a new progress bar. Your
+Human Evaluators can now also update the status of the report when they're done.
+
+Human Evaluators [file:58f1dfca-7812-45a3-a385-51665ec1e5e7]
+
+We've also added the ability to cancel ongoing Evaluations that are pending or running. Humanloop will then stop generating Logs
+and running Evaluators for this Evaluation report.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/6/16",
+ "title": "June 16, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -10247,6 +16050,26 @@ We've also added the ability to cancel ongoing Evaluations that are pending or r
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "June 1900",
+ ],
+ "content": "FASTER EVALUATIONS
+
+Following the recent upgrades around Evaluation reports, we've improved the batching and concurrency for both calling models and
+getting the evaluation results. This has increased the speed of Evaluation report generation by 10x, and the reports now update
+as new batches of logs and evaluations are completed to give a sense of intermediate progress.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/6/10",
+ "title": "June 10, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -10297,6 +16120,67 @@ We've also added the ability to cancel ongoing Evaluations that are pending or r
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "June 1900",
+ ],
+ "content": "EVALUATION COMPARISON REPORTS
+
+We've released Evaluation reports, which allow you to easily compare the performance of your different Prompts or Tools across
+multiple different Evaluator [/docs/evaluators] criteria.
+
+This generalises our previous concept of Evaluation runs, extending it with multiple complementary changes to help you get more
+from your evals. All your existing Evaluation runs have been migrated to Evaluation reports with a single evaluated Prompt or
+Tool. You can easily extend these existing runs to cover additional Evaluators and Prompts/Tools without having to regenerate
+existing logs.
+
+[file:797798fa-e858-42e8-8501-702fdc59d669]
+
+
+FEATURE BREAKDOWN
+
+We've introduced a new stats comparison view, including a radar chart that gives you a quick overview of how your versions compare
+across all Evaluators. Below it, your evaluated versions are shown in columns, forming a grid with a row per Evaluator you've
+selected.
+
+The performance of each version for a given Evaluator is shown in a chart, where bar charts are used for boolean results, while
+box plots are used for numerical results providing an indication of variance within your Dataset.
+
+Evaluation reports also introduce an automatic deduplication feature, which utilizes previous logs to avoid generating new logs
+for the same inputs. If a log already exists for a given evaluated-version-and-datapoint pair, it will automatically be reused.
+You can also override this behavior and force the generation of new logs for a report by creating a New Batch in the setup panel.
+
+[file:95966922-6d64-4e7f-80a7-06c93228420d]
+
+
+HOW TO USE EVALUATION REPORTS
+
+To get started, head over to the Evaluations tab of the Prompt you'd like to evaluate, and click Evaluate in the top right.
+
+This will bring you to a page where you can set up your Evaluation, choosing a Dataset, some versions to Evaluate and compare, and
+the Evaluators you'd like to use.
+
+[file:fc069caf-4584-43a9-abc9-60742b492f0a]
+
+When you click Save, the Evaluation report will be created, and any missing Logs will be generated.
+
+
+WHAT'S NEXT
+
+We're planning on improving the functionality of Evaluation reports by adding a more comprehensive detailed view, allowing you to
+get a more in-depth look at the generations produced by your Prompt versions. Together with this, we'll also be improving Human
+evaluators so you can better annotate and aggregate feedback on your generations.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/6/4",
+ "title": "June 4, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -10450,6 +16334,31 @@ When you click **Save**, the Evaluation report will be created, and any missing
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "May 1900",
+ ],
+ "content": "AZURE MODEL UPDATES
+
+You can now access the latest versions of GPT-4 and GPT-4o hosted on Azure in the Humanloop Editor and via our Chat endpoints.
+
+Once you've configured your Azure key and endpoint in your organization's provider settings, the model versions will show up in
+the Editor dropdown as follows:
+
+For more detail, please see the API documentation [https://docs.humanloop.com/reference/logs_list] on our Logs endpoints.
+
+[file:50554232-640e-43f6-a877-512275a13351]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/5/28",
+ "title": "May 28, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -10506,6 +16415,28 @@ For more detail, please see the [API documentation](https://docs.humanloop.com/r
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "May 1900",
+ ],
+ "content": "IMPROVED LOGS FILTERING
+
+We've improved the ability to filter logs by time ranges. The API logs filter parameters start_date and end_date now support
+querying with more granularity. Previously the filters were limited to dates, such as 2024-05-22; now you can use hourly ranges
+as well, such as 2024-05-22 13:45.
+
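+As a rough sketch of what an hourly-granularity query might look like (the base URL, auth header and project parameter here are
+assumptions; start_date and end_date are the filters described above):
+
+import requests
+
+response = requests.get(
+    "https://api.humanloop.com/v4/logs",  # assumed endpoint path; see the reference link below
+    headers={"X-API-KEY": "<YOUR_HUMANLOOP_API_KEY>"},
+    params={
+        "project_id": "<PROJECT_ID>",
+        "start_date": "2024-05-22 13:45",
+        "end_date": "2024-05-22 15:00",
+    },
+)
+logs = response.json()
+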
+For more detail, please see the API documentation [https://docs.humanloop.com/reference/logs_list] on our Logs endpoints.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/5/20",
+ "title": "May 20, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -10558,6 +16489,28 @@ For more detail, please see the [API documentation](https://docs.humanloop.com/r
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "May 1900",
+ ],
+ "content": "MONITORING WITH DEPLOYED EVALUATORS
+
+You can now connect deployed Evaluator versions for online monitoring of your Prompts and Tools.
+
+This enables you to update Evaluators for multiple Prompts or Tools when you deploy a new Evaluator version.
+
+[file:2bf434b8-5bba-4cab-81a9-6a9972d0b74b]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/5/15",
+ "title": "May 15, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -10612,6 +16565,28 @@ This enables you to update Evaluators for multiple Prompt or Tools when you depl
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "May 1900",
+ ],
+ "content": "GPT-4O
+
+Same-day support for OpenAI's new GPT-4 Omni model! You can now use this within the Humanloop Editor and chat APIs.
+
+Find out more from OpenAI here [https://openai.com/index/hello-gpt-4o/].
+
+[file:346e8b74-a33d-4caf-af73-cc012ef9663e]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/5/13",
+ "title": "May 13, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -10666,6 +16641,31 @@ Find out more from OpenAI [here](https://openai.com/index/hello-gpt-4o/).
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "May 1900",
+ ],
+ "content": "LOGS FOR EVALUATORS
+
+For AI and Code Evaluators, you can now inspect and reference their logs as with Prompts and Tools. This provides greater
+transparency into how they are being used and improves the ability to debug and improve.
+
+Further improvements to Human Evaluators are coming very soon...
+
+",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/5/12",
+ "title": "May 12, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -10722,6 +16722,36 @@ alt="Creating a new Evaluator file" />",
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "May 1900",
+ ],
+ "content": "IMPROVED EVALUATOR MANAGEMENT
+
+Evaluators are now first class citizens alongside Prompts, Tools and Datasets. This allows for easier re-use, version control and
+helps with organising your workspace within directories.
+
+You can create a new Evaluator by choosing Evaluator in the File creation dialog in the sidebar or on your home page.
+
+Creating a new Evaluator file [file:13d97755-6670-4eeb-b14b-a2ad6f7fde97]
+
+
+MIGRATION AND BACKWARDS COMPATIBILITY
+
+We've migrated all of your Evaluators previously managed within Prompts > Evaluations > Evaluators to new Evaluator files. All
+your existing Evaluation runs will remain unchanged and online Evaluators will continue to work as before. Moving forward you
+should use the new Evaluator file to make edits and manage versions.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/5/8",
+ "title": "May 8, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -10805,6 +16835,31 @@ You can create a new Evaluator by choosing **Evaluator** in the File creation di
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "April 1900",
+ ],
+ "content": "LOG DRAWER IN EDITOR
+
+You can now open up the Log drawer directly in the Editor.
+
+This enables you to see exactly what was sent to the provider as well as the tokens used and cost. You can also conveniently add
+feedback and run evaluators on that specific Log, or add it to a dataset.
+
+To show the Logs just click the arrow icon beside each generated message or completion.
+
+[file:c3795728-11e8-475c-80d1-e37750449e01] [file:39cd8988-ef10-4a47-9dad-a1a5c8a15980]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/4/30",
+ "title": "April 30, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -10864,6 +16919,33 @@ To show the Logs just click the arrow icon beside each generated message or comp
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "April 1900",
+ ],
+ "content": "GROQ SUPPORT (BETA)
+
+We have introduced support for models available on Groq to Humanloop. You can now try out the blazingly fast generations made with
+the open-source models (such as Llama 3 and Mixtral 8x7B) hosted on Groq within our Prompt Editor.
+
+[file:406661a0-0d11-4e7f-88b4-8a2bd74f9707]
+
+Groq achieves faster throughput [https://artificialanalysis.ai/models/llama-3-instruct-70b/providers] using specialized hardware,
+their LPU Inference Engine. More information is available in their FAQ [https://wow.groq.com/why-groq/] and on their website.
+
+
+Note that their API service, GroqCloud, is still in beta and low rate limits are enforced.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/4/26",
+ "title": "April 26, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -10923,6 +17005,35 @@ Note that their API service, GroqCloud, is still in beta and low rate limits are
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "April 1900",
+ ],
+ "content": "LLAMA 3
+
+Llama 3 [https://llama.meta.com/llama3/], Meta AI's latest openly-accessible model, can now be used in the Humanloop Prompt
+Editor.
+
+Llama 3 comes in two variants: an 8-billion parameter model that performs similarly to their previous 70-billion parameter Llama 2
+model, and a new 70-billion parameter model. Both of these variants have an expanded context window of 8192 tokens.
+
+More details and benchmarks against other models can be found on their blog post [https://ai.meta.com/blog/meta-llama-3/] and
+model card [https://github.com/meta-llama/llama3/blob/main/MODEL_CARD.md].
+
+Humanloop supports Llama 3 on the Replicate model provider, and on the newly-introduced Groq model provider.
+
+[file:d41f9b89-ab08-44c7-8805-dfed655373ad]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/4/23",
+ "title": "April 23, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -10981,6 +17092,30 @@ Humanloop supports Llama 3 on the Replicate model provider, and on the newly-int
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "April 1900",
+ ],
+ "content": "ANTHROPIC TOOL SUPPORT (BETA)
+
+Our Editor and deployed endpoints now support tool use with Anthropic's Claude 3 models. Tool calling with Anthropic is still in
+Beta, so streaming is not supported.
+
+In order to use tool calling for Claude in the Editor, you therefore need to first turn off streaming mode in the menu dropdown to
+the right of the load button.
+
+[file:5b254c05-1e62-4d5c-8685-c1be17c7d4ae]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/4/18",
+ "title": "April 18, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -11035,6 +17170,29 @@ In order to user tool calling for Claude in Editor you therefore need to first t
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "April 1900",
+ ],
+ "content": "COST, TOKENS AND LATENCY
+
+We now compute Cost, Tokens and Latency for all Prompt logs by default across all model providers.
+
+These values will now appear automatically as graphs in your Dashboard, as columns in your logs table and will be displayed in our
+Version and Log drawers.
+
+[file:6e43ec10-4d93-415e-8f5f-ddfcb2e5afcc]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/4/16",
+ "title": "April 16, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -11089,6 +17247,29 @@ These values will now appear automatically as graphs in your Dashboard, as colum
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "April 1900",
+ ],
+ "content": "COHERE COMMAND-R
+
+We've expanded the Cohere models with the latest command-r suite. You can now use these models in our Editor and via our APIs once
+you have set your Cohere API key.
+
+More details can be found on their blog post [https://cohere.com/blog/command-r-plus-microsoft-azure].
+
+[file:c641a39e-a943-4bc2-8ac6-f93be9ce5677]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/4/13",
+ "title": "April 13, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -11143,6 +17324,40 @@ More details can be found on their [blog post](https://cohere.com/blog/command-r
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "April 1900",
+ ],
+ "content": "DATASET FILES & VERSIONS
+
+In our recent release, we promoted Datasets from being attributes managed within the context of a single Prompt, to a first-class
+Humanloop file type alongside Prompts and Tools.
+
+[file:199450d9-025f-4527-bf47-d1ffaee30f35]
+
+This means you can curate Datasets and share them for use across any of the Prompts in your organization. It also means you get
+the full power of our file versioning system, allowing you to track and commit every change you make to Datasets and their
+Datapoints, with attribution and commit messages inspired by Git.
+
+[file:0498ba6f-e390-49e8-9377-72f7492cf47e]
+
+It's now easy to understand which version of a Dataset was used in a given Evaluation run, and whether the most recent edits to
+the Dataset were included or not.
+
+Read more on how to get started with datasets here [/docs/datasets].
+
+This change lays the foundation for lots more improvements we have coming to Evaluations in the coming weeks. Stay tuned!",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/4/5",
+ "title": "April 5, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -11205,6 +17420,41 @@ This change lays the foundation for lots more improvements we have coming to Eva
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "March 1900",
+ ],
+ "content": "MIXTRAL 8X7B
+
+Keeping you up to date with the latest open models, we've added support for Mixtral 8x7B to our Editor with a Replicate
+integration [https://replicate.com/].
+
+[file:79a39f66-7b69-4c4c-801d-1066a0eb8858]
+
+Mixtral 8x7B outperforms LLaMA 2 70B (already supported in Editor) with faster inference and performance comparable to that of
+GPT-3.5. More details are available in its release announcement [https://mistral.ai/news/mixtral-of-experts/].
+
+
+ADDITIONAL REPLICATE MODELS SUPPORT VIA API
+
+Through the Replicate model provider, additional open models can be used by specifying a model name via the API. The model name
+should be of a similar form to the ref used when calling replicate.run(ref) using Replicate's Python SDK
+[https://github.com/replicate/replicate-python].
+
+For example, Vicuna, an open-source chatbot model based on finetuning LLaMA can be used with the following model name alongside
+provider: "replicate" in your Prompt version.
+replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/3/25",
+ "title": "March 25, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -11288,6 +17538,37 @@ For example, Vicuna, an open-source chatbot model based on finetuning LLaMA can
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "March 1900",
+ ],
+ "content": "SURFACING UNCOMMITTED VERSIONS
+
+We now provide the ability to access your uncommitted Prompt Versions and associated Logs.
+
+Adding to our recent changes around the Commit flow for Versions
+[https://docs.humanloop.com/changelog/prompts-and-committing-prompt-versions], we've added the ability to view any uncommitted
+versions in your Versions and Logs tables. This can be useful if you need to recover or compare to a previous state during your
+Prompt engineering and Evaluation workflows.
+
+Uncommitted Versions are created when you make generations in our Editor without first committing what you are working on. In
+future, it will also be possible to create uncommitted versions when logging or generating using the API.
+
+We've added new filter tabs to the Versions and Logs table to enable this:
+
+New **All** and **From Committed Versions** filter tabs on the logs table. [file:551045ba-f04a-4286-9130-2053873be8d9] New
+**Committed** and **Uncommitted** tabs on the Versions table of your Prompt dashboard. [file:53bad108-b8cb-4865-b981-ce32d365f006]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/3/18",
+ "title": "March 18, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -11349,6 +17630,32 @@ We've added new filter tabs to the Versions and Logs table to enable this:
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "March 1900",
+ ],
+ "content": "IMPROVED NAVIGATION & SIDEBAR
+
+We've introduced a sidebar for easier navigation between your Prompts and Tools.
+
+As new language models unlock more complex use cases, you'll be setting up and connecting Prompts, Tools, and Evaluators. The new
+layout better reflects these emerging patterns, and switching between your files is now seamless with the directory tree in the
+sidebar.
+
+[file:a38066ee-9657-4012-a07a-093f2d46e66d]
+
+You can also bring up the search dialog with Cmd+K and switch to another file using only your keyboard.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/3/7",
+ "title": "March 7, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -11405,6 +17712,48 @@ You can also bring up the search dialog with **Cmd+K** and switch to another fil
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "March 1900",
+ ],
+ "content": "CLAUDE 3
+
+Introducing same-day support for Claude 3 - Anthropic's new industry-leading models. Read more about the release here
+[https://www.anthropic.com/news/claude-3-family].
+
+The release contains three models in ascending order of capability: Haiku, Sonnet, and Opus. This suite provides users with
+different options to balance intelligence, speed, and cost for their specific use-cases.
+
+[file:e71bc26c-7a15-4f75-afb6-bca5f8f1022c]
+
+
+KEY TAKEAWAYS
+
+ 1. Performance - a new leader. The largest of the 3 models, Opus, is claimed to outperform GPT-4 and Gemini Ultra on key
+ benchmarks such as MMLU and Hellaswag. It even reached 84.9% on the Humaneval coding test set (vs GPT-4’s 67%) 🤯
+ 2. 200k context window with near-perfect recall on selected benchmarks. Opus reports 99% accuracy on the NIAH test, which
+ measures how accurately a model can recall information given to it in a large corpus of data.
+ 3. Opus has vision. Anthropic claim that performance here is on par with that of other leading models (ie GPT-4 and Gemini). They
+ say it’s most useful for inputting graphs, slides etc. in an enterprise setting.
+ 4. Pricing - as compared to OpenAI:
+
+Opus - $75 (2.5x GPT-4 Turbo)
+Sonnet - $15 (50% of GPT-4 Turbo)
+Haiku - $1.25 (1.6x GPT-3.5)
+
+ 5. How you can use it: The Claude 3 family is now available on Humanloop. Bring your API key to test, evaluate and deploy the
+ publicly available models - Opus and Sonnet.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/3/6",
+ "title": "March 6, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -11495,86 +17844,94 @@ Haiku - $1.25 (1.6x GPT-3.5)
},
{
"breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2024",
- "title": "2024",
- },
- {
- "slug": "docs/changelog/2",
- "title": "February 1900",
- },
+ "Changelog",
+ "2024",
+ "February 1900",
],
- "description": "You can now create Tools in the same way as you create Prompts and Directories. This is helpful as it makes it easier to discover Tools and easier to quickly create new ones.
+ "content": "NEW TOOL CREATION FLOW
-![](file:938abbba-c26b-43d0-b511-5f58520098cc)
+You can now create Tools in the same way as you create Prompts and Directories. This is helpful as it makes it easier to discover
+Tools and easier to quickly create new ones.
-To create a new Tool simply press the New button from the directory of your choice and select one of our supported Tools, such as JSON Schema tool for function calling or our Pinecone tool to integrate with your RAG pipelines.",
- "indexSegmentId": "0",
- "slug": "docs/v5/changelog/2024/2/26#new-tool-creation-flow",
- "title": "New Tool creation flow",
- "type": "page-v4",
- "version": {
- "id": "v5.0",
- "slug": "docs/getting-started/overview",
- },
- },
- {
- "breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2024",
- "title": "2024",
- },
- {
- "slug": "docs/changelog/2",
- "title": "February 1900",
- },
- ],
- "description": "You can now manage and edit your Tools in our new Tool Editor. This is found in each Tool file and lets you create and iterate on your tools. As well, we have introduced deployments to Tools, so you can better control which versions of a tool are used within your Prompts.
+[file:938abbba-c26b-43d0-b511-5f58520098cc]
-![](file:e2489aa7-ca61-4555-809c-80c1f7e1803e)",
- "indexSegmentId": "0",
- "slug": "docs/v5/changelog/2024/2/26#tool-editor-and-deployments",
- "title": "Tool editor and deployments",
- "type": "page-v4",
- "version": {
- "id": "v5.0",
- "slug": "docs/getting-started/overview",
- },
- },
- {
- "breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2024",
- "title": "2024",
- },
- {
- "slug": "docs/changelog/2",
- "title": "February 1900",
- },
- {
- "slug": "docs/v5/changelog/2024/2/26#tool-editor-and-deployments",
- "title": "Tool editor and deployments",
- },
- ],
- "description": "This replaces the previous Tools section which has been removed. The editor will let you edit any of the tool types that Humanloop supports (JSON Schema, Google, Pinecone, Snippet, Get API) and commit new Versions.
+To create a new Tool simply press the New button from the directory of your choice and select one of our supported Tools, such as
+JSON Schema tool for function calling or our Pinecone tool to integrate with your RAG pipelines.
-![](file:1de72ff7-e6c5-4eb5-892a-977bbed692bf)",
+
+TOOL EDITOR AND DEPLOYMENTS
+
+You can now manage and edit your Tools in our new Tool Editor. This is found in each Tool file and lets you create and iterate on
+your tools. As well, we have introduced deployments to Tools, so you can better control which versions of a tool are used within
+your Prompts.
+
+[file:e2489aa7-ca61-4555-809c-80c1f7e1803e]
+
+
+TOOL EDITOR
+
+This replaces the previous Tools section which has been removed. The editor will let you edit any of the tool types that Humanloop
+supports (JSON Schema, Google, Pinecone, Snippet, Get API) and commit new Versions.
+
+[file:1de72ff7-e6c5-4eb5-892a-977bbed692bf]
+
+
+DEPLOYMENT
+
+Tools can now be deployed. You can pick a version of your Tool and deploy it. When deployed it can be used and referenced in a
+Prompt editor.
+
+An example of this: say you have a version of a Snippet tool with the signature snippet(key) and a key/value pair of
+"helpful"/"You are a helpful assistant". You decide you would rather change the value to say "You are a funny assistant", so you
+commit a version of the Tool with the updated value. This won't affect any of your prompts that reference the Snippet tool until
+you Deploy the second version, after which each prompt will automatically start using the funny assistant prompt.
+
+
+PROMPT LABELS AND HOVER CARDS
+
+We've rolled out a unified label for our Prompt Versions to allow you to quickly identify your Prompt Versions throughout our UI.
+As we're rolling out these labels across the app, you'll have a consistent way of interacting with and identifying your Prompt
+Versions.
+
+Label and hover card for a deployed Prompt Version [file:08556665-8f0e-4a81-a5d5-8494053a26d3]
+
+The labels show the deployed status and short ID of the Prompt Version. When you hover over these labels, you will see a card that
+displays the commit message and authorship of the committed version.
+
+You'll be able to find these labels in many places across the app, such as in your Prompt's deployment settings, in the Logs
+drawer, and in the Editor.
+
+The Prompt Version label and hover card in a Prompt Editor [file:595a3578-a497-450e-950e-dbef23fb0c64]
+
+As a quick tip, the color of the checkmark in the label indicates whether the version has been deployed. If the Prompt Version
+has not been deployed, the checkmark will be black.
+
+A Prompt Version that has not been deployed [file:c518da50-806c-46d9-adaa-7e01ed4c19cf]
+
+
+COMMITTING PROMPT VERSIONS
+
+Building on our terminology improvements from Project -> Prompt, we've now updated Model Configs -> Prompt Versions to improve
+consistency in our UI.
+
+This is part of a larger suite of changes to improve the workflows around how entities are managed on Humanloop and to make them
+easier to work with and understand. We will also be following up soon with a new and improved major version of our API that
+encapsulates all of our terminology improvements.
+
+In addition to just the terminology update, we've improved our Prompt versioning functionality to now use commits that can take
+commit messages, where you can describe how you've been iterating on your Prompts.
+
+We've removed the need for names (and our auto-generated placeholder names) in favour of using explicit commit messages.
+
+[file:4e590a72-bc55-4bb8-9dec-795e42bfe4af]
+
+We'll continue to improve the version control and file types support over the coming weeks.
+
+Let us know if you have any questions around these changes!",
"indexSegmentId": "0",
- "slug": "docs/v5/changelog/2024/2/26#tool-editor",
- "title": "Tool Editor",
- "type": "page-v4",
+ "slug": "docs/v5/changelog/2024/2/26",
+ "title": "February 26, 2024",
+ "type": "page-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -11594,17 +17951,104 @@ To create a new Tool simply press the New button from the directory of your choi
"slug": "docs/changelog/2",
"title": "February 1900",
},
- {
- "slug": "docs/v5/changelog/2024/2/26#tool-editor-and-deployments",
- "title": "Tool editor and deployments",
- },
],
- "description": "Tools can now be deployed. You can pick a version of your Tool and deploy it. When deployed it can be used and referenced in a Prompt editor.
+ "description": "You can now create Tools in the same way as you create Prompts and Directories. This is helpful as it makes it easier to discover Tools and easier to quickly create new ones.
-And example of this, if you have a version of a Snippet tool with the signature \`snippet(key)\` with a key/value pair of "_helpful_"/"_You are a helpful assistant_". You decide you would rather change the value to say "_You are a funny assistant_", you can commit a version of the Tool with the updated key. This wont affect any of your prompts that reference the Snippet tool until you Deploy the second version, after which each prompt will automatically start using the funny assistant prompt.",
+![](file:938abbba-c26b-43d0-b511-5f58520098cc)
+
+To create a new Tool simply press the New button from the directory of your choice and select one of our supported Tools, such as JSON Schema tool for function calling or our Pinecone tool to integrate with your RAG pipelines.",
"indexSegmentId": "0",
- "slug": "docs/v5/changelog/2024/2/26#deployment",
- "title": "Deployment",
+ "slug": "docs/v5/changelog/2024/2/26#new-tool-creation-flow",
+ "title": "New Tool creation flow",
+ "type": "page-v4",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2024",
+ "title": "2024",
+ },
+ {
+ "slug": "docs/changelog/2",
+ "title": "February 1900",
+ },
+ ],
+ "description": "You can now manage and edit your Tools in our new Tool Editor. This is found in each Tool file and lets you create and iterate on your tools. As well, we have introduced deployments to Tools, so you can better control which versions of a tool are used within your Prompts.
+
+![](file:e2489aa7-ca61-4555-809c-80c1f7e1803e)",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/2/26#tool-editor-and-deployments",
+ "title": "Tool editor and deployments",
+ "type": "page-v4",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2024",
+ "title": "2024",
+ },
+ {
+ "slug": "docs/changelog/2",
+ "title": "February 1900",
+ },
+ {
+ "slug": "docs/v5/changelog/2024/2/26#tool-editor-and-deployments",
+ "title": "Tool editor and deployments",
+ },
+ ],
+ "description": "This replaces the previous Tools section which has been removed. The editor will let you edit any of the tool types that Humanloop supports (JSON Schema, Google, Pinecone, Snippet, Get API) and commit new Versions.
+
+![](file:1de72ff7-e6c5-4eb5-892a-977bbed692bf)",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/2/26#tool-editor",
+ "title": "Tool Editor",
+ "type": "page-v4",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2024",
+ "title": "2024",
+ },
+ {
+ "slug": "docs/changelog/2",
+ "title": "February 1900",
+ },
+ {
+ "slug": "docs/v5/changelog/2024/2/26#tool-editor-and-deployments",
+ "title": "Tool editor and deployments",
+ },
+ ],
+ "description": "Tools can now be deployed. You can pick a version of your Tool and deploy it. When deployed it can be used and referenced in a Prompt editor.
+
+An example of this: say you have a version of a Snippet tool with the signature \`snippet(key)\` and a key/value pair of "_helpful_"/"_You are a helpful assistant_". You decide you would rather change the value to say "_You are a funny assistant_", so you commit a version of the Tool with the updated value. This won't affect any of your prompts that reference the Snippet tool until you Deploy the second version, after which each prompt will automatically start using the funny assistant prompt.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/2/26#deployment",
+ "title": "Deployment",
"type": "page-v4",
"version": {
"id": "v5.0",
@@ -11713,6 +18157,45 @@ Let us know if you have any questions around these changes!",
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "February 1900",
+ ],
+ "content": "ONLINE EVALUATORS FOR MONITORING TOOLS
+
+You can now use your online evaluators for monitoring the logs sent to your Tools. The results of this can be seen in the graphs
+on the Tool dashboard as well as on the Logs tab of the Tool.
+
+[file:dc38190a-d0aa-4369-bcf5-4e2b87009f50]
+
+To enable Online Evaluations follow the steps seen in our Evaluate models online [/docs/guides/evaluate-models-online] guide.
+
+
+LOGGING TOKEN USAGE
+
+We're now computing and storing the number of tokens used in both the requests to and responses from the model.
+
+This information is available in the logs table UI and as part of the log response [/api-reference/logs/get] in the API.
+Furthermore you can use the token counts as inputs to your code and LLM based evaluators.
+
+The number of tokens used in the request is called prompt_tokens and the number of tokens used in the response is called
+output_tokens.
+
+This works consistently across all model providers and whether or not you are streaming the responses. OpenAI, for example, do
+not return token usage stats when in streaming mode.
+
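+As a small illustrative sketch (the evaluator signature and a flat log structure are assumptions; prompt_tokens and output_tokens
+are the fields described above), token counts could feed a simple budget check in a code Evaluator:
+
+def evaluator(log):
+    # Sum the request and response token counts recorded on the log
+    total_tokens = (log.get("prompt_tokens") or 0) + (log.get("output_tokens") or 0)
+    # Hypothetical budget: flag any generation that used more than 1,000 tokens
+    return total_tokens <= 1000
+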
+[file:20386cb1-3684-4495-b7c6-843ba5284866]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/2/14",
+ "title": "February 14, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -11800,6 +18283,32 @@ This works consistently across all model providers and whether or not you are yo
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "February 1900",
+ ],
+ "content": "PROMPT VERSION AUTHORSHIP
+
+You can now view who authored a Prompt Version.
+
+Prompt Version authorship in the Prompt Version slideover [file:5b270f85-8943-4a8d-b26e-35e8e7af8132]
+
+We've also introduced a popover showing more Prompt Version details that shows when you mouseover a Prompt Version's ID.
+
+Prompt Version popover in the Logs slideover [file:7fa52e59-7340-4879-85aa-b3f90e94e626]
+
+Keep an eye out as we'll be introducing this in more places across the app.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/2/13",
+ "title": "February 13, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -11860,6 +18369,40 @@ Keep an eye out as we'll be introducing this in more places across the app.",
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "February 1900",
+ ],
+ "content": "FILTERABLE AND SORTABLE EVALUATIONS OVERVIEW
+
+We've made improvements to the evaluations runs overview page to make it easier for your team to find interesting or important
+runs.
+
+[file:8238b014-44fd-4143-896f-d925082329d2]
+
+The charts have been updated to show a single datapoint per run. Each chart represents a single evaluator, and shows the
+performance of the prompt tested in that run, so you can see at a glance how the performance of your prompt versions has evolved
+through time, and visually spot the outliers. Datapoints are color-coded by the dataset used for the run.
+
+The table is now paginated and does not load your entire project's list of evaluation runs in a single page load. The page should
+therefore load faster for teams with a large number of runs.
+
+The columns in the table are now filterable and sortable, allowing you to - for example - filter just for the completed runs which
+test two specific prompt versions on a specific dataset, sorted by their performance under a particular evaluator.
+
+Here, we've filtered the table on completed runs that tested three specific prompt versions of interest, and sorted to show those
+with the worst performance on the Valid JSON evaluator. [file:c035e654-05ec-46e4-842a-65bb0418ef55]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/2/9",
+ "title": "February 9, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -11920,6 +18463,38 @@ The columns in the table are now filterable and sortable, allowing you to - for
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "February 1900",
+ ],
+ "content": "PROJECTS RENAME AND FILE CREATION FLOW
+
+We've renamed Projects to Prompts and Tools as part of our move towards managing Prompts, Tools, Evaluators and Datasets as
+special-cased and strictly versioned files in your Humanloop directories.
+
+This is a purely cosmetic change for now. Your Projects (now Prompts and Tools) will continue to behave exactly the same. This is
+the first step in a whole host of app layout, navigation and API improvements we have planned in the coming weeks.
+
+If you are curious, please reach out to learn more.
+
+[file:d6f85253-a5e6-41e2-9f60-0cb4ffe7cd1b]
+
+New creation flow
+
+We've also updated our file creation flow UI. When you go to create projects you'll notice they are called Prompts now.
+
+[file:c2bd3fb0-f5f1-4cc6-bee0-5711b2664357]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/2/8",
+ "title": "February 8, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -11982,6 +18557,68 @@ We've also updated our file creation flow UI. When you go to create projects you
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "February 1900",
+ ],
+ "content": "CONTROL LOGGING LEVEL
+
+We've added a save flag to all of our endpoints that generate logs on Humanloop so that you can control whether the request and
+response payloads that may contain sensitive information are persisted on our servers or not.
+
+If save is set to false then no inputs, messages or outputs of any kind (including the raw provider request and responses) are
+stored on our servers. This can be helpful for sensitive use cases where you can't, for example, risk PII leaving your system.
+
+Details of the model configuration and any metadata you send are still stored. Therefore you can still benefit from certain types
+of evaluators such as human feedback, latency and cost, as well as still track important metadata over time that may not contain
+sensitive information.
+
+This includes all our chat [/api-reference/chats/create] and completion [/api-reference/completions/create] endpoint variations,
+as well as our explicit log [/api-reference/logs/log] endpoint.
+
+from humanloop import Humanloop
+
+# You need to initialize the Humanloop SDK with your API Keys
+humanloop = Humanloop(api_key="")
+
+# humanloop.complete_deployed(...) will call the active model config on your project.
+# You can optionally set the save flag to False
+complete_response = humanloop.complete_deployed(
+ save=False,
+ project="",
+ inputs={"question": "I have inquiry about by life insurance policy. Can you help?"},
+)
+
+# You can still retrieve the data_id and output as normal
+data_id = complete_response.data[0].id
+output = complete_response.data[0].output
+
+# And log end user feedback that will still be stored
+humanloop.feedback(data_id=data_id, type="rating", value="good")
+
+
+
+
+LOGGING PROVIDER REQUEST
+
+We're now capturing the raw provider request body alongside the existing provider response for all logs generated from our
+deployed endpoints [/docs/guides/chat-using-the-sdk].
+
+This provides more transparency into how we map our provider-agnostic requests to specific providers. It can also be effective
+for helping to troubleshoot the cases where we return well-handled provider errors from our API.
+
+[file:00230e09-61a9-484b-9645-ef413af2b2b8]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/2/2",
+ "title": "February 2, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -12091,6 +18728,118 @@ This provides more transparency into how we map our provider agnostic requests t
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "January 1900",
+ ],
+ "content": "ADD EVALUATORS TO EXISTING RUNS
+
+You can now add an evaluator to any existing evaluation run. This is helpful in situations where you have no need to regenerate
+logs across a dataset, but simply want to run new evaluators across the existing run. By doing this instead of launching a fresh
+run, you can save significant time & costs associated with unnecessarily regenerating logs, especially when working with large
+datasets.
+
+Use the **Add Evaluator** button to run more evaluators across the logs in an existing evaluation run. This can be done on any
+runs, including those still running or already completed. [file:088bd4ed-4574-423e-b697-8d393e06ff5c]
+
+
+IMPROVED EVALUATION DEBUG CONSOLE
+
+We've enhanced the usability of the debug console when creating and modifying evaluators. Now you can more easily inspect the data
+you are working with, and understand the root causes of errors to make debugging quicker and more intuitive.
+
+[file:6118f4b8-8e83-4616-879f-9c45c156766a]
+
+On any row in the debug console, click the arrow next to a testcase to inspect the full entity in a slideover panel.
+
+After clicking Run to generate a log from a testcase, you can inspect the full log right from the debug console, giving you
+clearer access to error messages or the model-generated content, as in the example below.
+
+[file:0c0c1802-168e-4645-bde8-5f2a62dcba8b]
+
+
+LLM EVALUATORS
+
+We expect this feature to be most useful in the case of creating and debugging LLM evaluators. You can now inspect the log of the
+LLM evaluation itself right from the debug console, along with the original testcase and model-generated log, as described above.
+
+After clicking Run on a testcase in the debug console, you'll see the LLM Evaluation Log column populated with a button that opens
+a full drawer.
+
+[file:e1a43293-9ea1-4e5a-aa03-45e7f68fec98]
+
+This is particularly helpful to verify that your evaluation prompt was correctly populated with data from the underlying log and
+testcase, and to help understand why the LLM's evaluation output may not have been parsed correctly into the output values.
+
+[file:fed5fba1-2196-4bd9-95e7-925b72a5a74b]
+
+
+TOOL PROJECTS
+
+We have upgraded projects to now also work for tools. Tool projects are automatically created for tools you define as part of your
+model config in the Editor [/docs/guides/create-a-tool-in-the-editor] as well as tools managed at organization level
+[/docs/guides/link-a-jsonschema-tool].
+
+It is now easier to access the logs from your tools and manage different versions like you currently do for your prompts.
+
+[file:f90baa0d-d702-4a46-b632-46e372320c78]
+
+
+TOOL VERSIONING
+
+In the dashboard view, you can see the different versions of your tools. This will soon be expanded to link you to the source
+config and provide a more comprehensive view of your tool's usage.
+
+
+LOGS
+
+Any logs submitted via the SDK that relate to these tools will now appear in the Logs view of these projects. You can see this by
+following our sessions guide [https://dash.readme.com/project/humanloop/v4.0/docs/logging-session-traces] and logging a new tool
+via the SDK. This also works natively with online Evaluators, so you can start to layer in observability for the individual
+non-LLM components of your session.
+
+
+OFFLINE EVALUATIONS VIA SDK
+
+You can trigger evaluations on your tools projects similar to how you would for an LLM project with model configs. This can be
+done by logging to the tool project, creating a dataset, and triggering an evaluation run. A good place to start would be the Set
+up evaluations using API [/docs/guides/evaluations-using-api] guide.
+
+
+SUPPORT FOR NEW OPENAI MODELS
+
+Following OpenAI's latest model releases [https://openai.com/blog/new-embedding-models-and-api-updates], you will find support for
+all the latest models in our Playground and Editor.
+
+
+GPT-3.5-TURBO AND GPT-4-TURBO
+
+If your API key has access to the models, you'll see the new release gpt-4-0125-preview and gpt-3.5-turbo-0125 available when
+working in Playground and Editor. These models are more capable and cheaper than their predecessors - see the OpenAI release
+linked above for full details.
+
+[file:aef6d838-1f19-4314-8ea4-ad27a8f59585]
+
+We also support the new gpt-4-turbo-preview model alias, which points to the latest gpt-4-turbo model without specifying a
+specific version.
+
+
+EMBEDDING MODELS
+
+Finally, the new embedding models - text-embedding-3-small and text-embedding-3-large are also available for use via Humanloop.
+The small model is 5x cheaper than the previous generation ada-002 embedding model, while the larger model significantly improves
+performance and maps to a much larger embedding space.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/1/30",
+ "title": "January 30, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -12412,6 +19161,40 @@ We also support the new \`gpt-4-turbo-preview\` model alias, which points to the
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "January 1900",
+ ],
+ "content": "IMPROVED EVALUATION RUN LAUNCHER
+
+We've made some usability enhancements to the launch experience when setting up batch generation & evaluation runs.
+
+It's now clearer which model configs, datasets and evaluators you've selected. It's also now possible to specify whether you want
+the logs to be generated in the Humanloop runtime, or if you're going to post the logs from your own infrastructure via the API.
+
+[file:26511ea5-4dc5-4bdf-a083-7594715ee1e1]
+
+
+CANCELLABLE EVALUATION RUNS
+
+Occasionally, you may launch an evaluation run and then realise that you didn't configure it quite the way you wanted. Perhaps you
+want to use a different model config or dataset, or would like to halt its progress for some other reason.
+
+We've now made evaluation runs cancellable from the UI - see the screenshot below. This is especially helpful if you're running
+evaluations over large datasets, where you don't want to unnecessarily consume provider credits.
+
+Cancellation button in the evaluation run page. [file:11f8471b-5866-4bf5-aa52-86c24bf8a677]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/1/19",
+ "title": "January 19, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -12499,6 +19282,33 @@ We've now made evaluation runs cancellable from the UI - see the screenshot belo
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "January 1900",
+ ],
+ "content": "FASTER OFFLINE EVALUATIONS
+
+We've introduced batching to our offline Evaluations to significantly speed up runtime performance and also improved the
+robustness to things going wrong mid-run.
+
+In addition to our recent enhancements to the Evaluations API [changelog:evaluation-api-enhancements], we've also made some
+significant improvements to our underlying orchestration framework which should mean your evaluation runs are now faster and more
+reliable. In particular, we now batch generations across the run - by default in groups of five, being conscious of potential rate
+limit errors (though this will soon be configurable).
+
+Each batch runs its generations concurrently, so you should see much faster completion times - especially in runs across larger
+datasets.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/1/12",
+ "title": "January 12, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -12553,6 +19363,50 @@ Each batch runs its generations concurrently, so you should see much faster comp
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2024",
+ "January 1900",
+ ],
+ "content": "EVALUATION API ENHANCEMENTS
+
+We've started the year by enhancing our evaluations API to give you more flexibility for self-hosting whichever aspects of the
+evaluation workflow you need to run in your own infrastructure - while leaving the rest to us!
+
+
+MIXING AND MATCHING THE HUMANLOOP-RUNTIME WITH SELF-HOSTING
+
+Conceptually, evaluation runs have two components:
+
+ 1. Generation of logs for the datapoints using the version of the model you are evaluating.
+ 2. Evaluating those logs using Evaluators.
+
+Now, using the Evaluations API, Humanloop offers the ability to generate logs either within the Humanloop runtime, or self-hosted
+(see our guide on external generations for evaluations [/docs/guides/evaluating-externally-generated-logs]).
+
+Similarly, evaluation of the logs can be performed in the Humanloop runtime (using evaluators that you can define in-app), or
+self-hosted (see our guide on self-hosted evaluations [/docs/guides/self-hosted-evaluations]).
+
+It is now possible to mix-and-match self-hosted and Humanloop-runtime logs and evaluations in any combination you wish.
+
+When creating an Evaluation (via the improved UI dialogue or via the API), you can set the new hl_generated flag to False to
+indicate that you are posting the logs from your own infrastructure. You can then also include an evaluator of type External to
+indicate that you will post evaluation results from your own infrastructure.
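+
+As a rough sketch (the hl_generated flag and the External evaluator type are described above; their exact placement on the create
+call is an assumption, not taken from the API reference):
+
+from humanloop import Humanloop
+
+humanloop = Humanloop(api_key="...")
+
+# Create an evaluation whose logs will be posted from your own infrastructure
+evaluation_run = humanloop.evaluations.create(
+ project_id="<PROJECT_ID>",
+ config_id="<CONFIG_ID>",
+ evaluator_ids=["<EXTERNAL_EVALUATOR_ID>"], # an evaluator of type External
+ dataset_id="<DATASET_ID>",
+ hl_generated=False, # assumption: flag passed directly on create
+)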
+
+[file:54ec0056-034c-4bf5-8393-d8d2846d68bd]
+
+You can now also include multiple evaluators on any run, and these can include a combination of External (i.e. self-hosted) and
+Humanloop-runtime evaluators.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2024/1/11",
+ "title": "January 11, 2024",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -12647,6 +19501,82 @@ You can now also include multiple evaluators on any run, and these can include a
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "December 1900",
+ ],
+ "content": "HUMAN EVALUATORS
+
+We've introduced a new special type of 'Human' Evaluator to complement our existing code and AI-based Evaluators.
+
+There are many important evaluation use cases that require input from your internal domain experts, or product teams. Typically
+this is where you would like a gold standard judgement of how your LLM app is performing.
+
+[file:2cac696f-23eb-406f-8286-2c1b5427ef52]
+
+Our new Human Evaluator allows you to trigger a batch evaluation run as normal (from our UI as part of your prompt engineering
+process, or using our SDK as part of your CI/CD pipeline) and then queues the results ready for a human to provide feedback.
+
+Once completed, the feedback is aggregated to give a top-line summary of how the model is performing. It can also be combined with
+automatic code and AI evaluators in a single run.
+
+[file:5d6078ad-b591-47c2-81a0-015b3bc28d32]
+
+Set up your first Human Evaluator run by following our guide. [/docs/guides/evaluating-with-human-feedback]
+
+
+RETURN INPUTS FLAG
+
+We've introduced a return_inputs flag on our chat and completion endpoints to improve performance for larger payloads.
+
+As context model windows get increasingly larger, for example Claude with 200k tokens
+[https://www.anthropic.com/index/claude-2-1], it's important to make sure our APIs remain performant. A contributor to response
+times is the size of the response payload being sent over the wire.
+
+When you set this new flag to false, our responses will no longer contain the inputs that were sent to the model and so can be
+significantly smaller. This is the first in a sequence of changes to add more control to the caller around API behaviour.
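+
+A minimal sketch of the idea (the exact placement of the flag on the SDK methods is assumed here, not verified):
+
+from humanloop import Humanloop
+
+humanloop = Humanloop(api_key="...")
+
+chat_response = humanloop.chat_deployed(
+ project_id="<PROJECT_ID>",
+ messages=[{"role": "user", "content": "Summarise the attached report."}],
+ return_inputs=False, # the response payload will no longer echo the inputs
+)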
+
+As always, we welcome suggestions, requests, and feedback should you have any.
+
+
+GEMINI
+
+You can now use Google's latest LLMs, Gemini [https://blog.google/technology/ai/google-gemini-ai/], in Humanloop.
+
+
+SETUP
+
+To use Gemini, first go to https://makersuite.google.com/app/apikey [https://makersuite.google.com/app/apikey] and generate an API
+key. Then, save this under the "Google" provider on your API keys page [http://app.humanloop.com/account/api-keys].
+
+Head over to the playground, and you should see gemini-pro and gemini-pro-vision in your list of models.
+
+[file:432800b8-3cc7-4751-b977-5643c880c68d]
+
+You can also now use Gemini through the Humanloop API's /chat endpoints.
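+
+For example, once a gemini-pro based model config is deployed to your project, calling it looks the same as for any other provider
+(a sketch; the deployment step and the project ID are assumed):
+
+from humanloop import Humanloop
+
+humanloop = Humanloop(api_key="...")
+
+chat_response = humanloop.chat_deployed(
+ project_id="<PROJECT_ID>", # a project with a deployed gemini-pro model config
+ messages=[{"role": "user", "content": "Write a haiku about the weather."}],
+)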
+
+
+FEATURES
+
+Gemini offers support for multi-turn chats, tool calling, and multi-modality.
+
+However, note that while gemini-pro supports multi-turn chats and tool calling, it does not support multi-modality. On the other
+hand, gemini-pro-vision supports multi-modality but not multi-turn chats or tool calling. Refer to Gemini's docs
+[https://ai.google.dev/models/gemini] for more details.
+
+When providing images to Gemini, we've maintained compatibility with OpenAI's API. This means that when using Humanloop, you can
+provide images either via an HTTP URL or with a base64-encoded data URL.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/12/22",
+ "title": "December 22, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -12833,6 +19763,31 @@ When providing images to Gemini, we've maintained compatibility with OpenAI's AP
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "December 1900",
+ ],
+ "content": "CHAT SESSIONS IN EDITOR
+
+Your chat messages in Editor are now recorded as part of a session so you can more easily keep track of conversations.
+
+[file:abcf95fd-15a8-4db1-a408-59dc0956b68d]
+
+After chatting with a saved prompt, go to the sessions tab and your messages will be grouped together.
+
+If you want to do this with the API, it can be as simple as setting the session_reference_id - see docs on sessions
+[/docs/guides/logging-session-traces].",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/12/21",
+ "title": "December 21, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -12889,6 +19844,31 @@ If you want to do this with the API, it can be as simple as setting the \`sessio
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "December 1900",
+ ],
+ "content": "ENVIRONMENT LOGS
+
+Logs for your deployed prompts will now be tagged with the corresponding environment [/docs/guides/deploy-to-an-environment].
+
+In your logs table, you can now filter your logs based on environment:
+
+[file:34486ade-4d48-4ee9-b6bf-7cd2201dc8b8]
+
+You can now also pass an environment tag when using the explicit /log [/api-reference/logs/log] endpoint; helpful for use cases
+such as orchestrating your own models [/docs/guides/use-your-own-model-provider].",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/12/13",
+ "title": "December 13, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -12947,117 +19927,51 @@ You can now also pass an \`environment\` tag when using the explicit [/log ](/ap
},
{
"breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2023",
- "title": "2023",
- },
- {
- "slug": "docs/changelog/12",
- "title": "December 1900",
- },
+ "Changelog",
+ "2023",
+ "December 1900",
],
- "description": "We've improved the experience of creating and debugging your evaluators.
+ "content": "IMPROVED EVALUATOR UI
-Now that you can [access any property of the objects you're testing](/api-reference/changelog#llm-evals---improved-data-access) we've cleaned up the debug panel to make easier to view the testcases that you load from a dataset or from your projects.
+We've improved the experience of creating and debugging your evaluators.
-
+Now that you can access any property of the objects you're testing [/api-reference/changelog#llm-evals---improved-data-access]
+we've cleaned up the debug panel to make it easier to view the testcases that you load from a dataset or from your projects.
-We've also clarified what the return types are expected as you create your evaluators.",
- "indexSegmentId": "0",
- "slug": "docs/v5/changelog/2023/12/12#improved-evaluator-ui",
- "title": "Improved Evaluator UI",
- "type": "page-v4",
- "version": {
- "id": "v5.0",
- "slug": "docs/getting-started/overview",
- },
- },
- {
- "breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2023",
- "title": "2023",
- },
- {
- "slug": "docs/changelog/12",
- "title": "December 1900",
- },
- ],
- "description": "Following our recent [introduction of our .prompt file](/docs/guides/prompt-file-format), you can now compare your model configs within a project with our new 'diff' view.
+[file:f91107d2-2e7c-4d00-9b91-a36471b8c879]
-![](file:eae9c590-f45f-4fbc-957e-53603380acbf)
+We've also clarified what return types are expected as you create your evaluators.
-As you modify and improve upon your model configs, you might want to remind yourself of the changes that were made between different versions of your model config. To do so, you can now select 2 model configs in your project dashboard and click **Compare** to bring up a side-by-side comparison between them. Alternatively, open the actions menu and click **Compare to deployed**.
-
+PROMPT DIFFS
-This diff compares the .prompt files representing the two model configs, and will highlight any differences such as in the model, hyperparameters, or prompt template.",
- "indexSegmentId": "0",
- "slug": "docs/v5/changelog/2023/12/12#prompt-diffs",
- "title": "Prompt diffs",
- "type": "page-v4",
- "version": {
- "id": "v5.0",
- "slug": "docs/getting-started/overview",
- },
- },
- {
- "breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2023",
- "title": "2023",
- },
- {
- "slug": "docs/changelog/12",
- "title": "December 1900",
- },
- ],
- "description": "In order to help you write better LLM evaluator prompts, you now have finer-grained access to the objects you are evaluating.
+Following our recent introduction of our .prompt file [/docs/guides/prompt-file-format], you can now compare your model configs
+within a project with our new 'diff' view.
-It's now possible to access any part of the \`log\` and \`testcase\` objects using familiar syntax like \`log.messages[0].content\`. Use the debug console to help understand what the objects look like when writing your prompts.
+[file:eae9c590-f45f-4fbc-957e-53603380acbf]
-![](file:4f5d8445-9687-44ac-89f1-288ac5714058)",
- "indexSegmentId": "0",
- "slug": "docs/v5/changelog/2023/12/12#llm-evals---improved-data-access",
- "title": "LLM evals - improved data access",
- "type": "page-v4",
- "version": {
- "id": "v5.0",
- "slug": "docs/getting-started/overview",
- },
- },
- {
- "breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2023",
- "title": "2023",
- },
- {
- "slug": "docs/changelog/12",
- "title": "December 1900",
- },
- ],
- "description": undefined,
+As you modify and improve upon your model configs, you might want to remind yourself of the changes that were made between
+different versions of your model config. To do so, you can now select 2 model configs in your project dashboard and click Compare
+to bring up a side-by-side comparison between them. Alternatively, open the actions menu and click Compare to deployed.
+
+[file:06b0b0e3-d3b4-463c-bf65-12657d3897a8]
+
+This diff compares the .prompt files representing the two model configs, and will highlight any differences such as in the model,
+hyperparameters, or prompt template.
+
+
+LLM EVALS - IMPROVED DATA ACCESS
+
+In order to help you write better LLM evaluator prompts, you now have finer-grained access to the objects you are evaluating.
+
+It's now possible to access any part of the log and testcase objects using familiar syntax like log.messages[0].content. Use the
+debug console to help understand what the objects look like when writing your prompts.
+
+[file:4f5d8445-9687-44ac-89f1-288ac5714058]",
"indexSegmentId": "0",
- "slug": "docs/v5/changelog/2023/12/5",
- "title": "December 5, 2023",
- "type": "page-v4",
+ "slug": "docs/v5/changelog/2023/12/12",
+ "title": "December 12, 2023",
+ "type": "page-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -13078,18 +19992,16 @@ It's now possible to access any part of the \`log\` and \`testcase\` objects usi
"title": "December 1900",
},
],
- "description": "It's now possible to manage tool definitions globally for your organization and re-use them across multiple projects by linking them to your model configs.
-
-Prior to this change, if you wanted to re-use the same tool definition across multiple model configs, you had to copy and paste the JSON schema snippet defining the name, description and parameters into your Editor for each case. And if you wanted to make changes to this tool, you would have to recall which model configs it was saved to prior and update them inline 1 by 1.
+ "description": "We've improved the experience of creating and debugging your evaluators.
-You can achieve this tool re-use by first defining an instance of our new \`JsonSchema\` tool available as another option in your global \`Tools\` tab. Here you can define a tool once, such as \`get_current_weather(location: string, unit: 'celsius' | 'fahrenheit')\`, and then link that to as many model configs as you need within the Editor as shown below.
+Now that you can [access any property of the objects you're testing](/api-reference/changelog#llm-evals---improved-data-access) we've cleaned up the debug panel to make it easier to view the testcases that you load from a dataset or from your projects.
-Importantly, updates to the \`get_current_weather\` \`JsonSchema\` tool defined here will then propagate automatically to all the model configs you've linked it to, without having to publish new versions of the prompt.
+
-The old behaviour of defining the tool inline as part of your model config definition is still available for the cases where you do want changes in the definition of the tool to lead to new versions of the model-config.",
+We've also clarified what return types are expected as you create your evaluators.",
"indexSegmentId": "0",
- "slug": "docs/v5/changelog/2023/12/5#tool-linking",
- "title": "Tool linking",
+ "slug": "docs/v5/changelog/2023/12/12#improved-evaluator-ui",
+ "title": "Improved Evaluator UI",
"type": "page-v4",
"version": {
"id": "v5.0",
@@ -13111,14 +20023,184 @@ The old behaviour of defining the tool inline as part of your model config defin
"title": "December 1900",
},
],
- "description": "Navigate to the [tools tab](https://app.humanloop.com/hl-test/tools) in your organisation and select the JsonSchema tool card.
+ "description": "Following our recent [introduction of our .prompt file](/docs/guides/prompt-file-format), you can now compare your model configs within a project with our new 'diff' view.
-![](file:6403419e-e962-4627-9353-1747e57d5349)
+![](file:eae9c590-f45f-4fbc-957e-53603380acbf)
-With the dialog open, define your tool with \`name\`, \`description\`, and \`parameters\` values. Our guide for using [OpenAI Function Calling in the playground](/docs/guides/create-a-tool-in-the-editor) can be a useful reference in this case.",
+As you modify and improve upon your model configs, you might want to remind yourself of the changes that were made between different versions of your model config. To do so, you can now select 2 model configs in your project dashboard and click **Compare** to bring up a side-by-side comparison between them. Alternatively, open the actions menu and click **Compare to deployed**.
+
+This diff compares the .prompt files representing the two model configs, and will highlight any differences such as in the model, hyperparameters, or prompt template.",
"indexSegmentId": "0",
- "slug": "docs/v5/changelog/2023/12/5#set-up-the-tool",
- "title": "Set up the tool",
+ "slug": "docs/v5/changelog/2023/12/12#prompt-diffs",
+ "title": "Prompt diffs",
+ "type": "page-v4",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2023",
+ "title": "2023",
+ },
+ {
+ "slug": "docs/changelog/12",
+ "title": "December 1900",
+ },
+ ],
+ "description": "In order to help you write better LLM evaluator prompts, you now have finer-grained access to the objects you are evaluating.
+
+It's now possible to access any part of the \`log\` and \`testcase\` objects using familiar syntax like \`log.messages[0].content\`. Use the debug console to help understand what the objects look like when writing your prompts.
+
+![](file:4f5d8445-9687-44ac-89f1-288ac5714058)",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/12/12#llm-evals---improved-data-access",
+ "title": "LLM evals - improved data access",
+ "type": "page-v4",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2023",
+ "title": "2023",
+ },
+ {
+ "slug": "docs/changelog/12",
+ "title": "December 1900",
+ },
+ ],
+ "description": undefined,
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/12/5",
+ "title": "December 5, 2023",
+ "type": "page-v4",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "December 1900",
+ ],
+ "content": "TOOL LINKING
+
+It's now possible to manage tool definitions globally for your organization and re-use them across multiple projects by linking
+them to your model configs.
+
+Prior to this change, if you wanted to re-use the same tool definition across multiple model configs, you had to copy and paste
+the JSON schema snippet defining the name, description and parameters into your Editor for each case. And if you wanted to make
+changes to this tool, you would have to recall which model configs it was previously saved to and update them inline one by one.
+
+You can achieve this tool re-use by first defining an instance of our new JsonSchema tool available as another option in your
+global Tools tab. Here you can define a tool once, such as get_current_weather(location: string, unit: 'celsius' | 'fahrenheit'),
+and then link that to as many model configs as you need within the Editor as shown below.
+
+Importantly, updates to the get_current_weather JsonSchema tool defined here will then propagate automatically to all the model
+configs you've linked it to, without having to publish new versions of the prompt.
+
+The old behaviour of defining the tool inline as part of your model config definition is still available for the cases where you
+do want changes in the definition of the tool to lead to new versions of the model-config.
+
+
+SET UP THE TOOL
+
+Navigate to the tools tab [https://app.humanloop.com/hl-test/tools] in your organisation and select the JsonSchema tool card.
+
+[file:6403419e-e962-4627-9353-1747e57d5349]
+
+With the dialog open, define your tool with name, description, and parameters values. Our guide for using OpenAI Function Calling
+in the playground [/docs/guides/create-a-tool-in-the-editor] can be a useful reference in this case.
+
+
+USING THE TOOL
+
+In the editor of your target project, link the tool by pressing the Add Tool button and selecting your get_current_weather tool.
+
+[file:4ad4a9d7-8ce4-4996-b27c-b0cc496276f3]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/12/5",
+ "title": "December 5, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2023",
+ "title": "2023",
+ },
+ {
+ "slug": "docs/changelog/12",
+ "title": "December 1900",
+ },
+ ],
+ "description": "It's now possible to manage tool definitions globally for your organization and re-use them across multiple projects by linking them to your model configs.
+
+Prior to this change, if you wanted to re-use the same tool definition across multiple model configs, you had to copy and paste the JSON schema snippet defining the name, description and parameters into your Editor for each case. And if you wanted to make changes to this tool, you would have to recall which model configs it was previously saved to and update them inline one by one.
+
+You can achieve this tool re-use by first defining an instance of our new \`JsonSchema\` tool available as another option in your global \`Tools\` tab. Here you can define a tool once, such as \`get_current_weather(location: string, unit: 'celsius' | 'fahrenheit')\`, and then link that to as many model configs as you need within the Editor as shown below.
+
+Importantly, updates to the \`get_current_weather\` \`JsonSchema\` tool defined here will then propagate automatically to all the model configs you've linked it to, without having to publish new versions of the prompt.
+
+The old behaviour of defining the tool inline as part of your model config definition is still available for the cases where you do want changes in the definition of the tool to lead to new versions of the model-config.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/12/5#tool-linking",
+ "title": "Tool linking",
+ "type": "page-v4",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2023",
+ "title": "2023",
+ },
+ {
+ "slug": "docs/changelog/12",
+ "title": "December 1900",
+ },
+ ],
+ "description": "Navigate to the [tools tab](https://app.humanloop.com/hl-test/tools) in your organisation and select the JsonSchema tool card.
+
+![](file:6403419e-e962-4627-9353-1747e57d5349)
+
+With the dialog open, define your tool with \`name\`, \`description\`, and \`parameters\` values. Our guide for using [OpenAI Function Calling in the playground](/docs/guides/create-a-tool-in-the-editor) can be a useful reference in this case.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/12/5#set-up-the-tool",
+ "title": "Set up the tool",
"type": "page-v4",
"version": {
"id": "v5.0",
@@ -13177,6 +20259,68 @@ With the dialog open, define your tool with \`name\`, \`description\`, and \`par
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "December 1900",
+ ],
+ "content": "IMPROVED LOG TABLE UI
+
+We've updated how we show logs and datapoints in their respective tables. You can now see the stack of inputs and messages in a
+cleaner interface rather than having them spread into separate columns.
+
+Part of the updated Log Table. Inputs are now stacked with a more consistent and less-busy UI.
+[file:457f922b-7a16-4f77-afa2-38df4729d821]
+
+There will be more updates soon to improve how logs and prompts are shown in tables and drawers, so if you have ideas for
+improvements please let us know.
+
+
+INTRODUCING .PROMPT FILES
+
+We're introducing a .prompt file format for representing model configs in a format that's both human-readable and easy to work
+with.
+
+For certain use cases it can be helpful for engineers to also store their prompts alongside their app's source code in their
+favourite version control system. The .prompt file is the appropriate artefact for this.
+
+These .prompt files can be retrieved through both the API and through the Humanloop app.
+
+
+EXPORTING VIA API
+
+To fetch a .prompt file via the API, make a POST request to https://api.humanloop.com/v4/model-configs/{id}/export, where {id} is
+the ID of the model config (beginning with config_).
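+
+A sketch of what that looks like with the Python requests library (the API key header name here is an assumption - check the API
+reference for the exact authentication scheme):
+
+import requests
+
+config_id = "config_..." # hypothetical model config ID
+response = requests.post(
+ f"https://api.humanloop.com/v4/model-configs/{config_id}/export",
+ headers={"X-API-KEY": "<YOUR_API_KEY>"}, # assumed header name
+)
+print(response.text) # the exported .prompt file contents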
+
+
+EXPORT FROM HUMANLOOP
+
+You can also export an existing model config as a .prompt file from the app. Find the model config within the project's
+dashboard's table of model configs and open the actions menu by clicking the three dots. Then click Export .prompt. (You can also
+find this button within the drawer that opens after clicking on a model config's row).
+
+[file:d408a762-fae6-4116-9508-9ea75091ca8e]
+
+
+EDITOR
+
+Additionally, we've added the ability to view and edit your model configs in a .prompt file format when in Editor. Press
+Cmd-Shift-E when in editor to swap over to a view of your .prompt file.
+
+[file:a8aa81ec-68d1-4fbb-8697-7a476a556ea5]
+
+More details on our .prompt file format are available here [/docs/guides/prompt-file-format]. We'll be building on this and making
+it more powerful. Stay tuned.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/12/4",
+ "title": "December 4, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -13353,6 +20497,295 @@ More details on our .prompt file format are available [here](/docs/guides/prompt
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "November 1900",
+ ],
+ "content": "IMPROVED RBACS
+
+We've introduced more levels to our role-based access controls (RBACs).
+
+We now distinguish between different roles to help you better manage your organization's access levels and permissions on
+Humanloop.
+
+This is the first in a sequence of upgrades we are making around RBACs.
+
+
+ORGANIZATION ROLES
+
+Everyone invited to the organization can currently access all projects (controlling project access is coming soon).
+
+A user can be one of the following roles:
+
+Admin: The highest level of control. They can manage, modify, and oversee the organization's settings and have full
+functionality across all projects.
+
+Developer: (Enterprise tier only) Can deploy prompts, manage environments, create and add API keys, but lacks the ability to
+access billing or invite others.
+
+Member: (Enterprise tier only) The basic level of access. Can create and save prompts and run evaluations, but not deploy. Cannot
+see any org-wide API keys.
+
+
+RBACS SUMMARY
+
+Here is the full breakdown of roles and access:
+
+Action                            Member   Developer   Admin
+Create and manage Prompts         ✔️       ✔️          ✔️
+Inspect logs and feedback         ✔️       ✔️          ✔️
+Create and manage evaluators      ✔️       ✔️          ✔️
+Run evaluations                   ✔️       ✔️          ✔️
+Create and manage datasets        ✔️       ✔️          ✔️
+Create and manage API keys                 ✔️          ✔️
+Manage prompt deployments                  ✔️          ✔️
+Create and manage environments             ✔️          ✔️
+Send invites                                           ✔️
+Set user roles                                         ✔️
+Manage billing                                         ✔️
+Change organization settings                           ✔️
+
+
+SELF HOSTED EVALUATIONS
+
+We've added support for managing evaluations [/docs/guides/evaluate-your-model] outside of Humanloop in your own code.
+
+There are certain use cases where you may wish to run your evaluation process outside of Humanloop, where the evaluator itself is
+defined in your code as opposed to being defined using our Humanloop runtime.
+
+For example, you may have implemented an evaluator that uses your own custom model, or has to interact with multiple systems. In
+which case, it can be difficult to define these as a simple code or LLM evaluator [/docs/guides/use-llms-to-evaluate-logs] within
+your Humanloop project.
+
+With this kind of setup, our users have found it very beneficial to leverage the datasets they have curated on Humanloop, as well
+as consolidate all of the results alongside the prompts stored on Humanloop.
+
+To better support this setting, we're releasing additional API endpoints and SDK utilities. We've added endpoints that allow you
+to:
+
+ * Retrieve your curated datasets
+ * Trigger evaluation runs
+ * Send evaluation results for your datasets generated using your custom evaluators
+
+Below is a code snippet showing how you can use the latest version of the Python SDK to log an evaluation run to a Humanloop
+project. For a full explanation, see our guide [/docs/guides/self-hosted-evaluations] on self-hosted evaluations.
+
+from humanloop import Humanloop
+
+API_KEY = ...
+humanloop = Humanloop(api_key=API_KEY)
+
+# 1. Retrieve a dataset
+DATASET_ID = ...
+datapoints = humanloop.datasets.list_datapoints(DATASET_ID).records
+
+# 2. Create an external evaluator
+evaluator = humanloop.evaluators.create(
+ name="My External Evaluator",
+ description="An evaluator that runs outside of Humanloop runtime.",
+ type="external",
+ arguments_type="target_required",
+ return_type="boolean",
+)
+# Or, retrieve an existing one:
+# evaluator = humanloop.evaluators.get(EVALUATOR_ID)
+
+# 3. Retrieve a model config
+CONFIG_ID = ...
+model_config = humanloop.model_configs.get(CONFIG_ID)
+
+# 4. Create the evaluation run
+PROJECT_ID = ...
+evaluation_run = humanloop.evaluations.create(
+ project_id=PROJECT_ID,
+ config_id=CONFIG_ID,
+ evaluator_ids=[evaluator.id],
+ dataset_id=DATASET_ID,
+)
+
+# 5. Iterate the datapoints and trigger generations
+logs = []
+for datapoint in datapoints:
+ log = humanloop.chat_model_config(
+ project_id=PROJECT_ID,
+ model_config_id=model_config.id,
+ inputs=datapoint.inputs,
+ messages=[
+ {key: value for key, value in dict(message).items() if value is not None}
+ for message in datapoint.messages
+ ],
+ source_datapoint_id=datapoint.id,
+ ).data[0]
+ logs.append((log, datapoint))
+
+# 6. Evaluate the results.
+# In this example, we use an extremely simple evaluation, checking for an exact
+# match between the target and the model's actual output.
+for (log, datapoint) in logs:
+ # The datapoint target tells us the correct answer.
+ target = str(datapoint.target["answer"])
+
+ # The log output is what the model said.
+ model_output = log.output
+
+ # The evaluation is a boolean, indicating whether the model was correct.
+ result = target == model_output
+
+ # Post the result back to Humanloop.
+ evaluation_result_log = humanloop.evaluations.log_result(
+ log_id=log.id,
+ evaluator_id=evaluator.id,
+ evaluation_run_external_id=evaluation_run.id,
+ result=result,
+ )
+
+# 7. Complete the evaluation run.
+humanloop.evaluations.update_status(id=evaluation_run.id, status="completed")
+
+
+
+CHAT RESPONSE
+
+We've updated the response models of all of our /chat [/api-reference/chats/create] API endpoints to include an output message
+object.
+
+Up to this point, our chat and completion endpoints had a unified response model, where the content of the assistant message
+returned by OpenAI models was provided in the common output field for each returned sample. And any tool calls made were provided
+in the separate tool_calls field.
+
+When making subsequent chat calls, the caller of the API had to use these fields to create a message object to append to the
+history of messages. So to improve this experience we now added an output_message field to the chat response. This is additive and
+does not represent a breaking change.
+
+Before:
+
+{
+ "project_id": "pr_GWx6n0lv6xUu3HNRjY8UA",
+ "data": [
+ {
+ "id": "data_Vdy9ZoiFv2B7iYLIh15Jj",
+ "index": 0,
+ "output": "Well, I gotta say, ...",
+ "raw_output": "Well, I gotta say...",
+ "finish_reason": "length",
+ "model_config_id": "config_VZAPd51sJH7i3ZsjauG2Q",
+ "messages": [
+ {
+ "content": "what's your best guess...",
+ "role": "user",
+ }
+ ],
+ "tool_calls": null
+ }
+ ],
+...
+...
+...
+}
+
+
+After:
+
+{
+ "project_id": "pr_GWx6n0lv6xUu3HNRjY8UA",
+ "data": [
+ {
+ "id": "data_Vdy9ZoiFv2B7iYLIh15Jj",
+ "output_message": {
+ "content": "Well, I gotta say, ...",
+ "name": null,
+ "role": "assistant",
+ "tool_calls": null
+ },
+ "index": 0,
+ "output": "Well, I gotta say, ...",
+ "raw_output": "Well, I gotta say...",
+ "finish_reason": "length",
+ "model_config_id": "config_VZAPd51sJH7i3ZsjauG2Q",
+ "messages": [
+ {
+ "content": "what's your best guess...",
+ "role": "user",
+ }
+ ],
+ "tool_calls": null,
+ }
+ ],
+...
+...
+...
+}
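+
+To illustrate why this helps, a small sketch of building up chat history with the new field (raw JSON access; the values are just
+the truncated example above):
+
+response_json = {
+ "data": [
+ {
+ "output_message": {
+ "content": "Well, I gotta say, ...",
+ "name": None,
+ "role": "assistant",
+ "tool_calls": None,
+ },
+ },
+ ],
+}
+
+history = [{"role": "user", "content": "what's your best guess..."}]
+history.append(response_json["data"][0]["output_message"])
+# history can now be passed as the messages array on the next chat call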
+
+
+
+SNIPPET TOOL
+
+We've added support for managing common text 'snippets' (or 'passages', or 'chunks') that you want to reuse across your different
+prompts.
+
+This functionality is provided by our new Snippet tool. A Snippet tool acts as a simple key/value store, where the key is the name
+of the common re-usable text snippet and the value is the corresponding text.
+
+For example, you may have some common persona descriptions that you found to be effective across a range of your LLM features. Or
+maybe you have some specific formatting instructions that you find yourself re-using again and again in your prompts.
+
+Before now, you would have to copy and paste between your editor sessions and keep track of which projects you edited. Now you can
+instead inject the text into your prompt using the Snippet tool.
+
+
+SET UP THE TOOL
+
+Navigate to the tools tab [https://app.humanloop.com/hl-test/tools] in your organisation and select the Snippet tool card.
+
+[file:77789645-25a5-474c-8eb4-32e916a73195]
+
+When the dialog opens, start adding your key/value pairs. In the example below we've defined an Assistants snippet tool that can
+be used to manage some common persona descriptions we feed to the LLM.
+
+You can have up to 10 key/value snippets in a single snippet tool.
+
+The name field will be how you'll access this tool in the editor. By setting it to assistant below, you'll be able to access this
+specific tool in the editor by using the syntax {{ assistant(key) }}.
+
+The key is how you'll access the snippet later, so it's recommended to choose something short and memorable.
+
+The value is the passage of text that will be included in your prompt when it is sent to the model.
+
+[file:e9b7f2c2-3cca-4175-ab20-bdf54b5db8bd]
+
+
+USE THE TOOL
+
+Now your Snippets are set up, you can use them to populate strings in your prompt templates across your projects. Double curly
+bracket syntax is used to call a tool in the template. Inside the curly brackets you call the tool.
+
+[file:2a58c905-2a8a-459e-9976-d64abf888653]
+
+The tool requires an input value to be provided for the key. In our editor environment [https://app.humanloop.com/playground] the
+result of the tool will be shown populated top right above the chat.
+
+Above we created an Assistants tool. To use it in an editor, you call the tool by name with a key, so in this case that is
+{{ assistant(key) }}. When adding that, an inputs field appears where you can specify your key; in the screenshot above we used
+the helpful key to access the "You are a helpful assistant. You like to tell jokes and if anyone asks your name is Sam." string.
+This input field can be used to experiment with different key/value pairs to find the best one to suit your prompt.
+
+If you want to see the snippet corresponding to the key, you need to first run the conversation to fetch the string and see it in
+the preview.
+
+If you have a specific key you would like to hardcode in the prompt, you can define it using a literal key value, so in this case
+it would be {{ assistant("helpful") }}.
+
+[file:40e29736-c096-4c1e-a664-1c49d645b9dc]
+
+This is particularly useful because you can define passages of text once in a snippet tool and reuse them across multiple prompts,
+without needing to copy/paste them and manually keep them all in sync.
+
+
+WHAT'S NEXT
+
+Explore our other tools such as the Google or Pinecone Search. If you have other ideas for helpful integrations please reach out
+and let us know.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/11/28",
+ "title": "November 28, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -13823,6 +21256,57 @@ This is particularly useful because you can define passages of text once in a sn
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "November 1900",
+ ],
+ "content": "QUALITY-OF-LIFE APP IMPROVEMENTS
+
+We've been shipping some quality-of-life "little big things" to improve your every day usage of the platform.
+
+
+PROJECT SWITCHER THROUGHOUT THE APP
+
+We've added the project switcher throughout the app so it's easier to jump between Projects from anywhere.
+
+The project switcher is now available everywhere. [file:89480a79-fb18-41fa-97bb-d5593943d785]
+
+
+WE'VE TIDIED UP THE EDITOR
+
+With all the new capabilities and changes (tools, images and more) we need to keep a tight ship to stop things from becoming too
+busy.
+
+We're unifying how we show all your logged generations in the editor and in the logs and sessions. We've also changed the font
+to Inter to be legible at small font sizes.
+
+The Editor and other places have had a clean-up to aid the new capabilities of tool calling and vision.
+[file:769c0b96-2938-4b99-bf2b-2f05be03b146]
+
+
+NO MORE ACCIDENTAL BLANK MESSAGES
+
+We've also fixed issues where empty messages would get appended to the chat.
+
+
+WE'VE IMPROVED KEYBOARD NAVIGATION
+
+The keyboard shortcuts have been updated so it's now easier to navigate in the log tables (up/down keys), and to run generations in
+Editor (cmd/ctrl + enter).
+
+
+THANKS FOR ALL YOUR REQUESTS AND TIPS. PLEASE KEEP THE FEEDBACK COMING!",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/11/22",
+ "title": "November 22, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -13995,6 +21479,33 @@ We're unifying how we show all your logged generations, in the editor, and in th
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "November 1900",
+ ],
+ "content": "CLAUDE 2.1
+
+Today, Anthropic released its latest model, Claude 2.1, and we've added support for it in the Humanloop app.
+
+[file:dc434fcf-8384-4a42-9802-274fb34fe73d]
+
+The new model boasts a 200K context window and a reported 2x decrease in hallucination rates.
+
+Additionally, this model introduces tool use to the line-up of Anthropic models. The feature is presently in beta preview, and
+we'll be adding support for it to Humanloop in the coming days.
+
+Read more about Claude 2.1 in the official release notes [https://www.anthropic.com/index/claude-2-1].",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/11/21",
+ "title": "November 21, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -14054,6 +21565,201 @@ Read more about Claude 2.1 in the [official release notes](https://www.anthropic
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "November 1900",
+ ],
+ "content": "PARALLEL TOOL CALLING
+
+We've added support for parallel tool calls in our Editor and API.
+
+With the release of the latest OpenAI turbo models, the model can choose to respond with more than one tool call for a given
+query; this is referred to as parallel tool calling
+[https://platform.openai.com/docs/guides/function-calling/parallel-function-calling].
+
+
+EDITOR UPDATES
+
+You can now experiment with this new feature in our Editor:
+
+ * Select one of the new turbo models [/changelog/] in the model dropdown.
+ * Specify a tool in your model config on the left hand side.
+ * Make a request that would require multiple calls to answer correctly.
+ * As shown here for a weather example, the model will respond with multiple tool calls in the same message.
+
+[file:04278432-757d-4435-8c5f-56100fc44459]
+
+
+API IMPLICATIONS
+
+We've added an additional field tool_calls to our chat endpoints response model that contains the array of tool calls returned by
+the model. The pre-existing tool_call parameter remains but is now marked as deprecated.
+
+Each element in the tool_calls array has an id associated to it. When providing the tool response back to the model for one of the
+tool calls, the tool_call_id must be provided, along with role=tool and the content containing the tool response.
+
+from humanloop import Humanloop
+
+# Initialize the Humanloop SDK with your API Keys
+humanloop = Humanloop(api_key="")
+
+# form of message when providing the tool response to the model
+chat_response = humanloop.chat_deployed(
+ project_id="",
+ messages=[
+ {
+ "role": "tool",
+ "content": "Horribly wet"
+ "tool_call_id": "call_dwWd231Dsdw12efoOwdd"
+ }
+ ]
+)
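+
+On the response side, each returned sample now carries the tool_calls array described above. Reading the call ids out of a raw
+JSON response might look like this (a sketch - only the id field is documented here, the rest of the shape is truncated):
+
+response_json = {"data": [{"tool_calls": [{"id": "call_dwWd231Dsdw12efoOwdd"}]}]} # hypothetical, truncated response
+for tool_call in (response_json["data"][0]["tool_calls"] or []):
+ print(tool_call["id"]) # pass this back later as tool_call_id on the role="tool" message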
+
+
+
+PYTHON SDK IMPROVEMENTS
+
+We've improved the response models of our Python SDK [https://github.com/humanloop/humanloop-python#raw-http-response] and now
+give users better control over HTTP timeout settings.
+
+
+IMPROVED RESPONSE MODEL TYPES
+
+As of versions >= 0.6.0, our Python SDK methods now return Pydantic [https://docs.pydantic.dev/latest/] models instead of typed
+dicts. This improves developer ergonomics around typing and validations.
+
+ * Previously, you had to use the [...] syntax to access response values:
+
+chat_response = humanloop.chat(
+ # parameters
+ )
+print(chat_response["project_id"])
+
+
+ * With Pydantic-based response values, you can now use the . syntax to access response values. To access the existing response
+ model from < 0.6.0, you can still use the .raw namespace as specified in the Raw HTTP Response section
+ [https://github.com/humanloop/humanloop-python#raw-http-response].
+
+chat_response = humanloop.chat(
+ # parameters
+ )
+print(chat_response.project_id)
+
+
+> 🚧 Breaking change
+>
+> Moving to >= 0.6.0 does represent a breaking change in the SDK. The underlying API remains unchanged.
+
+
+SUPPORT FOR TIMEOUT PARAMETER
+
+The default timeout used by aiohttp [https://docs.aiohttp.org/en/stable/], which our SDK uses, is 300 seconds. For very large
+prompts and the latest models, this can cause timeout errors to occur.
+
+In the latest version of Python SDKs, we've increased the default timeout value to 600 seconds and you can update this
+configuration if you are still experiencing timeout issues by passing the new timeout argument to any of the SDK methods. For
+example, passing timeout=1000 will override the timeout to 1000 seconds.
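+
+For instance (a sketch; as described above, the timeout argument can be passed to any of the SDK methods):
+
+from humanloop import Humanloop
+
+humanloop = Humanloop(api_key="...")
+
+chat_response = humanloop.chat_deployed(
+ project_id="<PROJECT_ID>",
+ messages=[{"role": "user", "content": "A very long prompt..."}],
+ timeout=1000, # seconds; overrides the 600 second default
+)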
+
+
+MULTI-MODAL MODELS
+
+We've introduced support for multi-modal models that can take both text and images as inputs!
+
+We've laid the foundations for multi-modal model support as part of our Editor and API. The first model we've configured is
+OpenAI's GPT-4 with Vision (GPT-4V) [https://platform.openai.com/docs/guides/vision/vision]. You can now select
+gpt-4-vision-preview in the models dropdown and add images to your chat messages via the API.
+
+Let us know what other multi-modal models you would like to see added next!
+
+
+EDITOR QUICK START
+
+To get started with GPT-4V, go to the Playground, or Editor within your project.
+
+ * Select gpt-4-vision-preview in the models dropdown.
+ * Click the Add images button within a user's chat message.
+ * To add an image, either type a URL into the Image URL textbox or select "Upload image" to upload an image from your computer.
+ If you upload an image, it will be converted to a Base64-encoded data URL that represents the image.
+ * Note that you can add multiple images
+
+[file:8d079720-60dc-483d-aef1-1e6ed8c9e3cd]
+
+To view the images within a log, find the log within the logs table and click on it to open it in a drawer. The images in each
+chat message can be viewed within this drawer.
+
+[file:4aeb2a4f-e28c-420a-a605-a9621931e298]
+
+
+API QUICK START
+
+Assuming you have deployed your gpt-4-vision-preview based model config, you can now also include images in messages via the API.
+
+from humanloop import Humanloop
+
+# Initialize the Humanloop SDK with your API Keys
+humanloop = Humanloop(api_key="")
+
+# humanloop.chat_deployed(...) will call the active model config on your project.
+chat_response = humanloop.chat_deployed(
+ project_id="",
+ messages=[
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "image_url",
+ "image_url": {
+ "detail": "high",
+ "url": "https://www.acomaanimalclinictucson.com/wp-content/uploads/2020/04/AdobeStock_288690671-scaled.jpeg"
+ }
+ }
+ ]
+ }
+ ]
+)
+
+
+Any generations made will also be viewable from within your project's logs table.
+
+
+LIMITATIONS
+
+There are some known limitations with the current preview iteration of OpenAI's GPT-4 model to be aware of:
+
+ * Image messages are only supported by the gpt-4-vision-preview model in chat mode.
+ * GPT-4V model does not support tool calling or JSON mode.
+ * You cannot add images to the first system message.
+
+
+JSON MODE AND SEED PARAMETERS
+
+We've introduced new model config parameters for JSON mode and Seed in our Editor and API.
+
+With the introduction of the new OpenAI turbo models [https://docs.humanloop.com/changelog/gpt4-turbo-preview] you can now set
+additional properties that impact the behaviour of the model; response_format and seed.
+
+> See further guidance from OpenAI on the JSON response format [https://platform.openai.com/docs/guides/text-generation/json-mode]
+> and reproducing outputs using the seed parameter [https://platform.openai.com/docs/guides/text-generation/reproducible-outputs].
+
+These new parameters can now optionally contribute to your model config in our Editor and API. Updated values for response_format
+or seed will constitute new versions of your model on Humanloop.
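+
+A rough sketch of the idea (illustrative only - the field names follow OpenAI's conventions and their placement on the call is an
+assumption, not taken from the Humanloop API reference):
+
+from humanloop import Humanloop
+
+humanloop = Humanloop(api_key="...")
+
+chat_response = humanloop.chat_deployed(
+ project_id="<PROJECT_ID>",
+ messages=[{"role": "user", "content": "Reply in JSON with keys city and summary."}],
+ response_format={"type": "json_object"}, # assumption: passed through to the model
+ seed=42, # assumption: for reproducible outputs
+)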
+
+[file:9b09aa60-7621-4d2e-a4df-00100a30e85a] When using JSON mode with the new turbo models, you should still include formatting
+instructions in your prompt.
+
+In fact, if you do not include the word 'json' anywhere in your prompt, OpenAI will currently return a validation error.
+
+",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/11/20",
+ "title": "November 20, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -14498,6 +22204,77 @@ In fact, if you do not include the word 'json' anywhere in your prompt, OpenAI w
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "November 1900",
+ ],
+ "content": "LLM EVALUATORS
+
+Until now, it's been possible to trigger LLM-based evaluations by writing Python code that uses the Humanloop API to trigger the
+LLM generations.
+
+Today, in order to make this increasingly important workflow simpler and more intuitive, we're releasing LLM Evaluators, which
+require no Python configuration.
+
+From the Evaluations page, click New Evaluator and select LLM Evaluator.
+
+You can now choose between the existing Python Evaluators and our new LLM Evaluators. [file:4f3985cd-4c63-474a-b630-542a689ba655]
+
+Instead of a code editor, the right hand side of the page is now a prompt editor for defining instructions to the LLM Evaluator.
+Underneath the prompt, you can configure the parameters of the Evaluator (things like model, temperature etc.) just like any
+normal model config.
+
+LLM Evaluator Editor. [file:c3f52783-6241-4ca2-ae5c-c901d0d02987]
+
+In the prompt editor, you have access to a variety of variables that correspond to data from the underlying Log that you are
+trying to evaluate. These use the usual {{ variable }} syntax, and include:
+
+ * log_inputs - the input variables that were passed in to the prompt template when the Log was generated
+ * log_prompt - the fully populated prompt (if it was a completion mode generation)
+ * log_messages - a JSON representation of the messages array (if it was a chat mode generation)
+ * log_output - the output produced by the model
+ * log_error - if the underlying Log was an unsuccessful generation, this is the error that was produced
+ * testcase - when in offline mode, this is the testcase that was used for the evaluation.
+
+Take a look at some of the presets we've provided on the left-hand side of the page for inspiration.
+
+LLM Evaluator presets. You'll likely need to tweak these to fit your use case. [file:2873a37b-8901-4e0a-b306-09ebdef2fb09]
+
+At the bottom of the page you can expand the debug console - this can be used to verify that your Evaluator is working as intended.
+We've got further enhancements coming to this part of the Evaluator Editor very soon.
+
+Since an LLM Evaluator is just another model config managed within Humanloop, it gets its own project. When you create an LLM
+Evaluator, you'll see that a new project is created in your organisation with the same name as the Evaluator. Every time the
+Evaluator produces a Log as part of its evaluation activity, that output will be visible in the Logs tab of that project.
+
+
+IMPROVED EVALUATOR EDITOR
+
+Given our current focus on delivering a best-in-class evaluations experience, we've promoted the Evaluator editor to a full-page
+screen in the app.
+
+[file:be95c8e8-5c87-4176-9446-a197c8a8adbf]
+
+In the left-hand pane, you'll find drop-downs to:
+
+ * Select the mode of the Evaluator - either Online or Offline, depending on whether the Evaluator is intended to run against
+ pre-defined testcases or against live production Logs
+ * Select the return type of the Evaluator - either boolean or number
+
+Underneath that configuration you'll find a collection of presets.
+
+Preset selector. [file:8d9962ae-7992-4199-975d-261c54c96117]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/11/17",
+ "title": "November 17, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -14614,6 +22391,42 @@ Underneath that configuration you'll find a collection of presets.
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "November 1900",
+ ],
+ "content": "EVALUATION COMPARISON CHARTS
+
+We've added comparison charts to the evaluation runs page to help you better compare your evaluation results. These can be found
+in the evaluations run tab for each of your projects.
+
+[file:27d505f5-2522-49a2-9e2b-6c9568c79cc6]
+
+
+COMPARING RUNS
+
+You can use this to compare specific evaluation runs by selecting those in the runs table. If you don't select any specific rows,
+the charts show an averaged view of all the previous runs for all the evaluators.
+
+[file:d5af22d3-88c4-4163-8b1f-f578ccc8b88b]
+
+
+HIDING A CHART
+
+To hide a chart for a specific evaluator you can hide the column in the table and it will hide the corresponding chart.
+
+[file:eae088ba-cfea-4e96-ae82-331523b2d291]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/11/10",
+ "title": "November 10, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -14728,6 +22541,55 @@ Underneath that configuration you'll find a collection of presets.
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "November 1900",
+ ],
+ "content": "COMPARISON MODE IN EDITOR
+
+You can now compare generations across Model Configs and inputs in Editor!
+
+[file:2b33e4b9-8bbb-4658-800f-7e51ac746e61]
+
+
+QUICK START
+
+To enter comparison mode, click New panel in the dropdown menu. This adds a new blank panel to the right.
+
+Duplicate panel adds a new panel containing the same information as your current panel.
+
+Clicking **New panel** in the dropdown menu... [file:f72bfe15-fcf6-42ca-9bc1-c262984d4e02]
+
+... will open a new panel to the right. [file:86abe170-c335-4a4c-89f1-50f521a34c9a]
+
+Each panel is split into two sections: a Model Config section at the top and an Inputs & Chat section at the bottom. These can be
+collapsed and resized to suit your experimentation.
+
+If you've made changes in one panel, you can copy the changes you've made using the Copy button in the subsection's header and
+paste them in the target panel using its corresponding Paste button.
+
+The **Copy** button on the left panel will copy the new chat template... [file:edae6eeb-6df2-4b4e-8157-ebd4b40b9dc8] ... and the
+**Paste** button on the right panel will then update its chat template. [file:32999dd8-e8d9-467a-b0eb-8e0801239b4c]
+
+
+OTHER CHANGES
+
+Our recently-introduced local history has also been upgraded to save your full session even when you have multiple panels open.
+
+The toggle to completion mode and the button to open history have now been moved into the new dropdown menu.
+
+[file:c13a2015-3c77-4d85-a795-fb27f146dc89]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/11/9",
+ "title": "November 9, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -14859,6 +22721,72 @@ The toggle to completion mode and the button to open history have now been moved
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "November 1900",
+ ],
+ "content": "IMPROVED EVALUATION RUNS
+
+You can now trigger runs against multiple model configs simultaneously.
+
+This improves your ability to compare and evaluate changes across your prompts. We've also removed the summary cards. In their
+place, we've added a table that supports sorting and rearranging of columns to help you better interrogate results.
+
+
+MULTIPLE MODEL CONFIGS
+
+To run evaluations against multiple model configs it's as simple as selecting the targeted model configs in the run dialog,
+similar to before, but multiple choices are now supported. This will trigger multiple evaluation runs at once, with each model
+config selected as a target.
+
+[file:0742395d-cf92-4493-b49c-75aa8b716ffe]
+
+
+EVALUATION TABLE
+
+We've updated our evaluation runs with a table to help view the outcomes of runs in a more condensed form. It also allows you to
+sort results and trigger re-runs more easily. As new evaluators are included, a column will be added automatically to the table.
+
+[file:bab799a2-4b6f-4b2a-a4ee-20248aee89c3]
+
+
+RE-RUN PREVIOUS EVALUATIONS
+
+We've exposed the re-run option in the table to allow you to quickly trigger runs again, or use older runs as a way to preload the
+dialog and change the parameters such as the target dataset or model config.
+
+[file:73678520-4070-4cde-aa29-73f9181e590e]
+
+
+NEW OPENAI TURBOS
+
+Off the back of OpenAI's dev day [https://devday.openai.com/] we've added support for the new turbo models
+[https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo] that were announced:
+
+ * gpt-4-1106-preview
+ * gpt-3.5-turbo-1106
+
+Both of these models add a couple of nice capabilities:
+
+ * Better instruction following performance
+ * JSON mode that forces the model to return valid JSON
+ * Can call multiple tools at once
+ * Set a seed for reproducible outputs
+
+You can now access these in your Humanloop Editor and via the API.
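+
+As a minimal sketch (the prompt and client setup below are illustrative assumptions, not part of this announcement), calling one of
+the new models with JSON mode and a fixed seed via the OpenAI Python SDK looks roughly like:
+
+from openai import OpenAI
+
+client = OpenAI()  # assumes OPENAI_API_KEY is set in your environment
+
+response = client.chat.completions.create(
+    model="gpt-4-1106-preview",
+    response_format={"type": "json_object"},  # JSON mode: the model must return valid JSON
+    seed=42,  # request reproducible outputs
+    messages=[
+        {"role": "system", "content": "Reply with a JSON object."},
+        {"role": "user", "content": "List three colours."},
+    ],
+)
+print(response.choices[0].message.content)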
+
+[file:d1e74557-544f-4df5-8890-83222766b99f]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/11/8",
+ "title": "November 8, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -15043,6 +22971,29 @@ You can now access these in your Humanloop Editor and via the API.
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "November 1900",
+ ],
+ "content": "IMPROVED LOGS DRAWER
+
+You can now resize the message section in the Logs and Session drawers, allowing you to review your logs more easily.
+
+[file:5d7f7925-45d6-4233-9301-5683313d05c3]
+
+To resize the message section, we've introduced a resize bar that you can drag up or down to give yourself the space needed. To
+reset the layout back to default, just double-click the bar.
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/11/1",
+ "title": "November 1, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -15097,6 +23048,34 @@ To resize the message section we've introduced a resize bar that you can drag up
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "October 1900",
+ ],
+ "content": "LOCAL EDITOR HISTORY
+
+The Humanloop playground and editor now save history locally as you make edits, giving you complete peace of mind that your
+precisely-crafted prompts will not be lost due to an accidental page reload or navigating away.
+
+[file:c03db74a-106a-4d69-9909-5dc0901cc0cc]
+
+Local history entries will be saved as you use the playground (e.g. as you modify your model config, make generations, or add
+messages). These will be visible under the Local tab within the history side panel. Local history is saved to your browser and is
+only visible to you.
+
+Our shared history feature, where all playground generations are saved, has now been moved under the Shared tab in the history
+side panel.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/10/30",
+ "title": "October 30, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -15153,6 +23132,69 @@ Our shared history feature, where all playground generations are saved, has now
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "October 1900",
+ ],
+ "content": "PROJECT FOLDERS
+
+You can now organize your projects into folders!
+
+Logging in to Humanloop will bring you to the new page where you can start arranging your projects.
+
+[file:ba126627-d15b-4bf4-8aa9-91f8b6e18482]
+
+Navigate into folders and open projects by clicking on the row. To go back to a parent folder, click on the displayed breadcrumbs
+(e.g. "Projects" or "Development" in the above screenshot).
+
+----------------------------------------------------------------------------------------------------------------------------------
+
+
+SEARCH
+
+Searching will give you a list of directories and projects with a matching name.
+
+[file:3c559b84-b124-4a32-8da1-93c10abbd1b3]
+
+
+MOVING MULTIPLE PROJECTS
+
+You can move a group of projects and directories by selecting them and moving them together.
+
+ 1. Select the projects you want to move.
+ Tip: Put your cursor on a project row and press [x] to select the row.
+ 2. To move the selected projects into a folder, drag and drop them into the desired folder.
+
+[file:e215c0e0-ccf7-4bd5-96c7-965597ab8503]
+
+To move projects out of a folder and into a parent folder, you can drag and drop them onto the parent folder breadcrumbs:
+
+[file:7f5b1499-3d1a-4fbb-b79e-b17fb43e8ec4]
+
+To move projects into deeply nested folders, it might be easier to select your target directory manually. To do so, select the
+projects you wish to move, click the blue Actions button, and then click Move ... to bring up a dialog allowing you to move the
+selected projects.
+
+[file:5294ec62-1279-424e-850f-3c18159914cf]
+
+[file:fb8f93ea-a77a-445b-9c1a-d6ad50360a13]
+
+----------------------------------------------------------------------------------------------------------------------------------
+
+If you prefer the old view, we've kept it around for now. Let us know what you're missing from the new view so we can improve it.
+
+The [Go to old layout] button will take you to the previous view without folders. [file:1314b175-ce1b-4dc8-96fe-388edd6201a3]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/10/17",
+ "title": "October 17, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -15293,6 +23335,50 @@ If you prefer the old view, we've kept it around for now. Let us know what you'r
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "October 1900",
+ ],
+ "content": "DATASETS
+
+We've introduced Datasets to Humanloop. Datasets are collections of Datapoints, which represent input-output pairs for an LLM
+call.
+
+We recently released Datasets in our Evaluations beta, under the name Evaluation Testsets. We're now promoting the concept to a
+first-class citizen within your projects. If you've previously been using testsets in the evaluations beta, you'll see that your
+testsets have now automatically migrated to datasets.
+
+Datasets can be created via CSV upload, by converting existing Logs in your project, or via API requests.
+
+See our guides on datasets [/docs/guides/datasets], which show how to upload from CSV and perform a batch generation across the
+whole dataset.
+
+A single dataset that has been added to a project, with 9 datapoints. [file:57d041c5-dde1-4c19-9183-32c2c08c78cb]
+
+Clicking into a dataset, you can explore its datapoints.
+
+Datapoints are pre-defined input-output pairs. [file:250dded1-5204-4b31-934d-14f2cef065e1]
+
+A dataset contains a collection of prompt variable inputs (the dynamic values which are interpolated into your model config prompt
+template at generation-time), a collection of messages forming the chat history, and a target output representing what we expect
+the model to produce when it runs on those inputs.
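+
+As an illustrative sketch (the field names below are assumptions for the example rather than the exact schema), a single datapoint
+might look something like:
+
+{
+  "inputs": {"customer_name": "Ada", "issue": "cannot log in"},
+  "messages": [
+    {"role": "user", "content": "Hi, I can't log in to my account."}
+  ],
+  "target": "Apologise, then ask for the account email so we can send a password reset link."
+}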
+
+Datasets are useful for evaluating the behaviour of your model configs across a well-defined collection of test cases. You can use
+datasets to check for regressions as you iterate on your model configs, knowing that you are checking behaviour against a
+deterministic collection of known important examples.
+
+Datasets can also be used as collections of input data for fine-tuning jobs.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/10/16",
+ "title": "October 16, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -15361,6 +23447,104 @@ Datasets can also be used as collections of input data for **fine-tuning** jobs.
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "October 1900",
+ ],
+ "content": "GET API TOOL
+
+We've added support for a tool that can make GET calls to an external API.
+
+This can be used to dynamically retrieve context for your prompts. For example, you may wish to get additional information about a
+user from your system based on their ID, or look up additional information based on a query from a user.
+
+To set up the tool you need to provide the following details for your API:
+
+ * Name - a unique tool name to reference as a call signature in your prompts (e.g. get_api_tool)
+ * URL - the URL for your API endpoint (e.g. https://your-api.your-domain.com)
+ * API Key Header - the authentication header required by your endpoint (e.g. X-API-KEY)
+ * API Key - the API key value to use in the authentication header (e.g. sk_1234567891011121314)
+ * Query parameters - a comma delimited list of the query parameters to set when making requests (e.g. user_query, client_id)
+
+
+DEFINE YOUR API
+
+First you will need to define your API. For demo purposes, we will create a mock endpoint in Postman
+[https://learning.postman.com/docs/designing-and-developing-your-api/mocking-data/setting-up-mock/]. Our mock endpoint
+[https://www.postman.com/humanloop/workspace/humanloop/request/12831443-9c48e591-b7b2-4a17-b56a-8050a133e1b5] simply returns
+details about a mock user given their user_id.
+
+A call to our Mock API in Python is as follows; note the query parameter user_id
+
+import requests
+
+url = "https://01a02b84-08c5-4e53-b283-a8c2beef331c.mock.pstmn.io/users?user_id=01234567891011"
+headers = {
+ 'X-API-KEY': ''
+}
+response = requests.request("GET", url, headers=headers)
+print(response.text)
+
+
+And returns the response:
+
+{
+ "user_id", "012345678910",
+ "name": "Albert",
+ "company": "Humanloop",
+ "role": "Engineer"
+}
+
+
+We can now use this tool to inject information for a given user into our prompts.
+
+
+SET UP THE TOOL
+
+Navigate to the tools tab [https://app.humanloop.com/hl-test/tools] in your organisation and select the Get API Call tool card:
+
+[file:d2d52bdc-ad31-45a1-86bd-e915f5e78781]
+
+Configure the tool with your API details:
+
+[file:46a5fdc2-46fd-4f4b-9cb1-f276677baea5]
+
+
+USE THE TOOL
+
+Now that your API tool is set up, you can use it to populate input variables in your prompt templates. Double curly bracket syntax is
+used to call a tool in the template. The call signature is the unique tool name with arguments for the query parameters defined
+when the tool was set up.
+
+In our mock example, the signature will be: get_user_api(user_id).
+
+An example prompt template using this tool is:
+
+You are a helpful assistant. Please draft an example job role summary for the following user:
+
+User details: {{ get_user_api(user_id) }}
+Keep it short and concise.
+
+
+The tool requires an input value to be provided for user_id. In our playground environment [https://app.humanloop.com/playground]
+the result of the tool will be shown in the top right, above the chat:
+
+[file:bb06e5e1-e703-4963-8110-9ae093aa5263]
+
+
+WHAT'S NEXT
+
+Explore more complex examples of context stuffing such as defining your own custom RAG service.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/10/10",
+ "title": "October 10, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -15588,6 +23772,92 @@ The tool requires an input value to be provided for user_id. In our [playground
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "September 1900",
+ ],
+ "content": "EVALUATIONS IMPROVEMENTS
+
+We've released a couple of minor usability improvements in the evaluations workflow.
+
+
+SUMMARY STATISTICS FOR EVALUATION RUNS
+
+When reviewing past runs of evaluations, you can now see summary statistics for each evaluator before clicking into the detail
+view, allowing for easier comparison between runs.
+
+[file:00b994f8-b8b2-424d-8bed-d67c9a4845be]
+
+
+RE-RUNNING EVALUATIONS
+
+To enable easier re-running of past evaluations, you can now click the Re-run button in the top-right of the evaluation detail
+view.
+
+[file:457e4853-2790-4904-a348-8953f0720b86]
+
+
+EDITOR - COPY TOOLS
+
+Our Editor environment lets users incorporate OpenAI function calling
+[https://openai.com/blog/function-calling-and-other-api-updates] into their prompt engineering workflows by defining tools. Tools
+are made available to the model as functions to call using the same universal JSON schema format.
+
+As part of this process it can be helpful to copy the full JSON definition of the tool for quickly iterating on new versions, or
+for copying and pasting it into code. You can now do this directly from the tool definition in Editor:
+
+[file:eb4a5f88-cff7-4934-a2cf-6dec3dc04641]
+
+Selecting the Copy button adds the full JSON definition of the tool to your clipboard:
+
+{
+ "name": "get_current_weather",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "name": "Location",
+ "description": "The city and state, e.g. San Francisco, CA"
+ },
+ "unit": {
+ "type": "string",
+ "name": "Unit",
+ "enum": [
+ "celsius",
+ "fahrenheit"
+ ]
+ }
+ },
+ "required": [
+ "location"
+ ]
+ }
+}
+
+
+
+SINGLE SIGN ON (SSO)
+
+We've added support for SSO to our signup, login and invite flows. By default users can now use their Gmail accounts to access
+Humanloop.
+
+For our enterprise customers, this also unlocks the ability for us to more easily support their SAML-based single sign-on (SSO)
+setups.
+
+[file:735f138f-c6af-44d3-a176-c2183b55551a]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/9/15",
+ "title": "September 15, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -15789,6 +24059,42 @@ For our enterprise customers, this also unlocks the ability for us to more easil
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "September 1900",
+ ],
+ "content": "ORGANIZATION SLUG IN URLS
+
+We have altered routes specific to your organization to include the organization slug. The organization slug is a unique value
+that was derived from your organization name when your organization was created.
+
+For project paths we've dropped the projects label in favour of a more specific project label.
+
+An example of what this looks like can be seen below:
+
+[file:67962e6f-c2e2-4b5b-960a-469b9b732748] When a request is made to one of the legacy URL paths, we'll redirect it to the
+corresponding new path. Although the legacy routes are still supported, we encourage you to update your links and bookmarks to
+adopt the new naming scheme.
+
+
+UPDATING YOUR ORGANIZATION SLUG
+
+The organization slug can be updated by organization administrators. This can be done by navigating to the general settings
+[https://app.humanloop.com/account/organization] page. Please exercise caution when changing this, as it will affect the URLs
+across the organization.
+
+[file:1ddb18fa-51c9-4805-9b41-6b79176b5aca]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/9/13",
+ "title": "September 13, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -15882,24 +24188,40 @@ When a request is made to one of the legacy URL paths, we'll redirect it to the
},
{
"breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2023",
- "title": "2023",
- },
- {
- "slug": "docs/changelog/8",
- "title": "August 1900",
- },
+ "Changelog",
+ "2023",
+ "August 1900",
],
- "description": "You can now add **trusted email domains** to your organization. Adding trusted email domains allows new users, when creating an account with a matching email, to join your organization without requiring an invite.",
+ "content": "ALLOW TRUSTED EMAIL DOMAINS
+
+You can now add trusted email domains to your organization. Adding trusted email domains allows new users, when creating an
+account with a matching email, to join your organization without requiring an invite.
+
+
+MANAGING TRUSTED DOMAINS
+
+Adding and removing trusted email domains is controlled from your organization's General settings
+[https://app.humanloop.com/account/organization] page.
+
+Only Admins can manage trusted domains for an organization.
+
+To add a new trusted domain, press the Add domain button and enter the domains trusted by your organization. The domains added
+here will be checked against new users signing up to Humanloop, and if there is a match those users will be given the option to
+join your organization.
+
+[file:79b22084-7977-4012-abe8-1c9afff46375]
+
+
+SIGNUP FOR NEW USERS
+
+New users signing up to Humanloop will see the following screen when they sign up with an email that matches an organization's
+trusted email domain. By pressing Join they will be added to the matching organization.
+
+[file:aea9088c-c9ff-499a-a686-61a1415a9aa6]",
"indexSegmentId": "0",
- "slug": "docs/v5/changelog/2023/8/31#allow-trusted-email-domains",
- "title": "Allow trusted email domains",
- "type": "page-v4",
+ "slug": "docs/v5/changelog/2023/8/31",
+ "title": "August 31, 2023",
+ "type": "page-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -15919,23 +24241,48 @@ When a request is made to one of the legacy URL paths, we'll redirect it to the
"slug": "docs/changelog/8",
"title": "August 1900",
},
- {
- "slug": "docs/v5/changelog/2023/8/31#allow-trusted-email-domains",
- "title": "Allow trusted email domains",
- },
],
- "description": "Adding and removing trusted email domains is controlled from your organizations [General settings](https://app.humanloop.com/account/organization) page.
-
-
-Only Admins can manage trusted domains for an organization.
-
-
-To add a new trusted domain press the **Add domain** button and enter the domains trusted by your organization. The domains added here will check against new users signing up to Humanloop and if there is a match those users will be given the option to join your organization.
-
-",
+ "description": "You can now add **trusted email domains** to your organization. Adding trusted email domains allows new users, when creating an account with a matching email, to join your organization without requiring an invite.",
"indexSegmentId": "0",
- "slug": "docs/v5/changelog/2023/8/31#managing-trusted-domains",
- "title": "Managing trusted domains",
+ "slug": "docs/v5/changelog/2023/8/31#allow-trusted-email-domains",
+ "title": "Allow trusted email domains",
+ "type": "page-v4",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2023",
+ "title": "2023",
+ },
+ {
+ "slug": "docs/changelog/8",
+ "title": "August 1900",
+ },
+ {
+ "slug": "docs/v5/changelog/2023/8/31#allow-trusted-email-domains",
+ "title": "Allow trusted email domains",
+ },
+ ],
+ "description": "Adding and removing trusted email domains is controlled from your organizations [General settings](https://app.humanloop.com/account/organization) page.
+
+
+Only Admins can manage trusted domains for an organization.
+
+
+To add a new trusted domain press the **Add domain** button and enter the domains trusted by your organization. The domains added here will check against new users signing up to Humanloop and if there is a match those users will be given the option to join your organization.
+
+",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/8/31#managing-trusted-domains",
+ "title": "Managing trusted domains",
"type": "page-v4",
"version": {
"id": "v5.0",
@@ -15998,6 +24345,26 @@ To add a new trusted domain press the **Add domain** button and enter the domain
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "August 1900",
+ ],
+ "content": "EDITOR - INSERT NEW MESSAGE WITHIN EXISTING CHAT
+
+You can now insert a new message within an existing chat in our Editor. Click the plus button that appears between the rows.
+
+[file:57c97bae-8334-43b9-af02-5dbb9c1e3fb9]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/8/21",
+ "title": "August 21, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -16050,6 +24417,29 @@ To add a new trusted domain press the **Add domain** button and enter the domain
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "August 1900",
+ ],
+ "content": "CLAUDE INSTANT 1.2
+
+We've added support for Anthropic's latest model Claude instant 1.2! Claude Instant is the faster and lower-priced yet still very
+capable model from Anthropic, great for use cases where low latency and high throughput are required.
+
+You can use Claude instant 1.2 directly within the Humanloop playground and deployment workflows.
+
+Read more about the latest Claude instant model here [https://www.anthropic.com/index/releasing-claude-instant-1-2].",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/8/15",
+ "title": "August 15, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -16104,6 +24494,42 @@ Read more about the latest Claude instant model [here](https://www.anthropic.com
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "August 1900",
+ ],
+ "content": "OFFLINE EVALUATIONS WITH TESTSETS
+
+We're continuing to build and release more functionality to Humanloop's evaluations framework!
+
+Our first release provided the ability to run online evaluators in your projects. Online evaluators allow you to monitor the
+performance of your live deployments by defining functions which evaluate all new datapoints in real time as they get logged to
+the project.
+
+Today, to augment online evaluators, we are releasing offline evaluators as the second part of our evaluations framework.
+
+Offline evaluators provide the ability to test your prompt engineering efforts rigorously in development and CI. Offline
+evaluators test the performance of your model configs against a pre-defined suite of testcases - much like unit testing in
+traditional programming.
+
+With this framework, you can use test-driven development practices to iterate and improve your model configs, while monitoring for
+regressions in CI.
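+
+Conceptually (this is only a sketch with made-up helper names, not the Humanloop API), an offline evaluation run in CI behaves like
+a unit test over a fixed testset:
+
+def run_offline_evaluation(testcases, generate, evaluator, threshold=0.9):
+    """Score a model config against a fixed testset, much like a unit test suite."""
+    results = [evaluator(generate(case["inputs"]), case) for case in testcases]
+    pass_rate = sum(results) / len(results)
+    # Fail the CI job if quality regresses below the agreed threshold
+    assert pass_rate >= threshold, f"Regression: pass rate {pass_rate:.0%} < {threshold:.0%}"
+    return pass_rate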
+
+To learn more about how to use online and offline evaluators, check out the Evaluate your model [/docs/guides/evaluate-your-model]
+section of our guides.
+
+[file:85348eb6-f900-47f8-96d0-1c37d5f0d125]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/8/14",
+ "title": "August 14, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -16166,6 +24592,66 @@ To learn more about how to use online and offline evaluators, check out the [Eva
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "July 1900",
+ ],
+ "content": "IMPROVED ERROR HANDLING
+
+We've unified how errors returned by model providers are handled and enabled error monitoring using eval functions
+[/docs/guides/evaluate-your-model].
+
+A common production pain point we see is that hosted SOTA language models can still be flaky at times, especially at real scale.
+With this release, Humanloop can help users better understand the extent of the problem and guide them to different model choices
+to improve reliability.
+
+
+UNIFIED ERRORS
+
+Our users integrate the Humanloop /chat and /completion API endpoints as a unified interface into all the popular model providers
+including OpenAI, Anthropic, Azure, Cohere, etc. Their Humanloop projects can then be used to manage model experimentation,
+versioning, evaluation and deployment.
+
+Errors returned by these endpoints may be raised by the model provider's system. With this release we've updated our API to map
+all the error behaviours from different model providers to a unified set of error response codes
+[/api-reference/errors#http-error-codes].
+
+We've also extended our error responses to include more details of the error with fields for type, message, code and origin. The
+origin field indicates if the error originated from one of the integrated model providers' systems, or directly from Humanloop.
+
+For example, for our /chat endpoint where we attempt to call OpenAI with an invalid setting for max_tokens, the message returned
+is that raised by OpenAI and the origin is set to OpenAI.
+
+{
+ "type": "unprocessable_entity_error",
+ "message": "This model's maximum context length is 4097 tokens. However, you requested 10000012 tokens (12 in the messages, 10000000 in the completion). Please reduce the length of the messages or completion.",
+ "code": 422,
+ "origin": "OpenAI"
+}
+
+
+
+MONITOR MODEL RELIABILITY WITH EVALS
+
+With this release, all errors returned from the different model providers are now persisted with the corresponding input data as
+datapoints on Humanloop. Furthermore this error data is made available to use within evaluation functions
+[/docs/guides/evaluate-your-model].
+
+You can now turn on the Errors eval function, which tracks overall error rates of the different model variations in your project.
+Or you can customise this template to track more specific error behaviour.
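+
+For example, a customised Evaluator along these lines (the exact shape of any error data on a datapoint is an assumption here)
+could track only errors raised by the model provider:
+
+def provider_error(datapoint):
+    # Assumed field name: error details attached to the datapoint under "error"
+    error = datapoint.get("error")
+    # Count only errors whose origin is a model provider, not Humanloop itself
+    return error is not None and error.get("origin") != "Humanloop"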
+
+Errors evaluation function template now available [file:57dccce3-7587-45e1-b666-30806ab65920]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/7/30",
+ "title": "July 30, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -16295,6 +24781,73 @@ You can now turn on the **Errors** eval function, which tracks overall error rat
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "July 1900",
+ ],
+ "content": "OPENAI FUNCTIONS IN PLAYGROUND
+
+We've added support for OpenAI functions [https://platform.openai.com/docs/guides/gpt/function-calling] to our playground!
+
+This builds on our API support [https://humanloop.com/docs/changelog/2023/7/3] and allows you to easily experiment with OpenAI
+functions within our playground UI.
+
+OpenAI functions are implemented as tools [https://humanloop.com/docs/guides/set-up-semantic-search] on Humanloop. Tools follow
+the same universal json-schema [https://json-schema.org/] definition as OpenAI functions. You can now define tools as part of your
+model configuration in the playground. These tools are sent as OpenAI functions when running the OpenAI chat models that support
+function calling.
+
+The model can choose to return a JSON object containing the arguments needed to call a function. This object is displayed as a
+special assistant message within the playground. You can then provide the result of the call in a message back to the model to
+consider, which simulates the function calling workflow.
+
+
+USE TOOLS IN PLAYGROUND
+
+Take the following steps to use tools for function calling in the playground:
+
+ 1. Find tools: Navigate to the playground and locate the Tools section. This is where you'll be able to manage your tool
+ definitions.
+
+[file:4507074f-3d29-4ea5-b231-329757e00a90]
+
+ 2. Create a new tool: Click on the "Add Tool" button. There are two options in the dropdown: create a new tool or start with
+ one of our examples. You define your tool using the json-schema [https://json-schema.org/] syntax. This represents the
+ function definition sent to OpenAI.
+
+[file:5c4cfad2-5e18-429a-9a6d-1a24f6285c2d]
+
+ 3. Edit a tool: To edit an existing tool, simply click on the tool in the Tools section and make the necessary changes to its
+ json-schema definition. This will result in a new model configuration.
+
+[file:0a23afc8-a949-4ea2-a0dd-e309e230b5f3]
+
+ 4. Run a model with tools: Once you've defined your tools, you can run the model by pressing the "Run" button.
+ 1. If the model chooses to call a function, an assistant message will be displayed with the corresponding tool name and
+ arguments to use.
+ 2. A subsequent Tool message is then displayed to simulate sending the results of the call back to the model to consider.
+
+[file:9e260b75-d1f1-4969-8946-f13b38ff7fc2]
+
+ 5. Save your model config with tools by using the Save button. Model configs with tools defined can then be deployed to environments
+ [/docs/guides/deploy-to-an-environment] as normal.
+
+
+COMING SOON
+
+Provide the runtime for your tool under the existing pre-defined Tools section [https://app.humanloop.com/tools] of your
+organization on Humanloop.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/7/25",
+ "title": "July 25, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -16429,6 +24982,32 @@ The model can choose to return a JSON object containing the arguments needed to
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "July 1900",
+ ],
+ "content": "LLAMA 2
+
+We've added support for Llama 2!
+
+You can now select llama70b-v2 from the model dropdown in the Playground and Editor. You don't currently need to provide an API
+key or any other special configuration to get Llama 2 access via Humanloop.
+
+Llama 2 is available in Playground and Editor for all Humanloop users. [file:178d36c6-481b-4364-baf9-187d6c693a56]
+
+Read more about the latest version of Llama here [https://ai.meta.com/llama/] and in the original announcement
+[https://about.fb.com/news/2023/07/llama-2/].",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/7/24",
+ "title": "July 24, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -16486,6 +25065,26 @@ Read more about the latest version of Llama [here](https://ai.meta.com/llama/) a
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "July 1900",
+ ],
+ "content": "CLAUDE 2
+
+We've added support for Anthropic's latest model Claude 2.0!
+
+Read more about the latest Claude here [https://www.anthropic.com/index/claude-2].",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/7/17",
+ "title": "July 17, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -16540,28 +25139,149 @@ Read more about the latest Claude [here](https://www.anthropic.com/index/claude-
},
{
"breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2023",
- "title": "2023",
- },
- {
- "slug": "docs/changelog/7",
- "title": "July 1900",
- },
+ "Changelog",
+ "2023",
+ "July 1900",
],
- "description": "We've added **Evaluators** to Humanloop in beta!
+ "content": "EVALUATORS
-Evaluators allow you to quantitatively define what constitutes a good or bad output from your models. Once set up, you can configure an Evaluators to run automatically across all new datapoints as they appear in your project; or, you can simply run it manually on selected datapoints from the **Data** tab.
+We've added Evaluators to Humanloop in beta!
-We're going to be adding lots more functionality to this feature in the coming weeks, so check back for more!",
+Evaluators allow you to quantitatively define what constitutes a good or bad output from your models. Once set up, you can
+configure an Evaluator to run automatically across all new datapoints as they appear in your project; or, you can simply run it
+manually on selected datapoints from the Data tab.
+
+We're going to be adding lots more functionality to this feature in the coming weeks, so check back for more!
+
+
+CREATE AN EVALUATOR
+
+If you've been given access to the feature, you'll see a new Evaluations tab in the Humanloop app. To create your first evaluation
+function, select + New Evaluator. In the dialog, you'll be presented with a library of example Evaluators, or you can start from
+scratch.
+
+We offer a library of example Evaluators to get you started. [file:74f3bcca-6c05-418e-a8fe-8850f9d92a29]
+
+We'll pick Valid JSON for this guide.
+
+Evaluator editor. [file:30d2be57-1314-45de-8752-e197f4912862]
+
+In the editor, provide details of your function's name, description and return type. In the code editor, you can provide a
+function which accepts a datapoint argument and should return a value of the chosen type.
+
+Currently, the available return types for an Evaluator are number and boolean. You should ensure that your function returns the
+expected data type - an error will be raised at runtime if not.
+
+THE DATAPOINT ARGUMENT
+
+The datapoint passed into your function will be a Python dict with the following structure.
+
+{
+ "id":"data_XXXX", # Datapoint id
+ "model_config": {...}, # Model config used to generate the datapoint
+ "inputs": {...}, # Model inputs (interpolated into the prompt)
+ "output": "...", # Generated output from the model
+ "provider_latency": 0.6, # Provider latency in seconds
+ "metadata": {...}, # Additional metadata attached to the logged datapoint
+ "created_at": "...", # Creation timestamp
+ "feedback": [...] # Array of feedback provided on the datapoint
+}
+
+
+To inspect datapoint dictionaries in more detail, click Random selection in the debug console at the bottom of the window. This
+will load a random set of five datapoints from your project, exactly as they will be passed into the Evaluation Function.
+
+The debug console - load datapoints to inspect the argument passed into Evaluators. [file:a31953a3-3c29-48db-903b-815a938bfda2]
+
+For this demo, we've created a prompt which asks the model to produce valid JSON as its output. The Evaluator uses a simple
+json.loads call to determine whether the output is validly formed JSON - if this call raises an exception, it means that the
+output is not valid JSON, and we return False.
+
+import json
+
+def check_valid_json(datapoint):
+ try:
+ return json.loads(datapoint["output"]) is not None
+ except:
+ return False
+
+
+DEBUGGING
+
+Once you have drafted a Python function, try clicking the run button next to one of the debug datapoints in the debug console. You
+should shortly see the result of executing your function on that datapoint in the table.
+
+A \`True\` result from executing the **Valid JSON** Evaluators on the datapoint. [file:e04bf40a-0043-450c-be2a-bea076573b57]
+
+If your Evaluator misbehaves, either by being invalid Python code, raising an unhandled exception or returning the wrong type, an
+error will appear in the result column. You can hover over this error to see more details about what went wrong - the exception
+string is displayed in the tooltip.
+
+Once you're happy with your Evaluator, click Create in the bottom left of the dialog.
+
+
+ACTIVATE / DEACTIVATE AN EVALUATOR
+
+Your Evaluators are available across all your projects. When you visit the Evaluations tab from a specific project, you'll see all
+Evaluators available in your organisation.
+
+Each Evaluator has a toggle. If you toggle the Evaluator on, it will run on every new datapoint that gets logged to that project.
+(Switch to another project and you'll see that the Evaluator is not yet toggled on if you haven't chosen to do so).
+
+You can deactivate an Evaluator for a project by toggling it back off at any time.
+
+
+AGGREGATIONS AND GRAPHS
+
+At the top of the Dashboard tab, you'll see new charts for each activated Evaluation Function. These display aggregated Evaluation
+results through time for datapoints in the project.
+
+At the bottom of the Dashboard tab is a table of all the model configs in your project. That table will display a column for each
+activated Evaluator in the project. The data displayed in this column is an aggregation of all the Evaluation Results (by model
+config) for each Evaluator. This allows you to assess the relative performance of your models.
+
+Evaluation Results through time, by model config. In this example, one of the model configs is not producing Valid JSON outputs,
+while the other is about 99% of the time. [file:9b6738cc-12cb-4b6a-9bd6-cc5d2de72c51]
+
+AGGREGATION
+
+For the purposes of both the charts and the model configs table, aggregations work as follows for the different return types of
+Evaluators:
+
+ * Boolean: percentage returning True of the total number of evaluated datapoints
+ * Number: average value across all evaluated datapoints
+
+
+DATA LOGS
+
+In the Data tab, you'll also see that a column is visible for each activated Evaluator, indicating the result of running the
+function on each datapoint.
+
+The **Data** tab for a project, showing the **Valid JSON** Evaluation Results for a set of datapoints.
+[file:35009934-65d3-437f-8246-e3f69484eba9]
+
+From this tab, you can choose to re-run an Evaluator on a selection of datapoints. Either use the menu at the far right of a
+single datapoint, or select multiple datapoints and choose Run evals from the Actions menu in the top right.
+
+
+AVAILABLE MODULES
+
+The following Python modules are available to be imported in your Evaluation Function:
+
+ * math
+ * random
+ * datetime
+ * json (useful for validating JSON grammar as per the example above)
+ * jsonschema (useful for more fine-grained validation of JSON output - see the in-app example)
+ * sqlglot (useful for validating SQL query grammar)
+ * requests (useful to make further LLM calls as part of your evaluation - see the in-app example for a suggestion of how to get
+ started).
+
+Let us know if you would like to see more modules available.",
"indexSegmentId": "0",
- "slug": "docs/v5/changelog/2023/7/7#evaluators",
- "title": "Evaluators",
- "type": "page-v4",
+ "slug": "docs/v5/changelog/2023/7/7",
+ "title": "July 7, 2023",
+ "type": "page-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -16581,27 +25301,56 @@ We're going to be adding lots more functionality to this feature in the coming w
"slug": "docs/changelog/7",
"title": "July 1900",
},
- {
- "slug": "docs/v5/changelog/2023/7/7#evaluators",
- "title": "Evaluators",
- },
],
- "description": "If you've been given access to the feature, you'll see a new **Evaluations** tab in the Humanloop app. To create your first evaluation function, select **+ New Evaluator**. In the dialog, you'll be presented with a library of example Evaluators, or you can start from scratch.
-
-
-
-
-We'll pick **Valid JSON** for this guide.
-
-
-
+ "description": "We've added **Evaluators** to Humanloop in beta!
-In the editor, provide details of your function's name, description and return type. In the code editor, you can provide a function which accepts a \`datapoint\` argument and should return a value of the chosen type.
+Evaluators allow you to quantitatively define what constitutes a good or bad output from your models. Once set up, you can configure an Evaluators to run automatically across all new datapoints as they appear in your project; or, you can simply run it manually on selected datapoints from the **Data** tab.
-Currently, the available return types for an Evaluators are \`number\` and \`boolean\`. You should ensure that your function returns the expected data type - an error will be raised at runtime if not.",
+We're going to be adding lots more functionality to this feature in the coming weeks, so check back for more!",
"indexSegmentId": "0",
- "slug": "docs/v5/changelog/2023/7/7#create-an-evaluator",
- "title": "Create an Evaluator",
+ "slug": "docs/v5/changelog/2023/7/7#evaluators",
+ "title": "Evaluators",
+ "type": "page-v4",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2023",
+ "title": "2023",
+ },
+ {
+ "slug": "docs/changelog/7",
+ "title": "July 1900",
+ },
+ {
+ "slug": "docs/v5/changelog/2023/7/7#evaluators",
+ "title": "Evaluators",
+ },
+ ],
+ "description": "If you've been given access to the feature, you'll see a new **Evaluations** tab in the Humanloop app. To create your first evaluation function, select **+ New Evaluator**. In the dialog, you'll be presented with a library of example Evaluators, or you can start from scratch.
+
+
+
+
+We'll pick **Valid JSON** for this guide.
+
+
+
+
+In the editor, provide details of your function's name, description and return type. In the code editor, you can provide a function which accepts a \`datapoint\` argument and should return a value of the chosen type.
+
+Currently, the available return types for an Evaluators are \`number\` and \`boolean\`. You should ensure that your function returns the expected data type - an error will be raised at runtime if not.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/7/7#create-an-evaluator",
+ "title": "Create an Evaluator",
"type": "page-v4",
"version": {
"id": "v5.0",
@@ -16911,6 +25660,63 @@ Let us know if you would like to see more modules available.",
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "July 1900",
+ ],
+ "content": "CHAIN LLM CALLS
+
+We've introduced sessions to Humanloop, allowing you to link multiple calls together when building a chain or agent.
+
+Using sessions with your LLM calls helps you troubleshoot and improve your chains and agents.
+
+Trace of an Agent's steps logged as a session [file:0fa78831-9e22-4d51-b94f-e4e119a4e059]
+
+
+ADDING A DATAPOINT TO A SESSION
+
+To log your LLM calls to a session, you just need to define a unique identifier for the session and pass it into your Humanloop
+calls with session_reference_id.
+
+For example, using uuid4() to generate this ID,
+
+import uuid
+session_reference_id = str(uuid.uuid4())
+
+response = humanloop.complete(
+ project="sessions_example_assistant",
+ model_config={
+ "prompt_template": "Question: {{user_request}}\\nGoogle result: {{google_answer}}\\nAnswer:\\n",
+ "model": "text-davinci-002",
+ "temperature": 0,
+ },
+ inputs={"user_request": user_request, "google_answer": google_answer},
+ session_reference_id=session_reference_id,
+)
+
+
+Similarly, our other methods, such as humanloop.complete_deployed(), humanloop.chat(), and humanloop.log(), also support
+session_reference_id.
+
+If you're using our API directly, you can pass session_reference_id within the request body in your POST /v4/completion etc.
+endpoints.
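+
+For instance, a raw request might look roughly like the following sketch (the header name and body fields mirror the SDK example
+above and are assumptions, so check the API reference for the exact schema):
+
+import uuid
+import requests
+
+session_reference_id = str(uuid.uuid4())
+
+response = requests.post(
+    "https://api.humanloop.com/v4/completion",
+    headers={"X-API-KEY": "YOUR_API_KEY"},
+    json={
+        "project": "sessions_example_assistant",
+        "model_config": {"model": "text-davinci-002", "prompt_template": "Question: {{user_request}}\\nAnswer:", "temperature": 0},
+        "inputs": {"user_request": "What is the capital of France?"},
+        "session_reference_id": session_reference_id,
+    },
+)
+print(response.json())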
+
+
+FURTHER DETAILS
+
+For a more detailed walkthrough on how to use session_reference_id, check out our guide [/docs/guides/logging-session-traces] that
+runs through how to record datapoints to a session in an example script.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/7/5",
+ "title": "July 5, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -17047,91 +25853,49 @@ If you're using our API directly, you can pass \`session_reference_id\` within t
},
{
"breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2023",
- "title": "2023",
- },
- {
- "slug": "docs/changelog/7",
- "title": "July 1900",
- },
+ "Changelog",
+ "2023",
+ "July 1900",
],
- "description": "Today we’re announcing Tools as a part of Humanloop.
+ "content": "INTRODUCING TOOLS
-Tools allow you to connect an LLM to any API and to an array of data sources to give it extra capabilities and access to private data. Under your organization settings on Humanloop you can now configure and manage tools in a central place.
+Today we’re announcing Tools as a part of Humanloop.
-Read more on [our blog](https://humanloop.com/blog/announcing-tools) and see an example of setting up a [tool for semantic search](/docs/guides/set-up-semantic-search).",
- "indexSegmentId": "0",
- "slug": "docs/v5/changelog/2023/7/3#introducing-tools",
- "title": "Introducing Tools",
- "type": "page-v4",
- "version": {
- "id": "v5.0",
- "slug": "docs/getting-started/overview",
- },
- },
- {
- "breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2023",
- "title": "2023",
- },
- {
- "slug": "docs/changelog/7",
- "title": "July 1900",
- },
- ],
- "description": "We've updated our APIs to support [OpenAI function calling](https://platform.openai.com/docsgpt/function-calling).
+Tools allow you to connect an LLM to any API and to an array of data sources to give it extra capabilities and access to private
+data. Under your organization settings on Humanloop you can now configure and manage tools in a central place.
-OpenAI functions are now supported as tools on Humanloop. This allows you to pass tool definitions as part of the model configuration when calling our \`chat\` and \`log\` endpoints. For the latest OpenAI models \`gpt-3.5-turbo-0613\` and \`gpt-4-0613\` the model can then choose to output a JSON object containing arguments to call these tools.
+Read more on our blog [https://humanloop.com/blog/announcing-tools] and see an example of setting up a tool for semantic search
+[/docs/guides/set-up-semantic-search].
-This unlocks getting more reliable structured data back from the model and makes it easier to create useful agents.",
- "indexSegmentId": "0",
- "slug": "docs/v5/changelog/2023/7/3#openai-functions-api",
- "title": "OpenAI functions API",
- "type": "page-v4",
- "version": {
- "id": "v5.0",
- "slug": "docs/getting-started/overview",
- },
- },
- {
- "breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2023",
- "title": "2023",
- },
- {
- "slug": "docs/changelog/7",
- "title": "July 1900",
- },
- {
- "slug": "docs/v5/changelog/2023/7/3#openai-functions-api",
- "title": "OpenAI functions API",
- },
- ],
- "description": "As described in the [OpenAI documentation](https://platform.openai.com/docsgpt/function-calling), the basic steps for using functions are:
-1. Call one of the models \`gpt-3.5-turbo-0613\` and \`gpt-4-0613\` with a user query and a set of function definitions described using the universal [json-schema](https://json-schema.org/) syntax.
-2. The model can then choose to call one of the functions provided. If it does, a stringified JSON object adhering to your json schema definition will be returned.
-3. You can then parse the string into JSON in your code and call the chosen function with the provided arguments (**NB:** the model may hallucinate or return invalid json, be sure to consider these scenarios in your code).
-4. Finally call the model again by appending the function response as a new message. The model can then use this information to respond to the original use query.
+OPENAI FUNCTIONS API
-OpenAI have provided a simple example in their docs for a \`get_current_weather\` function that we will show how to adapt to use with Humanloop:
+We've updated our APIs to support OpenAI function calling [https://platform.openai.com/docsgpt/function-calling].
+
+OpenAI functions are now supported as tools on Humanloop. This allows you to pass tool definitions as part of the model
+configuration when calling our chat and log endpoints. For the latest OpenAI models gpt-3.5-turbo-0613 and gpt-4-0613 the model
+can then choose to output a JSON object containing arguments to call these tools.
+
+This unlocks getting more reliable structured data back from the model and makes it easier to create useful agents.
+
+
+RECAP ON OPENAI FUNCTIONS
+
+As described in the OpenAI documentation [https://platform.openai.com/docsgpt/function-calling], the basic steps for using
+functions are:
+
+ 1. Call one of the models gpt-3.5-turbo-0613 and gpt-4-0613 with a user query and a set of function definitions described using
+ the universal json-schema [https://json-schema.org/] syntax.
+ 2. The model can then choose to call one of the functions provided. If it does, a stringified JSON object adhering to your json
+ schema definition will be returned.
+ 3. You can then parse the string into JSON in your code and call the chosen function with the provided arguments (NB: the model
+ may hallucinate or return invalid json, be sure to consider these scenarios in your code).
+ 4. Finally call the model again by appending the function response as a new message. The model can then use this information to
+ respond to the original user query.
+
+OpenAI have provided a simple example in their docs for a get_current_weather function that we will show how to adapt to use with
+Humanloop:
-\`\`\`python
import openai
import json
@@ -17209,81 +25973,32 @@ def run_conversation():
print(run_conversation())
-\`\`\`",
- "indexSegmentId": "0",
- "slug": "docs/v5/changelog/2023/7/3#recap-on-openai-functions",
- "title": "Recap on OpenAI functions",
- "type": "page-v4",
- "version": {
- "id": "v5.0",
- "slug": "docs/getting-started/overview",
- },
- },
- {
- "breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2023",
- "title": "2023",
- },
- {
- "slug": "docs/changelog/7",
- "title": "July 1900",
- },
- {
- "slug": "docs/v5/changelog/2023/7/3#openai-functions-api",
- "title": "OpenAI functions API",
- },
- ],
- "description": "OpenAI functions are treated as tools on Humanloop. Tools conveniently follow the same universal json-schema definition as OpenAI functions.
-We've expanded the definition of our model configuration to also include tool definitions. Historically the model config is made up of the chat template, choice of base model and any hyper-parameters that change the behaviour of the model.
-In the cases of OpenAIs \`gpt-3.5-turbo-0613\` and \`gpt-4-0613\` models, any tools defined as part of the model config are passed through as functions for the model to use.
-You can now specify these tools when using the Humanloop chat endpoint (as a replacement for OpenAI's ChatCompletion), or when using the Humanloop log endpoint in addition to the OpenAI calls:",
- "indexSegmentId": "0",
- "slug": "docs/v5/changelog/2023/7/3#using-with-humanloop-tools",
- "title": "Using with Humanloop tools",
- "type": "page-v4",
- "version": {
- "id": "v5.0",
- "slug": "docs/getting-started/overview",
- },
- },
- {
- "breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2023",
- "title": "2023",
- },
- {
- "slug": "docs/changelog/7",
- "title": "July 1900",
- },
- {
- "slug": "docs/v5/changelog/2023/7/3#openai-functions-api",
- "title": "OpenAI functions API",
- },
- {
- "slug": "docs/v5/changelog/2023/7/3#using-with-humanloop-tools",
- "title": "Using with Humanloop tools",
- },
- ],
- "description": "We show here how to update the \`run_conversation()\` method from the OpenAI example to instead use the Humanloop chat endpoint with tools:
+USING WITH HUMANLOOP TOOLS
+
+OpenAI functions are treated as tools on Humanloop. Tools conveniently follow the same universal json-schema definition as OpenAI
+functions.
+
+We've expanded the definition of our model configuration to also include tool definitions. Historically the model config is made
+up of the chat template, choice of base model and any hyper-parameters that change the behaviour of the model.
+
+In the case of OpenAI's gpt-3.5-turbo-0613 and gpt-4-0613 models, any tools defined as part of the model config are passed through
+as functions for the model to use.
+
+You can now specify these tools when using the Humanloop chat endpoint (as a replacement for OpenAI's ChatCompletion), or when
+using the Humanloop log endpoint in addition to the OpenAI calls:
+
+CHAT ENDPOINT
+
+We show here how to update the run_conversation() method from the OpenAI example to instead use the Humanloop chat endpoint with
+tools:
-\`\`\`python
from humanloop import Humanloop
hl = Humanloop(
- # get your API key here: https://app.humanloop.com/account/api-keys
+ # get your API key here: https://app.humanloop.com/account/api-keys
api_key="YOUR_API_KEY",
)
@@ -17291,7 +26006,7 @@ def run_conversation():
# Step 1: send the conversation and available functions to GPT
messages = [{"role": "user", "content": "What's the weather like in Boston?"}]
# functions are referred to as tools on Humanloop, but follows the same schema
- tools = [
+ tools = [
{
"name": "get_current_weather",
"description": "Get the current weather in a given location",
@@ -17312,7 +26027,7 @@ def run_conversation():
project="Assistant",
model_config={
"model": "gpt-3.5-turbo-0613",
- "tools": tools
+ "tools": tools
},
messages=messages
)
@@ -17351,50 +26066,470 @@ def run_conversation():
messages=messages
)
return second_response
-\`\`\`
-After running this snippet, the model configuration recorded on your project in Humanloop will now track what tools were provided to the model and the logged datapoints will provide details of the tool called to inspect:
-![](file:d3fcafd5-8dff-4584-bdf5-980a87614427)",
- "indexSegmentId": "0",
- "slug": "docs/v5/changelog/2023/7/3#chat-endpoint",
- "title": "Chat endpoint",
- "type": "page-v4",
- "version": {
- "id": "v5.0",
- "slug": "docs/getting-started/overview",
- },
- },
- {
- "breadcrumbs": [
- {
- "slug": "docs/changelog",
- "title": "Changelog",
- },
- {
- "slug": "docs/changelog/2023",
- "title": "2023",
- },
- {
- "slug": "docs/changelog/7",
- "title": "July 1900",
- },
- {
- "slug": "docs/v5/changelog/2023/7/3#openai-functions-api",
- "title": "OpenAI functions API",
- },
- {
- "slug": "docs/v5/changelog/2023/7/3#using-with-humanloop-tools",
- "title": "Using with Humanloop tools",
- },
- ],
- "description": "Alternatively, you can also use the explicit Humanloop log alongside your existing OpenAI calls to achieve the same result:
+After running this snippet, the model configuration recorded on your project in Humanloop will now track what tools were provided
+to the model and the logged datapoints will provide details of the tool called to inspect:
+
+[file:d3fcafd5-8dff-4584-bdf5-980a87614427]
+
+LOG ENDPOINT
+
+Alternatively, you can also use the explicit Humanloop log alongside your existing OpenAI calls to achieve the same result:
-\`\`\`python
from humanloop import Humanloop
hl = Humanloop(
- # get your API key here: https://app.humanloop.com/account/api-keys
+ # get your API key here: https://app.humanloop.com/account/api-keys
+ api_key="YOUR_API_KEY",
+)
+
+def run_conversation():
+ # Step 1: send the conversation and available functions to GPT
+ messages = [{"role": "user", "content": "What's the weather like in Boston?"}]
+ functions = [
+ {
+ "name": "get_current_weather",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "The city and state, e.g. San Francisco, CA",
+ },
+ "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+ },
+ "required": ["location"],
+ },
+ }
+ ]
+ response = openai.ChatCompletion.create(
+ model="gpt-3.5-turbo-0613",
+ messages=messages,
+ functions=functions,
+ function_call="auto", # auto is default, but we'll be explicit
+ )
+ response_message = response["choices"][0]["message"]
+
+ # log the result to humanloop
+ log_response = hl.log(
+ project="Assistant",
+ model_config={
+ "model": "gpt-3.5-turbo-0613",
+ "tools": tools,
+ },
+ messages=messages,
+ tool_call=response_message.get("function_call")
+ )
+
+ # Step 2: check if GPT wanted to call a function
+ if response_message.get("function_call"):
+ # Step 3: call the function
+ # Note: the JSON response may not always be valid; be sure to handle errors
+ available_functions = {
+ "get_current_weather": get_current_weather,
+ } # only one function in this example, but you can have multiple
+ function_name = response_message["function_call"]["name"]
+ fuction_to_call = available_functions[function_name]
+ function_args = json.loads(response_message["function_call"]["arguments"])
+ function_response = fuction_to_call(
+ location=function_args.get("location"),
+ unit=function_args.get("unit"),
+ )
+
+ # Step 4: send the info on the function call and function response to GPT
+ messages.append(response_message) # extend conversation with assistant's reply
+ messages.append(
+ {
+ "role": "function",
+ "name": function_name,
+ "content": function_response,
+ }
+ ) # extend conversation with function response
+ second_response = openai.ChatCompletion.create(
+ model="gpt-3.5-turbo-0613",
+ messages=messages,
+ ) # get a new response from GPT where it can see the function response
+
+ log_response = hl.log(
+ project="Assistant",
+ model_config={
+ "model": "gpt-3.5-turbo-0613",
+ "tools": tools,
+ },
+ messages=messages,
+ output=second_response["choices"][0]["message"]["content"],
+ )
+ return second_response
+
+
+print(run_conversation())
+
+
+
+COMING SOON
+
+Support for defining tools in the playground!",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/7/3",
+ "title": "July 3, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2023",
+ "title": "2023",
+ },
+ {
+ "slug": "docs/changelog/7",
+ "title": "July 1900",
+ },
+ ],
+ "description": "Today we’re announcing Tools as a part of Humanloop.
+
+Tools allow you to connect an LLM to any API and to an array of data sources to give it extra capabilities and access to private data. Under your organization settings on Humanloop you can now configure and manage tools in a central place.
+
+Read more on [our blog](https://humanloop.com/blog/announcing-tools) and see an example of setting up a [tool for semantic search](/docs/guides/set-up-semantic-search).",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/7/3#introducing-tools",
+ "title": "Introducing Tools",
+ "type": "page-v4",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2023",
+ "title": "2023",
+ },
+ {
+ "slug": "docs/changelog/7",
+ "title": "July 1900",
+ },
+ ],
+ "description": "We've updated our APIs to support [OpenAI function calling](https://platform.openai.com/docsgpt/function-calling).
+
+OpenAI functions are now supported as tools on Humanloop. This allows you to pass tool definitions as part of the model configuration when calling our \`chat\` and \`log\` endpoints. For the latest OpenAI models \`gpt-3.5-turbo-0613\` and \`gpt-4-0613\` the model can then choose to output a JSON object containing arguments to call these tools.
+
+This unlocks getting more reliable structured data back from the model and makes it easier to create useful agents.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/7/3#openai-functions-api",
+ "title": "OpenAI functions API",
+ "type": "page-v4",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2023",
+ "title": "2023",
+ },
+ {
+ "slug": "docs/changelog/7",
+ "title": "July 1900",
+ },
+ {
+ "slug": "docs/v5/changelog/2023/7/3#openai-functions-api",
+ "title": "OpenAI functions API",
+ },
+ ],
+ "description": "As described in the [OpenAI documentation](https://platform.openai.com/docsgpt/function-calling), the basic steps for using functions are:
+
+1. Call one of the models \`gpt-3.5-turbo-0613\` and \`gpt-4-0613\` with a user query and a set of function definitions described using the universal [json-schema](https://json-schema.org/) syntax.
+2. The model can then choose to call one of the functions provided. If it does, a stringified JSON object adhering to your json schema definition will be returned.
+3. You can then parse the string into JSON in your code and call the chosen function with the provided arguments (**NB:** the model may hallucinate or return invalid json, be sure to consider these scenarios in your code).
+4. Finally call the model again by appending the function response as a new message. The model can then use this information to respond to the original user query.
+
+OpenAI have provided a simple example in their docs for a \`get_current_weather\` function that we will show how to adapt to use with Humanloop:
+
+\`\`\`python
+import openai
+import json
+
+
+# Example dummy function hard coded to return the same weather
+# In production, this could be your backend API or an external API
+def get_current_weather(location, unit="fahrenheit"):
+ """Get the current weather in a given location"""
+ weather_info = {
+ "location": location,
+ "temperature": "72",
+ "unit": unit,
+ "forecast": ["sunny", "windy"],
+ }
+ return json.dumps(weather_info)
+
+
+def run_conversation():
+ # Step 1: send the conversation and available functions to GPT
+ messages = [{"role": "user", "content": "What's the weather like in Boston?"}]
+ functions = [
+ {
+ "name": "get_current_weather",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "The city and state, e.g. San Francisco, CA",
+ },
+ "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+ },
+ "required": ["location"],
+ },
+ }
+ ]
+ response = openai.ChatCompletion.create(
+ model="gpt-3.5-turbo-0613",
+ messages=messages,
+ functions=functions,
+ function_call="auto", # auto is default, but we'll be explicit
+ )
+ response_message = response["choices"][0]["message"]
+
+ # Step 2: check if GPT wanted to call a function
+ if response_message.get("function_call"):
+ # Step 3: call the function
+ # Note: the JSON response may not always be valid; be sure to handle errors
+ available_functions = {
+ "get_current_weather": get_current_weather,
+ } # only one function in this example, but you can have multiple
+ function_name = response_message["function_call"]["name"]
+        function_to_call = available_functions[function_name]
+        function_args = json.loads(response_message["function_call"]["arguments"])
+        function_response = function_to_call(
+ location=function_args.get("location"),
+ unit=function_args.get("unit"),
+ )
+
+ # Step 4: send the info on the function call and function response to GPT
+ messages.append(response_message) # extend conversation with assistant's reply
+ messages.append(
+ {
+ "role": "function",
+ "name": function_name,
+ "content": function_response,
+ }
+ ) # extend conversation with function response
+ second_response = openai.ChatCompletion.create(
+ model="gpt-3.5-turbo-0613",
+ messages=messages,
+ ) # get a new response from GPT where it can see the function response
+ return second_response
+
+
+print(run_conversation())
+\`\`\`",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/7/3#recap-on-openai-functions",
+ "title": "Recap on OpenAI functions",
+ "type": "page-v4",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2023",
+ "title": "2023",
+ },
+ {
+ "slug": "docs/changelog/7",
+ "title": "July 1900",
+ },
+ {
+ "slug": "docs/v5/changelog/2023/7/3#openai-functions-api",
+ "title": "OpenAI functions API",
+ },
+ ],
+ "description": "OpenAI functions are treated as tools on Humanloop. Tools conveniently follow the same universal json-schema definition as OpenAI functions.
+
+We've expanded the definition of our model configuration to also include tool definitions. Historically the model config is made up of the chat template, choice of base model and any hyper-parameters that change the behaviour of the model.
+
+In the case of OpenAI's \`gpt-3.5-turbo-0613\` and \`gpt-4-0613\` models, any tools defined as part of the model config are passed through as functions for the model to use.
+
+You can now specify these tools when using the Humanloop chat endpoint (as a replacement for OpenAI's ChatCompletion), or when using the Humanloop log endpoint in addition to the OpenAI calls:",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/7/3#using-with-humanloop-tools",
+ "title": "Using with Humanloop tools",
+ "type": "page-v4",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2023",
+ "title": "2023",
+ },
+ {
+ "slug": "docs/changelog/7",
+ "title": "July 1900",
+ },
+ {
+ "slug": "docs/v5/changelog/2023/7/3#openai-functions-api",
+ "title": "OpenAI functions API",
+ },
+ {
+ "slug": "docs/v5/changelog/2023/7/3#using-with-humanloop-tools",
+ "title": "Using with Humanloop tools",
+ },
+ ],
+ "description": "We show here how to update the \`run_conversation()\` method from the OpenAI example to instead use the Humanloop chat endpoint with tools:
+
+\`\`\`python
+from humanloop import Humanloop
+
+hl = Humanloop(
+ # get your API key here: https://app.humanloop.com/account/api-keys
+ api_key="YOUR_API_KEY",
+)
+
+def run_conversation():
+ # Step 1: send the conversation and available functions to GPT
+ messages = [{"role": "user", "content": "What's the weather like in Boston?"}]
+ # functions are referred to as tools on Humanloop, but follows the same schema
+ tools = [
+ {
+ "name": "get_current_weather",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "The city and state, e.g. San Francisco, CA",
+ },
+ "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+ },
+ "required": ["location"],
+ },
+ }
+ ]
+ response = hl.chat(
+ project="Assistant",
+ model_config={
+ "model": "gpt-3.5-turbo-0613",
+ "tools": tools
+ },
+ messages=messages
+ )
+ response = response.body.data[0]
+
+ # Step 2: check if GPT wanted to call a tool
+ if response.get("tool_call"):
+ # Step 3: call the function
+ # Note: the JSON response may not always be valid; be sure to handle errors
+ available_functions = {
+ "get_current_weather": get_current_weather,
+ } # only one function in this example, but you can have multiple
+        function_name = response["tool_call"]["name"]
+        function_to_call = available_functions[function_name]
+        function_args = json.loads(response["tool_call"]["arguments"])
+        function_response = function_to_call(
+ location=function_args.get("location"),
+ unit=function_args.get("unit"),
+ )
+
+ # Step 4: send the response back to the model
+        messages.append(response)
+ messages.append(
+ {
+ "role": "tool",
+ "name": function_name,
+ "content": function_response,
+ }
+ )
+ second_response = hl.chat(
+ project="Assistant",
+ model_config={
+ "model": "gpt-3.5-turbo-0613",
+ "tools": tools
+ },
+ messages=messages
+ )
+ return second_response
+\`\`\`
+
+After running this snippet, the model configuration recorded on your project in Humanloop will now track which tools were provided to the model and the logged datapoints will include details of the tool call for you to inspect:
+
+![](file:d3fcafd5-8dff-4584-bdf5-980a87614427)",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/7/3#chat-endpoint",
+ "title": "Chat endpoint",
+ "type": "page-v4",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ {
+ "slug": "docs/changelog",
+ "title": "Changelog",
+ },
+ {
+ "slug": "docs/changelog/2023",
+ "title": "2023",
+ },
+ {
+ "slug": "docs/changelog/7",
+ "title": "July 1900",
+ },
+ {
+ "slug": "docs/v5/changelog/2023/7/3#openai-functions-api",
+ "title": "OpenAI functions API",
+ },
+ {
+ "slug": "docs/v5/changelog/2023/7/3#using-with-humanloop-tools",
+ "title": "Using with Humanloop tools",
+ },
+ ],
+ "description": "Alternatively, you can also use the explicit Humanloop log alongside your existing OpenAI calls to achieve the same result:
+
+\`\`\`python
+from humanloop import Humanloop
+
+hl = Humanloop(
+ # get your API key here: https://app.humanloop.com/account/api-keys
api_key="YOUR_API_KEY",
)
@@ -17543,6 +26678,87 @@ print(run_conversation())
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "June 1900",
+ ],
+ "content": "DEPLOYMENT ENVIRONMENTS
+
+We've added support for environments to your deployments in Humanloop!
+
+This enables you to deploy your model configurations to specific environments. You'll no longer have to duplicate your projects to
+manage the deployment workflow between testing and production. With environments, you'll have the control required to manage the
+full LLM deployment lifecycle.
+
+
+ENABLING ENVIRONMENTS FOR YOUR ORGANISATION
+
+Every organisation automatically receives a default production environment. For any of your existing projects that had active
+deployments defined, these have been automatically migrated over to use the default environment with no change in behaviour for the
+APIs.
+
+You can create additional environments with custom names by visiting your organisation's environments page
+[https://app.humanloop.com/account/environments].
+
+CREATING AN ENVIRONMENT
+
+Enter a custom name in the create environment dialog. Names have a constraint in that they must be unique within an organisation.
+
+[file:c987da1d-9f61-46e6-ad6b-358acac5918b]
+
+The environments you define for your organisation will be available for each project and can be viewed in the project dashboard
+once created.
+
+[file:f8d8d011-8051-4e9a-b6e0-79f06dc95dad]
+
+THE DEFAULT ENVIRONMENT
+
+By default, the production environment is marked as the Default environment. This means that all API calls targeting the "Active
+Deployment," such as Get Active Config [/api-reference/projects/getactiveconfig] or Chat Deployed
+[/api-reference/chats/createdeployed] will use this environment.
+
+Renaming environments will take immediate effect, so ensure that this change is planned and does not disrupt your production
+workflows.
+
+
+USING ENVIRONMENTS
+
+Once created on the environments page, environments can be used for each project and are visible in the respective project
+dashboards.
+
+You can deploy directly to a specific environment by selecting it in the Deployments section.
+
+[file:d2a9f417-bc43-4729-beb0-52adc535df07]
+
+Alternatively, you can deploy to multiple environments simultaneously by deploying a Model Config from either the Editor or the
+Model Configs table.
+
+
+USING ENVIRONMENTS VIA API
+
+[file:705f0552-7c7a-4cab-aa7d-a2519c207374]
+
+For v4.0 API endpoints that support Active Deployments, such as Get Active Config [/api-reference/projects/getactiveconfig] or
+Chat Deployed [/api-reference/chats/createdeployed], you can now optionally point to a model configuration deployed in a specific
+environment by including an optional additional environment field.
+
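+As a rough sketch only (not from the original changelog; it assumes the Python SDK exposes a chat_deployed method, the deployed
+counterpart of complete_deployed shown elsewhere in these docs, and that the optional field is simply named environment),
+targeting a non-default environment could look like:
+
+from humanloop import Humanloop
+
+humanloop = Humanloop(api_key="YOUR_API_KEY")
+
+# "staging" is a hypothetical environment name created on the environments page
+chat_response = humanloop.chat_deployed(
+    project="Assistant",
+    messages=[{"role": "user", "content": "What's the weather like in Boston?"}],
+    environment="staging",
+)
+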
+You can find this information in our v4.0 API Documentation or within the environment card in the Project Dashboard under the "Use
+API" option.
+
+Clicking on the "Use API" option will provide code snippets that demonstrate the usage of the environment variable in practice.
+
+[file:67da63be-577c-4fe7-a35a-78522f699c41]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/6/27",
+ "title": "June 27, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -17774,6 +26990,161 @@ Clicking on the "Use API" option will provide code snippets that demonstrate the
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "June 1900",
+ ],
+ "content": "IMPROVED PYTHON SDK STREAMING RESPONSE
+
+We've improved our Python SDK's streaming response to contain the datapoint ID. Using the ID, you can now provide feedback to
+datapoints created through streaming.
+
+The humanloop.chat_stream() and humanloop.complete_stream() methods now yield a dictionary with output and id.
+
+{'output': '...', 'id': 'data_...'}
+
+
+Install the updated SDK with
+
+pip install --upgrade humanloop
+
+
+
+EXAMPLE SNIPPET
+
+import asyncio
+from humanloop import Humanloop
+
+humanloop = Humanloop(
+ api_key="YOUR_API_KEY",
+ openai_api_key="YOUR_OPENAI_API_KEY",
+)
+
+async def main():
+ response = await humanloop.chat_stream(
+ project="sdk-example",
+ messages=[
+ {
+ "role": "user",
+ "content": "Explain asynchronous programming.",
+ }
+ ],
+ model_config={
+ "model": "gpt-3.5-turbo",
+ "max_tokens": -1,
+ "temperature": 0.7,
+ "chat_template": [
+ {
+ "role": "system",
+ "content": "You are a helpful assistant who replies in the style of {{persona}}.",
+ },
+ ],
+ },
+ inputs={
+ "persona": "the pirate Blackbeard",
+ },
+ )
+ async for token in response.content:
+ print(token) # E.g. {'output': 'Ah', 'id': 'data_oun7034jMNpb0uBnb9uYx'}
+
+asyncio.run(main())
+
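+Because each streamed chunk now includes its id, that id can be passed straight into a feedback call. A minimal sketch (reusing
+the feedback signature shown in the tutorials below; the "rating" type and "good" value here are purely illustrative):
+
+datapoint_id = "data_oun7034jMNpb0uBnb9uYx"  # the 'id' field from a streamed chunk
+humanloop.feedback(type="rating", value="good", data_id=datapoint_id)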
+
+
+OPENAI AZURE SUPPORT
+
+We've just added support for Azure deployments of OpenAI models to Humanloop!
+
+This update adds the ability to target Microsoft Azure deployments of OpenAI models to the playground and your projects. To set
+this up, visit your organization's settings [https://app.humanloop.com/account/api-keys].
+
+
+ENABLING AZURE OPENAI FOR YOUR ORGANIZATION
+
+As a prerequisite, you will need to already be setup with Azure OpenAI Service. See the Azure OpenAI docs
+[https://learn.microsoft.com/en-us/azure/cognitive-services/openai/how-to/create-resource?pivots=web-portal] for more details. At
+the time of writing, access is granted by application only.
+
+[file:b06dd293-de11-4cbe-9583-1b71a9312c7d]
+
+Click the Setup button and provide your Azure OpenAI endpoint and API key.
+
+Your endpoint can be found in the Keys & Endpoint section when examining your resource from the Azure portal. Alternatively, you
+can find the value in Azure OpenAI Studio > Playground > Code View. An example endpoint is: docs-test-001.openai.azure.com.
+
+Your API keys can also be found in the Keys & Endpoint section when examining your resource from the Azure portal. You can use
+either KEY1 or KEY2.
+
+
+WORKING WITH AZURE OPENAI MODELS
+
+Once you've successfully enabled Azure OpenAI for your organization, you'll be able to access it through the playground
+[https://app.humanloop.com/playground] and in your projects in exactly the same way as your existing OpenAI and/or Anthropic
+models.
+
+[file:8f584d3e-49dd-4112-b46c-594acb505886]
+
+
+REST API AND PYTHON / TYPESCRIPT SUPPORT
+
+As with other model providers, once you've set up an Azure OpenAI-backed model config, you can call it with the Humanloop REST API
+or our SDKs [/docs/api-reference/sdks].
+
+import { Humanloop } from "humanloop";
+
+const humanloop = new Humanloop({
+ apiKey: "API_KEY",
+});
+
+const chatResponse = await humanloop.chat({
+ project: "project_example",
+ messages: [
+ {
+ role: "user",
+ content: "Write me a song",
+ },
+ ],
+ provider_api_keys: {
+ openai_azure: OPENAI_AZURE_API_KEY,
+ openai_azure_endpoint: OPENAI_AZURE_ENDPOINT,
+ },
+ model_config: {
+ model: "my-azure-deployed-gpt-4",
+ temperature: 1,
+ },
+});
+
+console.log(chatResponse);
+
+
+In the model_config.model field, provide the name of the model that you deployed from the Azure portal (see note below for
+important naming conventions when setting up your deployment in the Azure portal).
+
+The request will use the stored organization level key and endpoint you configured above, unless you override this on a
+per-request basis by passing both the endpoint and API key in the provider_api_keys field, as shown in the example above.
+
+
+NOTE: NAMING MODEL DEPLOYMENTS
+
+When you deploy a model through the Azure portal, you'll have the ability to provide your deployment with a unique name. For
+instance, if you choose to deploy an instance of gpt-35-turbo in your OpenAI Service, you may choose to give this an arbitrary
+name like my-orgs-llm-model.
+
+In order to use all Humanloop features with your Azure model deployment, you must ensure that your deployments are named either
+with an unmodified base model name like gpt-35-turbo, or the base model name with a custom prefix like my-org-gpt-35-turbo. If
+your model deployments use arbitrary names which do not prefix a base model name, you may find that certain features such as
+setting max_tokens=-1 in your model configs fail to work as expected.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/6/20",
+ "title": "June 20, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -18089,6 +27460,35 @@ In order to use all Humanloop features with your Azure model deployment, you mus
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "June 1900",
+ ],
+ "content": "PROJECT EDITOR
+
+We’ve introduced an Editor within each project to make it easier to change prompts and bring in project-specific data.
+
+The Editor will load up the currently active model config, and will save the generations in the project’s data table.
+[file:8138137e-3d52-4f14-a62a-536e3c02ec27]
+
+You can now also bring datapoints directly to the Editor. Select any datapoints you want to bring to the Editor (also available
+through the x shortcut) and choose to open them in the Editor (or the e shortcut).
+
+Press \`e\` while selecting a datapoint to bring it into Editor [file:8312da7a-d676-4948-9258-d9cfbb91b8c0]
+
+We think this significantly improves the workflow of going from an interesting datapoint to an improved model config. As always,
+let us know if you have other feedback.
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/6/13",
+ "title": "June 13, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -18147,6 +27547,80 @@ We think this workflow significantly improves the workflow to go from interestin
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "May 1900",
+ ],
+ "content": "COHERE
+
+We've just added support for Cohere to Humanloop!
+
+[file:4421506b-5cde-4887-8466-64a62d24508d]
+
+This update adds Cohere models to the playground and your projects - just add your Cohere API key in your organization's settings
+[https://app.humanloop.com/account/api-keys]. As with other providers, each user in your organization can also set a personal
+override API key, stored locally in the browser, for use in Cohere requests from the Playground.
+
+
+ENABLING COHERE FOR YOUR ORGANIZATION
+
+Add your Cohere API key to your organization settings to start using Cohere models with Humanloop.
+[file:76e1d4d3-4683-4d96-98b4-df9062c314e5]
+
+
+WORKING WITH COHERE MODELS
+
+Once you've successfully enabled Cohere for your organization, you'll be able to access it through the playground
+[https://app.humanloop.com/playground] and in your projects, in exactly the same way as your existing OpenAI and/or Anthropic
+models.
+
+[file:5f66beef-0b63-4b1b-bc04-9542bfe80a3d]
+
+
+REST API AND PYTHON / TYPESCRIPT SUPPORT
+
+As with other model providers, once you've set up a Cohere-backed model config, you can call it with the Humanloop REST API or our
+SDKs [/docs/api-reference/sdks].
+
+import { Humanloop } from "humanloop";
+
+const humanloop = new Humanloop({
+ apiKey: "API_KEY",
+});
+
+const chatResponse = await humanloop.chat({
+ project: "project_example",
+ messages: [
+ {
+ role: "user",
+ content: "Write me a song",
+ },
+ ],
+ provider_api_keys: {
+ cohere: COHERE_API_KEY,
+ },
+ model_config: {
+ model: "command",
+ temperature: 1,
+ },
+});
+
+console.log(chatResponse);
+
+
+If you don't provide a Cohere API key under the provider_api_keys field, the request will fall back on the stored organization
+level key you configured above.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/5/23",
+ "title": "May 23, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -18319,6 +27793,78 @@ If you don't provide a Cohere API key under the \`provider_api_keys\` field, the
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "May 1900",
+ ],
+ "content": "IMPROVED PYTHON SDK
+
+We've just released a new version of our Python SDK supporting our v4 API!
+
+This brings support for:
+
+ * 💬 Chat mode humanloop.chat(...)
+ * 📥 Streaming support humanloop.chat_stream(...)
+ * 🕟 Async methods humanloop.acomplete(...)
+
+https://pypi.org/project/humanloop/ [https://pypi.org/project/humanloop/]
+
+
+INSTALLATION
+
+pip install --upgrade humanloop
+
+
+EXAMPLE USAGE
+
+complete_response = humanloop.complete(
+ project="sdk-example",
+ inputs={
+ "text": "Llamas that are well-socialized and trained to halter and lead after weaning and are very friendly and pleasant to be around. They are extremely curious and most will approach people easily. However, llamas that are bottle-fed or over-socialized and over-handled as youth will become extremely difficult to handle when mature, when they will begin to treat humans as they treat each other, which is characterized by bouts of spitting, kicking and neck wrestling.[33]",
+ },
+ model_config={
+ "model": "gpt-3.5-turbo",
+ "max_tokens": -1,
+ "temperature": 0.7,
+ "prompt_template": "Summarize this for a second-grade student:\\n\\nText:\\n{{text}}\\n\\nSummary:\\n",
+ },
+ stream=False,
+)
+pprint(complete_response)
+pprint(complete_response.project_id)
+pprint(complete_response.data[0])
+pprint(complete_response.provider_responses)
+
+
+
+MIGRATION FROM 0.3.X
+
+For those coming from an older SDK version, this introduces some breaking changes. A brief highlight of the changes:
+
+ * The client initialization step of hl.init(...) is now humanloop = Humanloop(...).
+ * Previously provider_api_keys could be provided in hl.init(...). They should now be provided when constructing Humanloop(...)
+ client.
+
+ * humanloop = Humanloop(
+ api_key="YOUR_API_KEY",
+ openai_api_key="YOUR_OPENAI_API_KEY",
+ anthropic_api_key="YOUR_ANTHROPIC_API_KEY",
+ )
+
+ * hl.generate(...)'s various call signatures have now been split into individual methods for clarity. The main ones are:
+ * humanloop.complete(project, model_config={...}, ...) for a completion with the specified model config parameters.
+ * humanloop.complete_deployed(project, ...) for a completion with the project's active deployment.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/5/17",
+ "title": "May 17, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -18495,6 +28041,64 @@ pprint(complete_response.provider_responses)
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "April 1900",
+ ],
+ "content": "TYPESCRIPT SDK
+
+We now have a fully typed TypeScript SDK to make working with Humanloop even easier.
+
+https://www.npmjs.com/package/humanloop [https://www.npmjs.com/package/humanloop]
+
+You can use this with your JavaScript, TypeScript or Node projects.
+
+Installation
+
+npm i humanloop
+
+
+Example usage
+
+import { Humanloop } from "humanloop"
+
+const humanloop = new Humanloop({
+ // Defining the base path is optional and defaults to https://api.humanloop.com/v3
+ // basePath: "https://api.humanloop.com/v3",
+ apiKey: 'API_KEY',
+})
+
+
+const chatResponse = await humanloop.chat({
+ "project": "project_example",
+ "messages": [
+ {
+ "role": "user",
+ "content": "Write me a song",
+ }
+ ],
+ "provider_api_keys": {
+ "openai": OPENAI_API_KEY
+ },
+ "model_config": {
+ "model": "gpt-4",
+ "temperature": 1,
+ },
+})
+
+console.log(chatResponse)
+",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/4/3",
+ "title": "April 3, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -18587,6 +28191,35 @@ console.log(chatResponse)
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "March 1900",
+ ],
+ "content": "KEYBOARD SHORTCUTS AND DATAPOINT LINKS
+
+[file:e8a301be-912c-43bf-98a1-546682678e5d]
+
+We’ve added keyboard shortcuts to the datapoint viewer
+
+g for good
+b for bad
+
+and j / k for next/prev
+
+This should help you for quickly annotating data within your team.
+
+You can also link to specific datapoint in the URL now as well.",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/3/30",
+ "title": "March 30, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -18648,6 +28281,28 @@ You can also link to specific datapoint in the URL now as well.",
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "March 1900",
+ ],
+ "content": "CHATGPT SUPPORT
+
+ChatGPT is here! It's called 'gpt-3.5-turbo'. Try it out today in playground and on the generate endpoint.
+
+Faster and 10x cheaper than text-davinci-003.
+
+[file:003985af-abc8-4c19-b093-a3c944a1cee4]",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/3/2",
+ "title": "March 2, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -18702,6 +28357,41 @@ Faster and 10x cheaper than text-davinci-003.
"slug": "docs/getting-started/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Changelog",
+ "2023",
+ "February 1900",
+ ],
+ "content": "FASTER DATAPOINTS TABLE LOADING
+
+Initial datapoints table is now twice as fast to load! And it will continue to get faster.
+
+
+ABILITY TO OPEN DATAPOINT IN PLAYGROUND
+
+Added a way to go from the datapoint drawer to the playground with that datapoint loaded. Very convenient for trying tweaks to a
+model config or understanding an issue, without copy pasting.
+
+
+
+
+MARKDOWN VIEW AND COMPLETED PROMPT TEMPLATES
+
+We’ve added a tab to the datapoint drawer so you can see the prompt template filled in with the inputs and output.
+
+We’ve also added a button in the top right hand corner (or press M) to toggle on/off viewing the text as markdown.
+
+",
+ "indexSegmentId": "0",
+ "slug": "docs/v5/changelog/2023/2/20",
+ "title": "February 20, 2023",
+ "type": "page-v3",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -18839,6 +28529,247 @@ This tutorial picks up where the [Quick Start](./quickstart) left off. If you’
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Tutorials",
+ ],
+ "content": "At the end of this tutorial, you’ll have created your first GPT-4 app. You’ll also have learned how to:
+
+ 1. Create a Prompt
+ 2. Use the Humanloop SDK to call Open AI GPT-4 and log your results
+ 3. Capture feedback from your end users to evaluate and improve your model
+
+In this tutorial, you'll build a simple GPT-4 app that can explain a topic in the style of different experts.
+[file:c07d40a7-280a-4950-9bc9-617a9049b1df]
+
+This tutorial picks up where the Quick Start [./quickstart] left off. If you’ve already followed the quick start you can skip to
+step 4 below.
+
+
+CREATE THE PROMPT
+
+CREATE A HUMANLOOP ACCOUNT
+
+If you haven’t already, create an account or log in to Humanloop
+
+ADD AN OPENAI API KEY
+
+If you’re the first person in your organization, you’ll need to add an API key to a model provider.
+
+ 1. Go to OpenAI and grab an API key [https://platform.openai.com/api-keys]
+ 2. In Humanloop Organization Settings [https://app.humanloop.com/account/api-keys] set up OpenAI as a model provider.
+
+Using the Prompt Editor will use your OpenAI credits in the same way that the OpenAI playground does. Keep your API keys for
+Humanloop and the model providers private.
+
+
+GET STARTED
+
+CREATE A PROMPT FILE
+
+When you first open Humanloop you’ll see your File navigation on the left. Click ‘+ New’ and create a Prompt.
+
+[file:ad732e1d-77a8-4576-9933-1db6f9d9d28f]
+
+In the sidebar, rename this file to "Comedian Bot" now or later.
+
+
+CREATE THE PROMPT TEMPLATE IN THE EDITOR
+
+The left hand side of the screen defines your Prompt – the parameters such as model, temperature and template. The right hand side
+is a single chat session with this Prompt.
+
+[file:b9ed95cc-edc2-4c49-b8d3-4f164a083123]
+
+Click the “+ Message” button within the chat template to add a system message to the chat template.
+
+[file:5d7dd0e4-73f6-41b9-ad2b-60ba9f349f26]
+
+Add the following templated message to the chat template.
+
+You are a funny comedian. Write a joke about {{topic}}.
+
+
+This message forms the chat template. It has an input slot called topic (surrounded by two curly brackets) for an input value that
+is provided each time you call this Prompt.
+
+On the right hand side of the page, you’ll now see a box in the Inputs section for topic.
+
+ 1. Add a value for topic e.g. music, jogging, whatever
+ 2. Click Run in the bottom right of the page
+
+This will call OpenAI’s model and return the assistant response. Feel free to try other values, the model is very funny.
+
+You now have a first version of your prompt that you can use.
+
+
+COMMIT YOUR FIRST VERSION OF THIS PROMPT
+
+ 1. Click the Commit button
+ 2. Put “initial version” in the commit message field
+ 3. Click Commit
+
+[file:386f75eb-c97a-4923-9823-168a14848719]
+
+
+VIEW THE LOGS
+
+Under the Prompt File, click ‘Logs’ to view all the generations from this Prompt
+
+Click on a row to see the details of what version of the prompt generated it. From here you can give feedback to that generation,
+see performance metrics, open up this example in the Editor, or add this log to a dataset.
+
+[file:f2b286b8-7fcf-4323-9308-6ca5fbc22e44]
+
+
+CALL THE PROMPT IN AN APP
+
+Now that you’ve found a good prompt and settings, you’re ready to build the "Learn anything from anyone" app! We’ve written some
+code to get you started — follow the instructions below to download the code and run the app.
+
+When you run the app, this is what you should see. [file:c07d40a7-280a-4950-9bc9-617a9049b1df]
+
+
+SETUP
+
+If you don’t have Python 3 installed, install it from here [https://www.python.org/downloads/]. Then download the code by cloning
+this repository [https://github.com/humanloop/humanloop-tutorial-python] in your terminal:
+
+git clone git@github.com:humanloop/humanloop-tutorial-python.git
+
+
+If you prefer not to use git, you can alternatively download the code using this zip file
+[https://github.com/humanloop/humanloop-tutorial-python/archive/refs/heads/main.zip].
+
+In your terminal, navigate into the project directory and make a copy of the example environment variables file.
+
+cd humanloop-tutorial-python
+cp .example.env .env
+
+
+Copy your Humanloop API key [https://app.humanloop.com/account/settings] and set it as HUMANLOOP_API_KEY in your newly created
+.env file. Copy your OpenAI API key [https://beta.openai.com/account/api-keys] and set it as the OPENAI_API_KEY.
+
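+At a minimum, your .env file should then define these two variables (placeholder values shown, not real keys):
+
+HUMANLOOP_API_KEY=<your Humanloop API key>
+OPENAI_API_KEY=<your OpenAI API key>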
+
+RUN THE APP
+
+Run the following commands in your terminal in the project directory to install the dependencies and run the app.
+
+python -m venv venv
+. venv/bin/activate
+pip install -r requirements.txt
+flask run
+
+
+Open http://localhost:5000 [http://localhost:5000] in your browser and you should see the app. If you type in the name of an
+expert, e.g "Aristotle", and a topic that they're famous for, e.g "ethics", the app will try to generate an explanation in their
+style.
+
+Press the thumbs-up or thumbs-down buttons to register your feedback on whether the generation is any good.
+
+Try a few more questions. Perhaps change the name of the expert and keep the topic fixed.
+
+
+VIEW THE DATA ON HUMANLOOP
+
+Now that you have a working app you can use Humanloop to measure and improve performance. Go back to the Humanloop app and go to
+your project named "learn-anything".
+
+On the Models dashboard you'll be able to see how many data points have flowed through the app as well as how much feedback you've
+received. Click on your model in the table at the bottom of the page.
+
+[file:f12d3836-53ea-451c-9480-cec4fc996a52]
+
+Click View data in the top right. Here you should be able to see each of your generations as well as the feedback that's been
+logged against them. You can also add your own internal feedback by clicking on a datapoint in the table and using the feedback
+buttons.
+
+
+UNDERSTAND THE CODE
+
+Open up the file app.py in the "openai-quickstart-python" folder. There are a few key code snippets that will let you understand
+how the app works.
+
+Between lines 30 and 41 you'll see the following code.
+
+expert = request.form["Expert"]
+topic = request.form["Topic"]
+
+# hl.complete automatically logs the data to your project.
+complete_response = humanloop.complete_deployed(
+ project="learn-anything",
+ inputs={"expert": expert, "topic": topic},
+ provider_api_keys={"openai": OPENAI_API_KEY}
+)
+
+data_id = complete_response.data[0].id
+result = complete_response.data[0].output
+
+
+On line 34 you can see the call to humanloop.complete_deployed which takes the project name and project inputs as variables.
+humanloop.complete_deployed calls GPT-4 and also automatically logs your data to the Humanloop app.
+
+In addition to returning the result of your model on line 39, you also get back a data_id which can be used for recording feedback
+about your generations.
+
+On line 51 of app.py, you can see an example of logging feedback to Humanloop.
+
+# Send feedback to Humanloop
+humanloop.feedback(type="rating", value="good", data_id=data_id)
+
+
+The call to humanloop.feedback uses the data_id returned above to associate a piece of positive feedback with that generation.
+
+In this app there are two feedback groups: rating (which can be good or bad) and actions, which here is the copy button and also
+indicates positive feedback from the user.
+
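+As a sketch only (the "copied" value is illustrative rather than taken from the app's code), logging the second feedback group
+would follow the same pattern:
+
+# Record that the user pressed the copy button for this generation
+humanloop.feedback(type="actions", value="copied", data_id=data_id)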
+
+ADD A NEW MODEL CONFIG
+
+If you experiment a bit, you might find that the model isn't initially that good. The answers are often too short or not in the
+style of the expert being asked. We can try to improve this by experimenting with other prompts.
+
+ 1. Click on your model on the model dashboard and then in the top right, click Editor
+
+ [file:8556f6d1-6daa-4976-a8ca-bd2ba3eaf5a9]
+
+ 2. Edit the prompt template to try and improve the prompt. Try changing the maximum number of tokens using the Max tokens slider,
+ or the wording of the prompt.
+
+ [file:89478698-dd26-48e7-9909-657ea619d2ae]
+
+Here are some prompt ideas to try out. Which ones work better?
+
+\`\`\`Text Transcript from lecture
+{{ expert }} recently gave a lecture on {{ topic }}. Here is a transcript of the most interesting section:
+\`\`\`
+
+\`\`\`Text ELI10
+If {{ expert }} explained {{ topic }} to a 10 year old, they would likely say:
+\`\`\`
+
+\`\`\`
+Write an essay in the style of {{ expert }} on {{ topic }}
+\`\`\`
+
+ 3. Click Save to add the new model to your project. Add it to the "learn-anything" project.
+
+ [file:5b3c9a30-5bda-44e3-92b1-b7c48bdd5d2a]
+
+ 4. Go to your project dashboard. At the top left of the page, click the menu of the "production" environment card. Within that,
+    click the Change deployment button and set a new model config as active; calls to humanloop.complete_deployed will now use
+    this new model. Now go back to the app and see the effect!
+
+[file:5b51fe33-1aa6-43c7-a2d1-284084f65785]
+
+
+CONGRATULATIONS!
+
+And that’s it! You should now have a full understanding of how to go from creating a Prompt in Humanloop to a deployed and
+functioning app. You've learned how to create prompt templates, capture user feedback and deploy new models.
+
+If you want to learn how to improve your model by running experiments or finetuning check out our guides below.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/tutorials/create-your-first-gpt-4-app",
+ "title": "Create your first GPT-4 App",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -19196,6 +29127,555 @@ If you just want to leap in, the complete repo for this project is available on
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Tutorials",
+ ],
+ "content": "At the end of this tutorial, you'll have built a simple ChatGPT-style interface using Humanloop as the backend to manage
+interactions with your model provider, track user engagement and experiment with model configuration.
+
+If you just want to leap in, the complete repo for this project is available on GitHub here.
+[https://github.com/humanloop/hl-chatgpt-clone-typescript]
+
+A simple ChatGPT-style interface using the Humanloop SDK to manage interaction with your model provider, track user engagement,
+log results and help you evaluate and improve your model. [file:cb2912b0-8828-44c9-b339-d8ea51de3de7]
+
+
+STEP 1: CREATE A NEW PROMPT IN HUMANLOOP
+
+First, create a Prompt with the name chat-tutorial-ts. Go to the Editor tab on the left. Here, we can play with parameters and
+prompt templates to create a model which will be accessible via the Humanloop SDK.
+
+If this is your first time using the Prompt Editor, you'll be prompted to enter an OpenAI API key. You can create one here
+[https://beta.openai.com/account/api-keys].
+
+The Prompt Editor is an interactive environment where you can experiment with prompt templates to create a model which will be
+accessible via the Humanloop SDK.
+
+[file:848ac972-31ac-4bcf-8d18-8f9902feed53]
+
+Let's try to create a chess tutor. Paste the following system message into the Chat template box on the left-hand side.
+
+You are a chess grandmaster, who is also a friendly and helpful chess instructor.
+
+Play a game of chess with the user. Make your own moves in reply to the student.
+
+Explain succinctly why you made that move. Make your moves in algebraic notation.
+
+
+In the Parameters section above, select gpt-4 as the model. Click Commit and enter a commit message such as "GPT-4 Grandmaster".
+
+Navigate back to the Dashboard tab in the sidebar. Your new Prompt Version is visible in the table at the bottom of the Prompt
+dashboard.
+
+
+STEP 2: SET UP A NEXT.JS APPLICATION
+
+Now, let's turn to building out a simple Next.js application. We'll use the Humanloop TypeScript SDK to provide programmatic
+access to the model we just created.
+
+Run npx create-next-app@latest to create a fresh Next.js project. Accept all the default config options in the setup wizard (which
+includes using TypeScript, Tailwind, and the Next.js app router). Now npm run dev to fire up the development server.
+
+Next npm i humanloop to install the Humanloop SDK in your project.
+
+Edit app/page.tsx to the following. This code stubs out the basic React components and state management we need for a chat
+interface.
+
+"use client";
+
+import { ChatMessageWithToolCall } from "humanloop";
+import * as React from "react";
+
+const { useState } = React;
+
+export default function Home() {
+ const [messages, setMessages] = useState([]);
+ const [inputValue, setInputValue] = useState("");
+
+ const onSend = async () => {
+ const userMessage: ChatMessageWithToolCall = {
+ role: "user",
+ content: inputValue,
+ };
+
+ setInputValue("");
+
+ const newMessages = [...messages, userMessage];
+
+ setMessages(newMessages);
+
+ // REPLACE ME LATER
+ const res = "I'm not a language model. I'm just a string. 😞";
+ // END REPLACE ME
+
+ const assistantMessage: ChatMessageWithToolCall = {
+ role: "assistant",
+ content: res,
+ };
+
+ setMessages([...newMessages, assistantMessage]);
+ };
+
+ const handleKeyDown = (e: React.KeyboardEvent) => {
+ if (e.key === "Enter") {
+ onSend();
+ }
+ };
+
+ return (
+
+
+ );
+};
+
+
+We shouldn't call the Humanloop SDK from the client's browser as this would require giving out the Humanloop API key, which _you
+should not do!_ Instead, we'll create a simple backend API route in Next.js which can perform the Humanloop requests on the Node
+server and proxy these back to the client.
+
+Create a file containing the code below at app/api/chat/route.ts. This will automatically create an API route at /api/chat. In the
+call to the Humanloop SDK, you'll need to pass the project name you created in step 1.
+
+import { Humanloop, ChatMessageWithToolCall } from "humanloop";
+
+if (!process.env.HUMANLOOP_API_KEY) {
+ throw Error(
+ "no Humanloop API key provided; add one to your .env.local file with: \`HUMANLOOP_API_KEY=..."
+ );
+}
+
+const humanloop = new Humanloop({
+ basePath: "https://api.humanloop.com/v4",
+ apiKey: process.env.HUMANLOOP_API_KEY,
+});
+
+export async function POST(req: Request): Promise {
+ const messages: ChatMessageWithToolCall[] =
+ (await req.json()) as ChatMessageWithToolCall[];
+ console.log(messages);
+
+ const response = await humanloop.chatDeployed({
+ project: "chat-tutorial-ts",
+ messages,
+ });
+
+ return new Response(JSON.stringify(response.data.data[0].output));
+}
+
+
+In this code, we're calling humanloop.chatDeployed. This function is used to target the model which is actively deployed on your
+project - in this case it should be the model we set up in step 1. Other related functions in the SDK reference
+[/docs/api-reference/sdks] (such as humanloop.chat) allow you to target a specific model config (rather than the actively deployed
+one) or even specify model config directly in the function call.
+
+When we receive a response from Humanloop, we strip out just the text of the chat response and send this back to the client via a
+Response object (see Next.js - Route Handler docs
+[https://nextjs.org/docs/app/building-your-application/routing/router-handlers]). The Humanloop SDK response contains much more
+data besides the raw text, which you can inspect by logging to the console.
+
+For the above to work, you'll need to ensure that you have a .env.local file at the root of your project directory with your
+Humanloop API key. You can generate a Humanloop API key by clicking your name in the bottom left and selecting API keys.
+[https://app.humanloop.com/account/api-keys] This environment variable will only be available on the Next.js server, not on the
+client (see Next.js - Environment Variables
+[https://nextjs.org/docs/pages/building-your-application/configuring/environment-variables]).
+
+HUMANLOOP_API_KEY=...
+
+
+Now, modify page.tsx to use a fetch request against the new API route.
+
+const onSend = async () => {
+ // REPLACE ME NOW
+
+ setMessages(newMessages);
+
+ const response = await fetch("/api/chat", {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ },
+ body: JSON.stringify(newMessages),
+ });
+
+ const res = await response.json();
+
+ // END REPLACE ME
+};
+
+
+You should now find that your application works as expected. When we send messages from the client, a GPT response appears beneath
+(after a delay).
+
+[file:55e6b823-989b-4136-8aa7-662e85a6b7af]
+
+Back in your Humanloop Prompt dashboard you should see Logs being recorded as clients interact with your model.
+
+[file:35969747-ab10-4420-9f7e-e40d54c20ee7]
+
+
+STEP 3: STREAMING TOKENS
+
+(Note: requires Node version 18+).
+
+You may notice that model responses can take a while to appear on screen. Currently, our Next.js API route blocks while the entire
+response is generated, before finally sending the whole thing back to the client browser in one go. For longer generations, this
+can take some time, particularly with larger models like GPT-4. Other model config settings can impact this too.
+
+To provide a better user experience, we can deal with this latency by streaming tokens back to the client as they are generated
+and have them display eagerly on the page. The Humanloop SDK wraps the model providers' streaming functionality so that we can
+achieve this. Let's incorporate streaming tokens into our app next.
+
+Edit the API route at app/api/chat/route.ts to look like the following. Notice that we have switched to using the
+humanloop.chatDeployedStream function, which offers Server Sent Event
+[https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events] streaming as new tokens arrive from the model provider.
+
+import { Humanloop, ChatMessageWithToolCall } from "humanloop";
+
+if (!process.env.HUMANLOOP_API_KEY) {
+ throw Error(
+ "no Humanloop API key provided; add one to your .env.local file with: \`HUMANLOOP_API_KEY=..."
+ );
+}
+
+const humanloop = new Humanloop({
+ basePath: "https://api.humanloop.com/v4",
+ apiKey: process.env.HUMANLOOP_API_KEY,
+});
+
+export async function POST(req: Request): Promise {
+ const messages: ChatMessageWithToolCall[] =
+ (await req.json()) as ChatMessageWithToolCall[];
+
+ const response = await humanloop.chatDeployedStream({
+ project: "chat-tutorial-ts",
+ messages,
+ });
+
+ return new Response(response.data);
+}
+
+
+Now, modify the onSend function in page.tsx to the following. This streams the response body in chunks, updating the UI each time
+a new chunk arrives.
+
+const onSend = async () => {
+ const userMessage: ChatMessageWithToolCall = {
+ role: "user",
+ content: inputValue,
+ };
+
+ setInputValue("");
+
+ const newMessages: ChatMessageWithToolCall[] = [
+ ...messages,
+ userMessage,
+ { role: "assistant", content: "" },
+ ];
+
+ setMessages(newMessages);
+
+ const response = await fetch("/api/chat", {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ },
+ body: JSON.stringify(newMessages),
+ });
+
+ if (!response.body) throw Error();
+
+ const decoder = new TextDecoder();
+ const reader = response.body.getReader();
+ let done = false;
+ while (!done) {
+ const chunk = await reader.read();
+ const value = chunk.value;
+ done = chunk.done;
+ const val = decoder.decode(value);
+ const jsonChunks = val
+ .split("}{")
+ .map(
+ (s) => (s.startsWith("{") ? "" : "{") + s + (s.endsWith("}") ? "" : "}")
+ );
+ const tokens = jsonChunks.map((s) => JSON.parse(s).output).join("");
+
+ setMessages((messages) => {
+ const updatedLastMessage = messages.slice(-1)[0];
+
+ return [
+ ...messages.slice(0, -1),
+ {
+ ...updatedLastMessage,
+ content: (updatedLastMessage.content as string) + tokens,
+ },
+ ];
+ });
+ }
+};
+
+
+You should now find that tokens stream onto the screen as soon as they are available.
+
+[file:3c5ff707-1ca8-4f3d-8a32-7566975c9731]
+
+
+STEP 4: ADD FEEDBACK BUTTONS
+
+We'll now add feedback buttons to the Assistant chat messages, and submit feedback on those Logs via the Humanloop API whenever
+the user clicks the buttons.
+
+Modify page.tsx to include an id for each message in React state. Note that we'll only have ids for assistant messages, and null
+for user messages.
+
+// A new type which also includes the Humanloop data_id for a message generated by the model.
+interface ChatListItem {
+ id: string | null; // null for user messages, string for assistant messages
+ message: ChatMessageWithToolCall;
+}
+
+export default function Home() {
+ const [chatListItems, setChatListItems] =
+ useState([]); // <- update to use the new type
+ ...
+
+
+Modify the onSend function to look like this:
+
+const onSend = async () => {
+ const userMessage: ChatMessageWithToolCall = {
+ role: "user",
+ content: inputValue,
+ };
+
+ setInputValue("");
+
+ const newItems: ChatListItem[] = [
+ // <- modified to update the new list type
+ ...chatListItems,
+ { message: userMessage, id: null },
+ { message: { role: "assistant", content: "" }, id: null },
+ ];
+
+ setChatListItems(newItems);
+
+ const response = await fetch("/api/chat", {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ },
+ body: JSON.stringify(newItems.slice(0, -1).map((item) => item.message)), // slice off the final message, which is the currently empty placeholder for the assistant response
+ });
+
+ if (!response.body) throw Error();
+
+ const decoder = new TextDecoder();
+ const reader = response.body.getReader();
+ let done = false;
+ while (!done) {
+ const chunk = await reader.read();
+ const value = chunk.value;
+ done = chunk.done;
+ const val = decoder.decode(value);
+ const jsonChunks = val
+ .split("}{")
+ .map(
+ (s) => (s.startsWith("{") ? "" : "{") + s + (s.endsWith("}") ? "" : "}")
+ );
+ const tokens = jsonChunks.map((s) => JSON.parse(s).output).join("");
+ const id = JSON.parse(jsonChunks[0]).id; // <- extract the data id from the streaming response
+
+ setChatListItems((chatListItems) => {
+ const lastItem = chatListItems.slice(-1)[0];
+ const updatedId = id || lastItem.id; // <- use the id from the streaming response if it's not already set
+ return [
+ ...chatListItems.slice(0, -1),
+ {
+ ...lastItem,
+ message: {
+ ...lastItem.message,
+ content: (lastItem.message.content as string) + tokens,
+ },
+ id: updatedId, // <- include the id when we update state
+ },
+ ];
+ });
+ }
+};
+
+
+Now, modify the MessageRow component to become a ChatItemRow component which knows about the id.
+
+interface ChatItemRowProps {
+ item: ChatListItem;
+}
+
+const ChatItemRow: React.FC = ({ item }) => {
+ const onFeedback = async (feedback: string) => {
+ const response = await fetch("/api/feedback", {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ },
+ body: JSON.stringify({ id: item.id, value: feedback }),
+ });
+ };
+
+ return (
+
+
+ {item.message.role}
+
+
+ {item.message.content as string}
+
+
+
+ {item.id !== null && (
+
+
+
+
+ )}
+
+
+ );
+};
+
+
+And finally for page.tsx, modify the rendering of the message history to use the new component:
+
+// OLD
+// {messages.map((msg, idx) => (
+//
+// ))}
+
+// NEW
+{
+ chatListItems.map((item, idx) => (
+
+ ));
+}
+
+
+Next, we need to create a Next.js API route for submitting feedback, similar to the one we had for making a /chat request. Create
+a new file at the path app/api/feedback/route.ts with the following code:
+
+import { Humanloop } from "humanloop";
+
+if (!process.env.HUMANLOOP_API_KEY) {
+ throw Error(
+ "no Humanloop API key provided; add one to your .env.local file with: \`HUMANLOOP_API_KEY=..."
+ );
+}
+
+const humanloop = new Humanloop({
+ apiKey: process.env.HUMANLOOP_API_KEY,
+});
+
+interface FeedbackRequest {
+ id: string;
+ value: string;
+}
+
+export async function POST(req: Request): Promise {
+ const feedbackRequest: FeedbackRequest = await req.json();
+
+ await humanloop.feedback({
+ type: "rating",
+ data_id: feedbackRequest.id,
+ value: feedbackRequest.value,
+ });
+
+ return new Response();
+}
+
+
+This code simply proxies the feedback request through the Next.js server. You should now see feedback buttons on the relevant rows
+in chat.
+
+Chat interface with feedback buttons. [file:95366c0f-d17b-4270-a395-1d353aa13538]
+
+When you click one of these feedback buttons and visit the Prompt in Humanloop, you should see the feedback logged against the
+log.
+
+[file:2cf8e05e-c021-4d4d-ad14-16e57221c553]
+
+
+CONCLUSION
+
+Congratulations! You've now built a working chat interface and used Humanloop to handle interaction with the model provider and
+log chats. You used a system message (which is invisible to your end user) to make GPT-4 behave like a chess tutor. You also added
+a way for your app's users to provide feedback which you can track in Humanloop to help improve your models.
+
+Now that you've seen how to create a simple Humanloop project and build a chat interface on top of it, try visiting the Humanloop
+project dashboard to view the logs and iterate on your model configs. You can also create experiments to learn which model configs
+perform best with your users. To learn more about these topics, take a look at our guides below.
+
+All the code for this project is available on Github [https://github.com/humanloop/hl-chatgpt-clone-typescript].",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/tutorials/chatgpt-clone-in-nextjs",
+ "title": "ChatGPT clone with streaming",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -19802,6 +30282,177 @@ You can create an account now by going to the [Sign up page](https://app.humanlo
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ ],
+ "content": "Humanloop acts as a registry of your Prompts [/docs/prompts] so you can centrally manage all their versions and Logs [/docs/logs],
+and evaluate and improve your AI systems.
+
+This guide will show you how to create a Prompt in the UI or via the SDK/API.
+
+**Prerequisite**: A Humanloop account.
+
+You can create an account now by going to the Sign up page [https://app.humanloop.com/signup].
+
+
+CREATE A PROMPT IN THE UI
+
+CREATE A PROMPT FILE
+
+When you first open Humanloop you’ll see your File navigation on the left. Click ‘+ New’ and create a Prompt.
+
+[file:ad732e1d-77a8-4576-9933-1db6f9d9d28f]
+
+In the sidebar, rename this file to "Comedian Bot" now or later.
+
+
+CREATE THE PROMPT TEMPLATE IN THE EDITOR
+
+The left hand side of the screen defines your Prompt – the parameters such as model, temperature and template. The right hand side
+is a single chat session with this Prompt.
+
+[file:b9ed95cc-edc2-4c49-b8d3-4f164a083123]
+
+Click the "+ Message" button within the chat template to add a system message to the chat template.
+
+[file:5d7dd0e4-73f6-41b9-ad2b-60ba9f349f26]
+
+Add the following templated message to the chat template.
+
+You are a funny comedian. Write a joke about {{topic}}.
+
+
+This message forms the chat template. It has an input slot called topic (surrounded by two curly brackets) for an input value that
+is provided each time you call this Prompt.
+
+On the right hand side of the page, you’ll now see a box in the Inputs section for topic.
+
+ 1. Add a value for topic e.g. music, jogging, whatever.
+ 2. Click Run in the bottom right of the page.
+
+This will call OpenAI’s model and return the assistant response. Feel free to try other values, the model is very funny.
+
+You now have a first version of your prompt that you can use.
+
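+As a rough, optional sketch: once this Prompt is committed and deployed, you could call it from code and supply the topic input
+using the same chat_deployed pattern shown in the SDK guides later in these docs (the project ID and inputs are purely
+illustrative):
+
+from humanloop import Humanloop
+
+humanloop = Humanloop(api_key="")
+
+# Illustrative only: call the deployed Prompt, supplying the {{topic}} input the chat template expects.
+chat_response = humanloop.chat_deployed(
+    project_id="YOUR_PROJECT_ID_HERE",
+    inputs={"topic": "music"},
+    messages=[],  # no extra chat history is needed for this one-shot joke
+)
+print(chat_response.data[0].output)
+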
+
+COMMIT YOUR FIRST VERSION OF THIS PROMPT
+
+ 1. Click the Commit button
+ 2. Put “initial version” in the commit message field
+ 3. Click Commit
+
+[file:386f75eb-c97a-4923-9823-168a14848719]
+
+
+VIEW THE LOGS
+
+Under the Prompt File, click ‘Logs’ to view all the generations from this Prompt.
+
+Click on a row to see the details of what version of the prompt generated it. From here you can give feedback to that generation,
+see performance metrics, open up this example in the Editor, or add this log to a dataset.
+
+[file:f2b286b8-7fcf-4323-9308-6ca5fbc22e44]
+
+----------------------------------------------------------------------------------------------------------------------------------
+
+
+CREATE A PROMPT USING THE SDK
+
+The Humanloop Python SDK allows you to programmatically set up and version your Prompts [/docs/prompts] in Humanloop, and log
+generations from your models. This guide will show you how to create a Prompt using the SDK.
+
+**Prerequisite**: A Humanloop SDK Key.
+
+You can get this from your Organisation Settings page [https://app.humanloop.com/account/api-keys] if you have the right
+permissions [/docs/access-roles].
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop TypeScript SDK:
+
+ npm install humanloop
+
+
+ 2. Import and initialize the SDK:
+
+ import { HumanloopClient, Humanloop } from "humanloop";
+
+ const humanloop = new HumanloopClient({ apiKey: "YOUR_API_KEY" });
+
+ // Check that the authentication was successful
+ console.log(await humanloop.prompts.list());
+
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop Python SDK:
+
+ pip install humanloop
+
+
+ 2. Start a Python interpreter:
+
+ python
+
+
+ 3. Initialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page
+ [https://app.humanloop.com/account/api-keys])
+
+ from humanloop import Humanloop
+ hl = Humanloop(api_key="")
+
+ # Check that the authentication was successful
+ print(hl.prompts.list())
+
+
+Continue in the same Python interpreter (where you have run humanloop = Humanloop(...)).
+
+Note: Prompts are still called 'projects' in the SDK and versions of Prompts are called 'model configs'.
+
+
+CREATE THE PROMPT "PROJECT"
+
+project_response = humanloop.projects.create(name="sdk-tutorial")
+project_id = project_response.id
+
+
+
+REGISTER YOUR VERSION ("MODEL CONFIG")
+
+humanloop.model_configs.register(
+ project_id=project_id,
+ model="gpt-3.5-turbo",
+ prompt_template="Write a snappy introduction about {{topic}}:",
+ temperature=0.8,
+)
+
+
+
+GO TO THE APP
+
+Go to the Humanloop app [https://app.humanloop.com] and you will see your new project as a Prompt with the model config you just
+created.
+
+You now have a project in Humanloop that contains your model config. You can view your project and invite team members by going to
+the Project page.
+
+
+NEXT STEPS
+
+With the Prompt set up, you can now integrate it into your app by following the SDK/API integration guide
+[./generate-and-log-with-the-sdk].",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/create-prompt",
+ "title": "Create a Prompt",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -20025,6 +30676,42 @@ There are two ways to get your Logs into Humanloop, referred to as 'proxy' and '
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Generate and Log",
+ ],
+ "content": "A Log [/docs/logs] is created every time a Prompt [/docs/prompts] is called. The Log contain contains the inputs and the output
+(the generation) as well as metadata such as which version of the Prompt was used and any associated feedback.
+
+There are two ways to get your Logs into Humanloop, referred to as 'proxy' and 'async'.
+
+
+PROXIED
+
+In one call you can fetch the latest version of a Prompt, generate from the provider, stream the result back and log the result.
+Using Humanloop as a proxy is by far the most convenient way of calling your LLM-based applications.
+
+
+ASYNC
+
+With the async method, you can fetch the latest version of a Prompt, generate from the provider, and log the result in separate
+calls. This is useful if you want to decouple the generation and logging steps, or if you want to log results from your own
+infrastructure. It also allows you to have no additional latency or servers on the critical path to your AI features.
+
+[file:07c15a4c-189a-4e79-a523-84a2383e596b]
+
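+As a rough sketch of the difference (the project name, inputs and config values are illustrative, following the call shapes used
+in the guides below):
+
+from humanloop import Humanloop
+
+humanloop = Humanloop(api_key="")
+
+# Proxied: one call fetches the deployed Prompt, generates from the provider and logs the result.
+proxied_response = humanloop.complete_deployed(
+    project="my-project",
+    inputs={"question": "How should I think about competition for my startup?"},
+)
+print(proxied_response.data[0].output)
+
+# Async: generate from the provider yourself, then log the result in a separate call.
+output = "..."  # produced by your own call to the model provider
+humanloop.log(
+    project="my-project",
+    inputs={"question": "How should I think about competition for my startup?"},
+    output=output,
+    config={
+        "model": "gpt-4",
+        "prompt_template": "Answer the following question: {{question}}",
+        "temperature": 0.2,
+    },
+    source="sdk",
+)
+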
+The guides in this section instruct you on how to create Logs on Humanloop. Once this is setup, you can begin to use Humanloop to
+evaluate and improve your LLM apps.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/generate-and-log-with-the-sdk",
+ "title": "Overview",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -20098,6 +30785,106 @@ The Humanloop Python SDK allows you to easily replace your \`openai.Completions.
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Generate and Log",
+ ],
+ "content": "The Humanloop Python SDK allows you to easily replace your openai.Completions.create() calls with a humanloop.complete() call
+that, in addition to calling OpenAI to get a generation, automatically logs the data to your Humanloop project.
+
+
+PREREQUISITES
+
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/guides/create-prompt] guide first.
+
+This guide assumes you're using an OpenAI model. If you want to use other providers or your own model please also look at our
+[guide to using your own model](./use-your-own-model-provider).
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop TypeScript SDK:
+
+ npm install humanloop
+
+
+ 2. Import and initialize the SDK:
+
+ import { HumanloopClient, Humanloop } from "humanloop";
+
+ const humanloop = new HumanloopClient({ apiKey: "YOUR_API_KEY" });
+
+ // Check that the authentication was successful
+ console.log(await humanloop.prompts.list());
+
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop Python SDK:
+
+ pip install humanloop
+
+
+ 2. Start a Python interpreter:
+
+ python
+
+
+ 3. Initialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page
+ [https://app.humanloop.com/account/api-keys])
+
+ from humanloop import Humanloop
+ hl = Humanloop(api_key="")
+
+ # Check that the authentication was successful
+ print(hl.prompts.list())
+
+
+
+ACTIVATE A MODEL
+
+ 1. Log in to Humanloop and navigate to the Dashboard tab of your project.
+ 2. Ensure that the default environment is in green at the top of the dashboard; the default environment is mapped to your active
+ deployment. If there is no active deployment set, then use the dropdown button for the default environment and select the
+ Change deployment option to select one of your existing model configs to use to generate. You also need to confirm that the
+ model config you have deployed is a Completion model. This can be confirmed by clicking on the config in the table and viewing
+ the Endpoint, making sure it says Complete. [file:80902c85-018e-4e08-a456-60efd9794c5e]
+
+
+USE THE SDK TO CALL YOUR MODEL
+
+Now you can use the SDK to generate completions and log the results to your project.
+
+# humanloop.complete_deployed(...) will call the active model config on your project.
+# The inputs must match the input of the prompt template in your project.
+complete_response = humanloop.complete_deployed(
+ project="", # change the project name to your project
+ inputs={"question": "How should I think about competition for my startup?"},
+)
+
+# A single call to generate may return multiple outputs.
+data_id = complete_response.data[0].id
+output = complete_response.data[0].output
+
+# You can also access the raw response from OpenAI.
+print(complete_response.provider_responses)
+
+
+Navigate to your project's Logs tab in the browser to see the recorded inputs and outputs of your generation.
+
+🎉 Now that you have generations flowing through your project you can start to log your end user feedback to evaluate and improve
+your models.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/completion-using-the-sdk",
+ "title": "Generate completions",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -20263,6 +31050,113 @@ The Humanloop Python SDK allows you to easily replace your \`openai.ChatCompleti
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Generate and Log",
+ ],
+ "content": "The Humanloop Python SDK allows you to easily replace your openai.ChatCompletions.create() calls with a humanloop.chat() call
+that, in addition to calling OpenAI to get a response, automatically logs the data to your Humanloop project.
+
+
+PREREQUISITES
+
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/guides/create-prompt] guide first.
+
+This guide assumes you're using an OpenAI model. If you want to use other providers or your own model please also look at our
+[guide to using your own model](./use-your-own-model-provider).
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop TypeScript SDK:
+
+ npm install humanloop
+
+
+ 2. Import and initialize the SDK:
+
+ import { HumanloopClient, Humanloop } from "humanloop";
+
+ const humanloop = new HumanloopClient({ apiKey: "YOUR_API_KEY" });
+
+ // Check that the authentication was successful
+ console.log(await humanloop.prompts.list());
+
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop Python SDK:
+
+ pip install humanloop
+
+
+ 2. Start a Python interpreter:
+
+ python
+
+
+ 3. Initialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page
+ [https://app.humanloop.com/account/api-keys])
+
+ from humanloop import Humanloop
+ hl = Humanloop(api_key="")
+
+ # Check that the authentication was successful
+ print(hl.prompts.list())
+
+
+
+ACTIVATE A MODEL
+
+ 1. Log in to Humanloop and navigate to the Models tab of your project.
+ 2. Ensure that the default environment is in green at the top of the dashboard. The default environment is mapped to your active
+ deployment. If there is no active deployment set, then use the dropdown button for the default environment and select the
+ Change deployment option to select one of your existing model configs to use to generate. You also need to confirm that the
+ model config you have deployed is a Chat model. This can be confirmed by clicking on the config in the table and viewing the
+ Endpoint, making sure it says Chat.
+
+[file:0beecbbf-2e01-4bcd-bc08-bb6d1fba1531]
+
+
+USE THE SDK TO CALL YOUR MODEL
+
+Now you can use the SDK to generate completions and log the results to your project:
+
+# humanloop.chat_deployed(...) will call the active model config on your project.
+# The inputs must match the input of the chat template in your project.
+chat_response = humanloop.chat_deployed(
+ project_id="YOUR_PROJECT_ID_HERE",
+ # inputs required by your chat_template - for example your templated system message.
+ inputs={"persona": "paul graham from YC"},
+ messages=[
+ {"role": "user", "content": "How should I think about competition for my startup?"}
+ ]
+)
+
+# A single call to chat may return multiple outputs.
+data_id = chat_response.data[0].id
+output = chat_response.data[0].output
+print(output)
+
+# You can also access the raw response from OpenAI.
+print(chat_response.provider_responses)
+
+
+Navigate to your project's Logs tab in the browser to see the recorded inputs, messages and responses of your chat.
+
+🎉 Now that you have chat messages flowing through your project you can start to log your end user feedback to evaluate and
+improve your models.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/chat-using-the-sdk",
+ "title": "Generate chat responses",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -20436,6 +31330,86 @@ This guide shows how to use the Humanloop SDK to record user feedback on datapoi
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Generate and Log",
+ ],
+ "content": "This guide shows how to use the Humanloop SDK to record user feedback on datapoints. This works equivalently for both the
+completion and chat APIs.
+
+
+PREREQUISITES
+
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/guides/create-prompt] guide first.
+
+ * You have already integrated humanloop.chat() or humanloop.complete() to log generations with the Python or TypeScript SDKs. If
+ not, follow our guide to integrating the SDK [./generate-and-log-with-the-sdk].
+
+
+RECORD FEEDBACK WITH THE DATAPOINT ID
+
+ 1. Extract the data ID from the humanloop.complete_deployed() response.
+
+ complete_response = humanloop.complete_deployed(
+ project="",
+ inputs={"question": "How should I think about competition for my startup?"},
+ )
+
+ data_id = complete_response.data[0].id
+
+
+ 2. Call humanloop.feedback() referencing the saved datapoint ID to record user feedback.
+ You can also include the source of the feedback when recording it.
+
+ # You can capture a single piece of feedback
+ humanloop.feedback(data_id=data_id, type="rating", value="good")
+
+ # And you can associate the feedback to a specific user.
+ humanloop.feedback(data_id=data_id, type="rating", value="good", user="user_123456")
+
+
+The feedback recorded for each datapoint can be viewed in the Logs tab of your project.
+
+[file:0ec84dd6-c5d1-4ce8-91f5-9a504201b8dc]
+
+Different use cases and user interfaces may require different kinds of feedback that need to be mapped to the appropriate end user
+interaction. There are broadly 3 important kinds of feedback:
+
+ 1. Explicit feedback: these are purposeful actions to review the generations. For example, ‘thumbs up/down’ button presses.
+ 2. Implicit feedback: indirect actions taken by your users may signal whether the generation was good or bad, for example,
+ whether the user ‘copied’ the generation, ‘saved it’ or ‘dismissed it’ (which is negative feedback).
+ 3. Free-form feedback: Corrections and explanations provided by the end-user on the generation.
+
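+As a loose sketch, each of these kinds of feedback can be mapped onto the humanloop.feedback() call shown above; the handler
+names below are hypothetical UI hooks, not part of the SDK:
+
+# Assumes "humanloop" is the initialized Humanloop client and data_id comes from the logged generation.
+def on_thumbs_up(data_id):
+    # Explicit feedback: a purposeful review action.
+    humanloop.feedback(data_id=data_id, type="rating", value="good")
+
+def on_dismissed(data_id):
+    # Implicit feedback: dismissing the generation is treated as a negative signal.
+    humanloop.feedback(data_id=data_id, type="rating", value="bad")
+
+def on_user_edit(data_id, edited_text):
+    # Free-form feedback: log the user's correction alongside a rating.
+    humanloop.feedback([
+        {"data_id": data_id, "type": "rating", "value": "bad"},
+        {"data_id": data_id, "type": "correction", "value": edited_text},
+    ])
+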
+
+RECORDING CORRECTIONS AS FEEDBACK
+
+It can also be useful to allow your users to correct the outputs of your model. This is a strong feedback signal and can also be
+considered as ground truth data for finetuning later.
+
+# You can capture text based feedback to record corrections
+humanloop.feedback(data_id=data_id, type="correction", value="A user provided completion...")
+
+# And also include this as part of an array of feedback for a logged datapoint
+humanloop.feedback([
+ {"data_id": data_id, "type": "rating", "value": "bad"},
+ {"data_id": data_id, "type": "correction", "value": "A user provided summary..."},
+])
+
+
+[file:46692ab9-8116-45f4-a87b-e1a1bb6037b3]
+
+This feedback will also show up within Humanloop, where your internal users can also provide feedback and corrections on logged
+data to help with evaluation.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/capture-user-feedback",
+ "title": "Capture user feedback",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -20571,6 +31545,122 @@ The Humanloop Python SDK allows you to upload your historic model data to an exi
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Generate and Log",
+ ],
+ "content": "The Humanloop Python SDK allows you to upload your historic model data to an existing Humanloop project. This can be used to
+warm-start your project. The data can be considered for feedback and review alongside your new user generated data.
+
+
+PREREQUISITES
+
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/guides/create-prompt] guide first.
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop TypeScript SDK:
+
+ npm install humanloop
+
+
+ 2. Import and initialize the SDK:
+
+ import { HumanloopClient, Humanloop } from "humanloop";
+
+ const humanloop = new HumanloopClient({ apiKey: "YOUR_API_KEY" });
+
+ // Check that the authentication was successful
+ console.log(await humanloop.prompts.list());
+
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop Python SDK:
+
+ pip install humanloop
+
+
+ 2. Start a Python interpreter:
+
+ python
+
+
+ 3. Initialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page
+ [https://app.humanloop.com/account/api-keys])
+
+ from humanloop import Humanloop
+ hl = Humanloop(api_key="")
+
+ # Check that the authentication was successful
+ print(hl.prompts.list())
+
+
+
+LOG HISTORIC DATA
+
+Grab your API key from your Settings page [https://app.humanloop.com/account/api-keys].
+
+ 1. Set up your code to first load up your historic data and then log this to Humanloop, explicitly passing details of the model
+ config (if available) alongside the inputs and output:
+
+ from humanloop import Humanloop
+ import openai
+
+ # Initialize Humanloop with your API key
+ humanloop = Humanloop(api_key="")
+
+ # NB: Add code here to load your existing model data before logging it to Humanloop
+
+ # Log the inputs, outputs and model config to your project - this log call can take batches of data.
+ log_response = humanloop.log(
+ project="",
+ inputs={"question": "How should I think about competition for my startup?"},
+ output=output,
+ config={
+ "model": "gpt-4",
+ "prompt_template": "Answer the following question like Paul Graham from YCombinator: {{question}}",
+ "temperature": 0.2,
+ },
+ source="sdk",
+ )
+
+ # Use the datapoint IDs to associate feedback received later to this datapoint.
+ data_id = log_response.id
+
+
+ 2. The process of capturing feedback then uses the returned log_id as before.
+
+ See our guide on capturing user feedback [./capture-user-feedback].
+
+ 3. You can also log immediate feedback alongside the input and outputs:
+
+ # Log the inputs, outputs and model config to your project.
+ log_response = humanloop.log(
+ project="",
+ inputs={"question": "How should I think about competition for my startup?"},
+ output=output,
+ config={
+ "model": "gpt-4",
+ "prompt_template": "Answer the following question like Paul Graham from YCombinator: {{question}}",
+ "temperature": 0.2,
+ },
+ source="sdk",
+ feedback={"type": "rating", "value": "good"}
+ )
+ ",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/upload-historic-data",
+ "title": "Upload historic data",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -20742,6 +31832,184 @@ In this guide, we walk through how to use these SDK methods to log data to Human
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Generate and Log",
+ ],
+ "content": "The humanloop.complete()and humanloop.chat() call encapsulates the LLM provider calls (for example openai.Completions.create()),
+the model-config selection and logging steps in a single unified interface. There may be scenarios that you wish to manage the LLM
+provider calls directly in your own code instead of relying on Humanloop.
+
+For example, you may be using an LLM provider that currently is not directly supported by Humanloop such as Hugging Face.
+
+To support using your own model provider, we provide additional humanloop.log() and humanloop.projects.get_active_config() methods
+in the SDK.
+
+In this guide, we walk through how to use these SDK methods to log data to Humanloop and run experiments.
+
+
+PREREQUISITES
+
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/guides/create-prompt] guide first.
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop TypeScript SDK:
+
+ npm install humanloop
+
+
+ 2. Import and initialize the SDK:
+
+ import { HumanloopClient, Humanloop } from "humanloop";
+
+ const humanloop = new HumanloopClient({ apiKey: "YOUR_API_KEY" });
+
+ // Check that the authentication was successful
+ console.log(await humanloop.prompts.list());
+
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop Python SDK:
+
+ pip install humanloop
+
+
+ 2. Start a Python interpreter:
+
+ python
+
+
+ 3. Initialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page
+ [https://app.humanloop.com/account/api-keys])
+
+ from humanloop import Humanloop
+ hl = Humanloop(api_key="")
+
+ # Check that the authentication was successful
+ print(hl.prompts.list())
+
+
+
+LOG DATA TO YOUR PROJECT
+
+Set up your code to first get your model config from Humanloop, then call your LLM provider to get a completion (or chat
+response) and then log this, alongside the inputs, config and output:
+
+from humanloop import Humanloop
+import openai
+
+# Initialize Humanloop with your API key
+humanloop = Humanloop(api_key="")
+
+project_id = ""
+
+config = humanloop.projects.get_active_config(id=project_id).config
+
+client = openai.OpenAI(
+ # defaults to os.environ.get("OPENAI_API_KEY")
+ api_key="",
+)
+
+messages = [
+ {
+ "role": "user",
+ "content": "Say this is a test",
+ }
+]
+
+chat_completion = client.chat.completions.create(
+ messages=messages,
+ model=config.model,
+ temperature=config.temperature
+)
+
+# Parse the output from the OpenAI response.
+output = chat_completion.choices[0].message.content
+
+# Log the inputs, outputs and config to your project.
+log_response = humanloop.log(
+ project_id=project_id,
+ messages=messages,
+ output=output,
+ config_id=config.id
+)
+
+# Use this ID to associate feedback received later to this datapoint.
+data_id = log_response.id
+
+
+THE PROCESS OF CAPTURING FEEDBACK THEN USES THE RETURNED DATA_ID AS BEFORE.
+
+See our guide on capturing user feedback [./capture-user-feedback].
+
+YOU CAN ALSO LOG IMMEDIATE FEEDBACK ALONGSIDE THE INPUT AND OUTPUTS:
+
+# Log the inputs, outputs and model config to your project.
+log_response = humanloop.log(
+ project_id=project_id,
+ messages=messages,
+ output=output,
+ config_id=config.id,
+ feedback={"type": "rating", "value": "good"}
+)
+
+
+Note that you can also use a similar pattern for non-OpenAI LLM providers. For example, logging results from Hugging Face’s
+Inference API:
+
+import requests
+from humanloop import Humanloop
+
+# Initialize the SDK with your Humanloop API key
+humanloop = Humanloop(api_key="")
+
+# Make a generation using the Hugging Face Inference API.
+response = requests.post(
+ "https://api-inference.huggingface.co/models/gpt2",
+ headers={"Authorization": f"Bearer {}"},
+ json={
+ "inputs": "Answer the following question like Paul Graham from YCombinator:\\n"
+ "How should I think about competition for my startup?",
+ "parameters": {
+ "temperature": 0.2,
+ "return_full_text": False, # Otherwise, Hugging Face will return the prompt as part of the generation.
+ },
+ },
+).json()
+
+# Parse the output from the Hugging Face response.
+
+output = response[0]["generated_text"]
+
+# Log the inputs, outputs and model config to your project.
+
+log_response = humanloop.log(
+ project=project_id,
+ inputs={"question": "How should I think about competition for my startup?"},
+ output=output,
+ model_config={
+ "model": "gpt2",
+ "prompt_template": "Answer the following question like Paul Graham from YCombinator:\\n{{question}}",
+ "temperature": 0.2,
+},
+)
+
+
+\`\`\`",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/use-your-own-model-provider",
+ "title": "Logging",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -20975,6 +32243,440 @@ By following this guide, you will:
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Generate and Log",
+ ],
+ "content": "This guide contains 3 sections. We'll start with an example Python script that makes a series of calls to an LLM upon receiving a
+user request. In the first section, we'll log these calls to Humanloop. In the second section, we'll link up these calls to a
+single session so they can be easily inspected on Humanloop. Finally, we'll explore how to deal with nested logs within a session.
+
+By following this guide, you will:
+
+ * Have hooked up your backend system to use Humanloop.
+ * Be able to view session traces displaying sequences of LLM calls on Humanloop.
+ * Learn how to log complex session traces containing nested logs.
+
+
+PREREQUISITES
+
+ * A Humanloop account. If you don't have one, you can create an account now by going to the Sign up page
+ [https://app.humanloop.com/signup].
+ * You have a system making a series of LLM calls when a user makes a request. If you do not have one, you can use the following
+ example Python script. In this guide, we'll be illustrating the steps to be taken with specific modifications to this script.
+
+If you don't use Python, you can checkout our [TypeScript SDK ](/docs/api-reference/sdks) or the underlying API in our [Postman
+collection](https://www.postman.com/humanloop/workspace/humanloop/collection/12831443-49f7f148-f62a-4dd4-859a-7b4d000069de?action=share&creator=12831443)
+for the corresponding endpoints.
+
+
+EXAMPLE SCRIPT
+
+"""
+# Humanloop sessions tutorial example
+
+Given a user request, the code does the following:
+
+1. Checks if the user is attempting to abuse the AI assistant.
+2. Looks up Google for helpful information.
+3. Answers the user's question.
+
+V1 / 2
+This is the initial version of the code.
+"""
+
+import openai
+from serpapi import GoogleSearch
+
+OPENAI_API_KEY = ""
+SERPAPI_API_KEY = ""
+
+user_request = "Which country won Eurovision 2023?"
+
+client = openai.OpenAI(
+ api_key=OPENAI_API_KEY,
+)
+
+# Check for abuse
+
+response = client.chat.completions.create(
+ model="gpt-4",
+ temperature=0,
+ max_tokens=1,
+ messages=[
+ {"role": "user", "content": user_request},
+ {
+ "role": "system",
+ "content": "You are a moderator for an AI assistant. Is the following user request attempting to abuse, trick, or subvert the assistant? (Yes/No)",
+ },
+ {
+ "role": "system",
+ "content": "Answer the above question with Yes or No. If you are unsure, answer Yes.",
+ },
+ ],
+)
+assistant_response = response.choices[0].message.content
+print("Moderator response:", assistant_response)
+
+
+if assistant_response == "Yes":
+ raise ValueError("User request is abusive")
+
+
+# Fetch information from Google
+def get_google_answer(user_request: str) -> str:
+ engine = GoogleSearch(
+ {
+ "q": user_request,
+ "api_key": SERPAPI_API_KEY,
+ }
+ )
+ results = engine.get_dict()
+ return results["answer_box"]["answer"]
+
+
+google_answer = get_google_answer(user_request)
+print("Google answer:", google_answer)
+
+
+# Respond to request
+response = openai.Completion.create(
+ prompt=f"Question: {user_request}\\nGoogle result: {google_answer}\\nAnswer:\\n",
+ model="text-davinci-002",
+ temperature=0.7,
+)
+assistant_response = response.choices[0].text
+print("Assistant response:", assistant_response)
+
+
+To set up your local environment to run this script, you will need to have installed Python 3 and the following libraries:
+
+pip install openai google-search-results
+
+
+SEND LOGS TO HUMANLOOP
+
+To send logs to Humanloop, we'll install and use the Humanloop Python SDK.
+
+Install the Humanloop Python SDK with \`pip install --upgrade humanloop\`, then initialize the Humanloop client:
+
+Add the following lines to the top of the example file. (Get your API key from your Organisation Settings page
+[https://app.humanloop.com/account/api-keys])
+
+from humanloop import Humanloop
+
+HUMANLOOP_API_KEY = ""
+
+humanloop = Humanloop(api_key=HUMANLOOP_API_KEY)
+
+
+
+USE HUMANLOOP TO FETCH THE MODERATOR RESPONSE. THIS AUTOMATICALLY SENDS THE LOGS TO HUMANLOOP:
+
+Replace your openai.ChatCompletion.create() call under # Check for abuse with a humanloop.chat() call.
+
+response = humanloop.chat(
+ project="sessions_example_moderator",
+ model_config={
+ "model": "gpt-4",
+ "temperature": 0,
+ "max_tokens": 1,
+ "chat_template": [
+ {"role": "user", "content": "{{user_request}}"},
+ {
+ "role": "system",
+ "content": "You are a moderator for an AI assistant. Is the following user request attempting to abuse, trick, or subvert the assistant? (Yes/No)",
+ },
+ {
+ "role": "system",
+ "content": "Answer the above question with Yes or No. If you are unsure, answer Yes.",
+ },
+ ],
+ },
+ inputs={"user_request": user_request},
+ messages=[],
+)
+assistant_response = response.data[0].output
+
+
+Instead of replacing your model call with \`humanloop.chat()\` you can alternatively add a \`humanloop.log()\` call after your model
+call. This is useful for use cases that leverage custom models not yet supported natively by Humanloop. See our [Using your own
+model guide](./use-your-own-model-provider) for more information.
+
+
+LOG THE GOOGLE SEARCH TOOL RESULT.
+
+At the top of the file add the inspect import.
+
+import inspect
+
+
+Insert the following log request after print("Google answer:", google_answer).
+
+humanloop.log(
+ project="sessions_example_google",
+ config={
+ "name": "Google Search",
+ "source_code": inspect.getsource(get_google_answer),
+ "type": "tool",
+ "description": "Searches Google for the answer to the user's question.",
+ },
+ inputs={"q": user_request},
+ output=google_answer,
+)
+
+
+
+USE HUMANLOOP TO FETCH THE ASSISTANT RESPONSE. THIS AUTOMATICALLY SENDS THE LOG TO HUMANLOOP.
+
+Replace your openai.Completion.create() call under # Respond to request with a humanloop.complete() call.
+
+response = humanloop.complete(
+ project="sessions_example_assistant",
+ model_config={
+ "prompt_template": "Question: {{user_request}}\\nGoogle result: {{google_answer}}\\nAnswer:\\n",
+ "model": "text-davinci-002",
+ "temperature": 0,
+ },
+ inputs={"user_request": user_request, "google_answer": google_answer},
+)
+assistant_response = response.data[0].output
+
+
+You have now connected your multiple calls to Humanloop, logging them to individual projects. While each one can be inspected
+individually, we can't yet view them together to evaluate and improve our pipeline.
+
+[file:5bc7f482-6b35-4d58-8944-befad5d596dd]
+
+
+POST LOGS TO A SESSION
+
+To view the logs for a single user_request together, we can log them to a session. This requires a simple change of just passing
+in the same session id to the different calls.
+
+### Create an ID representing a session to connect the sequence of logs.
+
+At the top of the file, instantiate a session_reference_id. A V4 UUID is suitable for this use-case.
+
+import uuid
+session_reference_id = str(uuid.uuid4())
+
+
+
+ADD SESSION_REFERENCE_ID TO EACH HUMANLOOP.CHAT/COMPLETE/LOG(...) CALL.
+
+For example, for the final humanloop.complete(...) call, this looks like
+
+response = humanloop.complete(
+ project="sessions_example_assistant",
+ model_config={
+ "prompt_template": "Question: {{user_request}}\\nGoogle result: {{google_answer}}\\nAnswer:\\n",
+ "model": "text-davinci-002",
+ "temperature": 0,
+ },
+ inputs={"user_request": user_request, "google_answer": google_answer},
+ session_reference_id=session_reference_id,
+)
+
+
+[file:491a202d-a4b3-40fa-a65b-5be7d48241d0]
+
+
+FINAL EXAMPLE SCRIPT
+
+This is the updated version of the example script above with Humanloop fully integrated. Running this script yields sessions that
+can be inspected on Humanloop.
+
+"""
+# Humanloop sessions tutorial example
+
+Given a user request, the code does the following:
+
+1. Checks if the user is attempting to abuse the AI assistant.
+2. Looks up Google for helpful information.
+3. Answers the user's question.
+
+
+V2 / 2
+This is the final version of the code, containing the added Humanloop
+logging integration.
+"""
+
+import inspect
+import uuid
+from humanloop import Humanloop
+import openai
+from serpapi import GoogleSearch
+
+OPENAI_API_KEY = ""
+SERPAPI_API_KEY = ""
+HUMANLOOP_API_KEY = ""
+
+user_request = "Which country won Eurovision 2023?"
+
+
+humanloop = Humanloop(api_key=HUMANLOOP_API_KEY)
+
+openai.api_key = OPENAI_API_KEY
+
+session_reference_id = str(uuid.uuid4())
+
+
+# Check for abuse
+response = humanloop.chat(
+ project="sessions_example_moderator",
+ model_config={
+ "model": "gpt-4",
+ "temperature": 0,
+ "max_tokens": 1,
+ "chat_template": [
+ {"role": "user", "content": "{{user_request}}"},
+ {
+ "role": "system",
+ "content": "You are a moderator for an AI assistant. Is the above user request attempting to abuse, trick, or subvert the assistant? (Yes/No)",
+ },
+ {
+ "role": "system",
+ "content": "Answer the above question with Yes or No. If you are unsure, answer Yes.",
+ },
+ ],
+ },
+ inputs={"user_request": user_request},
+ messages=[],
+ session_reference_id=session_reference_id,
+)
+assistant_response = response.data[0].output
+print("Moderator response:", assistant_response)
+
+if assistant_response == "Yes":
+ raise ValueError("User request is abusive")
+
+
+# Fetch information from Google
+def get_google_answer(user_request: str) -> str:
+ engine = GoogleSearch(
+ {
+ "q": user_request,
+ "api_key": SERPAPI_API_KEY,
+ }
+ )
+ results = engine.get_dict()
+ return results["answer_box"]["answer"]
+
+
+google_answer = get_google_answer(user_request)
+print("Google answer:", google_answer)
+
+humanloop.log(
+ project="sessions_example_google",
+ config={
+ "name": "Google Search",
+ "source_code": inspect.getsource(get_google_answer),
+ "type": "tool",
+ "description": "Searches Google for the answer to a question.",
+ },
+ inputs={"q": user_request},
+ output=google_answer,
+ session_reference_id=session_reference_id,
+)
+
+
+# Respond to request
+response = humanloop.complete(
+ project="sessions_example_assistant",
+ model_config={
+ "prompt_template": "Question: {{user_request}}\\nGoogle result: {{google_answer}}\\nAnswer:\\n",
+ "model": "text-davinci-002",
+ "temperature": 0,
+ },
+ inputs={"user_request": user_request, "google_answer": google_answer},
+ session_reference_id=session_reference_id,
+)
+assistant_response = response.data[0].output
+print("Assistant response:", assistant_response)
+
+
+
+NESTING LOGS WITHIN A SESSION [EXTENSION]
+
+A more complicated trace involving nested logs, such as those recording an Agent's behaviour, can also be logged and viewed in
+Humanloop.
+
+First, post a log to a session, specifying both session_reference_id and reference_id. Then, pass in this reference_id as
+parent_reference_id in a subsequent log request. This indicates to Humanloop that this second log should be nested under the
+first.
+
+parent_log_reference_id = str(uuid.uuid4())
+
+parent_response = humanloop.log(
+ project="sessions_example_assistant",
+ config=config,
+ messages=messages,
+ inputs={"user_request": user_request},
+ output=assistant_response,
+ session_reference_id=session_reference_id,
+ reference_id=parent_log_reference_id,
+)
+
+child_response = humanloop.log(
+ project="sessions_example_assistant",
+ config=config,
+ messages=messages,
+ inputs={"user_request": user_request},
+ output=assistant_response,
+ session_reference_id=session_reference_id,
+ parent_reference_id=parent_log_reference_id,
+)
+
+
+3 logged datapoints within a session, with the second and third nested under the first.
+[file:87362e03-3f85-4dbd-b2c6-e5a331255595]
+
+Deferred output population
+
+In most cases, you don't know the output for a parent log until all of its children have completed. For instance, the root-level
+Agent will spin off multiple LLM requests before it can retrieve an output. To support this case, we allow logging without an
+output. The output can then be updated after the session is complete with a separate
+humanloop.logs_api.update_by_reference_id(reference_id, output) call.
+
+session_reference_id = uuid.uuid4().hex
+parent_reference_id = uuid.uuid4().hex
+
+# Log parent
+log_response = humanloop.log(
+ project="sessions_example_deferred_log",
+ inputs={"input": "parent"},
+ source="sdk",
+ config={
+ "model": "gpt-3.5-turbo",
+ "max_tokens": -1,
+ "temperature": 0.7,
+ "prompt_template": "A prompt template",
+ "type": "model",
+ },
+ session_reference_id=session_reference_id,
+ reference_id=parent_reference_id,
+)
+
+# Other processing and logging here, yielding a final output.
+output = "updated parent output"
+
+# Logging of output once it has been calculated.
+update_log_response = humanloop.logs.update_by_ref(
+ reference_id=parent_reference_id,
+ output=output,
+)
+",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/logging-session-traces",
+ "title": "Chaining calls (Sessions)",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -21530,6 +33232,468 @@ The core entity in the Humanloop evaluation framework is an **evaluator** - a fu
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Evaluation and Monitoring",
+ ],
+ "content": "A key part of successful prompt engineering and deployment for LLMs is a robust evaluation framework. In this section we provide
+guides for how to set up Humanloop's evaluation framework in your projects.
+
+The core entity in the Humanloop evaluation framework is an evaluator - a function you define which takes an LLM-generated log as
+an argument and returns an evaluation. The evaluation is typically either a boolean or a number, indicating how well the model
+performed according to criteria you determine based on your use case.
+
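+For intuition, a minimal Python evaluator of this kind might look like the following sketch; treating the log as a dictionary
+with an output field is an assumption for illustration rather than an exact schema:
+
+def no_apology_evaluator(log):
+    # Boolean judgment: pass if the generation does not open with an apology.
+    output = (log.get("output") or "").lower()
+    return not output.startswith(("sorry", "i'm sorry", "i apologize"))
+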
+
+TYPES
+
+Currently, you can define your evaluators in two different ways:
+
+ * Python - using our in-browser editor, define simple Python functions to act as evaluators
+ * LLM - use language models to evaluate themselves! Our evaluator editor allows you to define a special-purpose prompt which
+ passes data from the underlying log to a language model. This type of evaluation is particularly useful for more subjective
+ evaluation such as verifying appropriate tone-of-voice or factuality given an input set of facts.
+
+
+MODES: MONITORING VS. TESTING
+
+Evaluation is useful for both testing new model configs as you develop them and for monitoring live deployments that are already
+in production.
+
+To handle these different use cases, there are two distinct modes of evaluator - online and offline.
+
+
+ONLINE
+
+Online evaluators are for use on logs generated in your project, including live in production. Typically, they are used to monitor
+deployed model performance over time.
+
+Online evaluators can be set to run automatically whenever logs are added to a project. The evaluator takes the log as an
+argument.
+
+
+OFFLINE
+
+Offline evaluators are for use with predefined test datasets [./datasets] in order to evaluate models as you iterate in your
+prompt engineering workflow, or to test for regressions in a CI environment.
+
+A test dataset is a collection of datapoints, which are roughly analogous to unit tests or test cases in traditional programming.
+Each datapoint specifies inputs to your model and (optionally) some target data.
+
+When you run an offline evaluation, Humanloop iterates through each datapoint in the dataset and triggers a fresh LLM generation
+using the inputs of the testcase and the model config being evaluated. For each test case, your evaluator function will be called,
+taking as arguments the freshly generated log and the testcase datapoint that gave rise to it. Typically, you would write your
+evaluator to perform some domain-specific logic to determine whether the model-generated log meets your desired criteria (as
+specified in the datapoint 'target').
+
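+A sketch of an offline evaluator with this shape; the field names on the log and the datapoint target are illustrative
+assumptions:
+
+def exact_match_evaluator(log, testcase):
+    # Compare the freshly generated output against the datapoint's target answer.
+    generated = (log.get("output") or "").strip().lower()
+    expected = (testcase.get("target", {}).get("answer") or "").strip().lower()
+    return generated == expected
+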
+
+HUMANLOOP-HOSTED VS. SELF-HOSTED
+
+Conceptually, evaluation runs have two components:
+
+ 1. Generation of logs from the datapoints
+ 2. Evaluating those logs.
+
+Using the Evaluations API, Humanloop offers the ability to generate logs either within the Humanloop runtime, or self-hosted.
+Similarly, evaluations of the logs can be performed in the Humanloop runtime (using evaluators that you can define in-app) or
+self-hosted (see our guide on self-hosted evaluations [./self-hosted-evaluations]).
+
+In fact, it's possible to mix-and-match self-hosted and Humanloop-runtime generations and evaluations in any combination you wish.
+When creating an evaluation via the API, set the hl_generated flag to False to indicate that you are posting the logs from your
+own infrastructure (see our guide on evaluating externally-generated logs [./evaluating-externally-generated-logs]). Include an
+evaluator of type External to indicate that you will post evaluation results from your own infrastructure. You can include
+multiple evaluators on any run, and these can include any combination of External (i.e. self-hosted) and Humanloop-runtime
+evaluators.
+
+----------------------------------------------------------------------------------------------------------------------------------
+
+title: Evaluating LLM Applications
+authors: ["Peter Hayes"]
+type: Blog
+date: 2024-02-06
+draft: false
+published: true
+tags: ["llm", "gpt-4", "evals"]
+
+summary: An overview of evaluating LLM applications. The emerging evaluation framework, parallels to traditional software testing
+and some guidance on best practices.
+
+
+THUMBNAIL: /BLOG/EVALUATING-LLM-APPS/EVALLLMAPPSTHUMBNAIL2.PNG
+
+An ever-increasing number of companies are using large language models (LLMs) to transform both their product experiences and
+internal operations. These kinds of foundation models represent a new computing platform. The process of prompt engineering
+[https://humanloop.com/blog/prompt-engineering-101] is replacing aspects of software development and the scope of what software
+can achieve is rapidly expanding.
+
+In order to effectively leverage LLMs in production, having confidence in how they perform is paramount. This represents a unique
+challenge for most companies given the inherent novelty and complexities surrounding LLMs. Unlike traditional software and
+non-generative machine learning (ML) models, evaluation is subjective, hard to automate and the risk of the system going
+embarrassingly wrong is higher.
+
+This post provides some thoughts on evaluating LLMs and discusses some emerging patterns I've seen work well in practice from
+experience with thousands of teams deploying LLM applications in production.
+
+
+LLMS ARE NOT ALL YOU NEED
+
+It’s important to first understand the basic makeup of what we are evaluating when working with LLMs in production. As the models
+get increasingly more powerful, a significant amount of effort is spent trying to give the model the appropriate context and
+access required to solve a task.
+
+For the current generation of models, at the core of any LLM app is usually some combination of the following components:
+
+ * LLM model - the core reasoning engine; an API into OpenAI, Anthropic, Google, or open source alternatives like Mistral
+ [https://mistral.ai/news/mixtral-of-experts/].
+ * Prompt template - the boilerplate instructions to your model, which are shared between requests. This is generally versioned
+ and managed like code using formats like the .prompt [https://docs.humanloop.com/docs/prompt-file-format] file.
+ * Data sources - to provide the relevant context to the model; often referred to as retrieval augmented generation (RAG).
+ Examples being traditional relational databases, graph databases, and vector databases
+ [https://docs.humanloop.com/docs/setup-semantic-search].
+ * Memory - like a data source, but that builds up a history of previous interactions with the model for re-use.
+ * Tools - provides access to actions like API calls and code execution empowering the model to interact with external systems
+ where appropriate.
+ * Agent control flow - some form of looping logic that allows the model to make multiple generations to solve a task before
+ hitting some stopping criteria.
+ * Guardrails - a check that is run on the output of the model before returning the output to the user. This can be simple logic,
+ for example looking for certain keywords, or another model, often triggering a fallback to human-in-the-loop workflows.
+
+
+LLM APPS ARE COMPLEX SYSTEMS
+
+These individual components represent a large and unique design space to navigate. The configuration of each one requires careful
+consideration; it's no longer just strictly prompt engineering.
+
+For example, take the vector database - now a mainstay for the problem of providing the relevant chunks of context to the model,
+for a particular query, from a larger corpus of documents. There is a near infinite number of open or closed source vector stores
+to choose from. Then there is the embedding model (that also has its own design choices), retrieval technique, similarity metric,
+how to chunk your documents, how to sync your vector store... and the list goes on.
+
+Not only that, but there are often complex interactions between these components that are hard to predict. For example, maybe the
+performance of your prompt template is weirdly sensitive to the format of the separator tokens you forgot to strip when chunking
+your documents in the vector database (a real personal anecdote).
+
+Furthermore, we're seeing applications that have multiple specialist blocks of these components chained together to solve a task.
+This all adds to the challenge of evaluating the resulting complex system. Specialist tooling is increasingly a necessity to help
+teams build robust applications.
+
+Like for testing in traditional software development, the goal of a good LLM evaluation framework is to provide confidence that
+the system is working as expected and also transparency into what might be causing issues when things go wrong. Unlike traditional
+software development, a significant amount of experimentation and collaboration is required when building with LLMs. From prompt
+engineering with domain experts, to tool integrations with engineers. A systematic way to track progress is required.
+
+
+TAKE LESSONS FROM TRADITIONAL SOFTWARE
+
+A large proportion of teams now building great products with LLMs aren't experienced ML practitioners. Conveniently many of the
+goals and best practices from software development are broadly still relevant when thinking about LLM evals.
+
+
+AUTOMATION AND CONTINUOUS INTEGRATION IS STILL THE GOAL
+
+Competent teams will traditionally set up robust test suites that are run automatically against every system change before
+deploying to production. This is a key aspect of continuous integration (CI) and is done to protect against regressions and ensure
+the system is working as the engineers expect. Test suites are generally made up of 3 canonical types of tests: unit, integration
+and end-to-end.
+
+ * Unit - very numerous, target a specific atom of code and are fast to run.
+ * Integration - less numerous, cover multiple chunks of code, are slower to run than unit tests and may require mocking external
+ services.
+ * End-to-end - emulate the experience of an end UI user or API caller; they are slow to run and oftentimes need to interact with
+ a live version of the system.
+
+The most effective mix of test types for a given system often sparks debate. Yet, the role of automated testing as part of the
+deployment lifecycle, alongside the various trade-offs between complexity and speed, remain valuable considerations when working
+with LLMs.
+
+
+UNIT TESTS ARE TRICKY FOR LLMS
+
+There are however a number of fundamental differences with LLM native products when it comes to this type of testing. Of the test
+types, the most difficult to transfer over to LLMs is the unit test because of:
+
+ * Randomness - LLMs produce probabilities over words which can result in random variation between generations for the same
+ prompt. Certain applications, like task automation, require deterministic predictions. Others, like creative writing, demand
+ diversity.
+ * Subjectivity - we oftentimes want LLMs to produce natural human-like interactions. This requires more nuanced approaches to
+ evaluation because of the inherent subjectivity of the correctness of outputs, which may depend on context or user preferences.
+ * Cost and latency - given the computation involved, running SOTA LLMs can come with a significant cost and tend to have
+ relatively high latency; especially if configured as an agent that can take multiple steps.
+ * Scope - LLMs are increasingly capable of solving broader less well-defined tasks, resulting in the scope of what we are
+ evaluating often being a lot more open-ended than in traditional software applications.
+
+As a result, the majority of automation efforts in evaluating LLM apps take the form of integration and end-to-end style tests and
+should be managed as such within CI pipelines.
+
+
+OBSERVABILITY NEEDS TO EVOLVE
+
+There is also the important practice of monitoring the system in production. Load and usage patterns in the wild can be unexpected
+and lead to bugs. Traditional observability solutions like Datadog [https://www.datadoghq.com/] and New Relic
+[https://newrelic.com/] monitor the health of the system and provide alerts when things go wrong; usually based on simple
+heuristics and error codes. This tends to fall short when it comes to LLMs. The more capable and complex the system, the harder it
+can be to determine something actually went wrong and the more important observability and traceability is.
+
+Furthermore, one of the promises of building with LLMs is the potential to more rapidly intervene and experiment. By tweaking
+instructions you can fix issues and improve performance. Another advantage is that less technical teams can be more involved in
+building; the makeup of the teams [https://humanloop.com/blog/how-to-build-the-right-team-for-generative-ai] is evolving. This
+impacts what's needed from an observability solution in this setting. A tighter integration between observability data and the
+development environment to make changes is more beneficial, as well as usability for collaborating with product teams and domain
+experts outside of engineering. This promise of more rapid and sometimes non-technical iteration cycles also increases the
+importance of robust regression testing.
+
+Before delving more into the stages of evaluation and how they relate to existing CI and observability concepts, it's important to
+understand more about the different types of evaluations in this space.
+
+
+TYPES OF EVALUATION CAN VARY SIGNIFICANTLY
+
+When evaluating one or more components of an LLM block, different types of evaluations are appropriate depending on your goals,
+the complexity of the task and available resources. Having good coverage over the components that are likely to have an impact
+over the overall quality of the system is important.
+
+These different types can be roughly characterized by the return type and the source of, as well as the criteria for, the judgment
+required.
+
+
+JUDGMENT RETURN TYPES ARE BEST KEPT SIMPLE
+
+The most common judgment return types are familiar from traditional data science and machine learning frameworks. From simple to
+more complex:
+
+ * Binary - involves a yes/no, true/false, or pass/fail judgment based on some criteria.
+ * Categorical - involves more than two categories; for example adding an abstain or maybe option to a binary judgment.
+ * Ranking - the relative quality of output from different samples or variations of the model are being ranked from best to worst
+ based on some criteria. Preference based judgments are often used in evaluating the quality of a ranking.
+ * Numerical - involves a score, a percentage, or any other kind of numeric rating.
+ * Text - a simple comment or a more detailed critique. Often used when a more nuanced or detailed evaluation of the model's
+ output is required.
+ * Multi-task - combines multiple types of judgment simultaneously. For example, a model's output could be evaluated using both a
+ binary rating and a free-form text explanation.
+
+Simple individual judgments can be easily aggregated across a dataset of multiple examples using well known metrics. For example,
+for classification problems, precision [https://en.wikipedia.org/wiki/Accuracy_and_precision], recall
+[https://en.wikipedia.org/wiki/Precision_and_recall] and F1 [https://en.wikipedia.org/wiki/F-score] are typical choices. For
+rankings, there are metrics like NDCG [https://en.wikipedia.org/wiki/Discounted_cumulative_gain], Elo ratings
+[https://en.wikipedia.org/wiki/Elo_rating_system] and Kendall's Tau
+[https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient]. For numerical judgments there are variations of the Bleu
+score [https://blog.modernmt.com/understanding-mt-quality-bleu-scores/].
+
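+For example, a set of binary judgments can be aggregated against reference labels with a few lines of Python (a quick sketch
+with made-up data):
+
+# 1 = pass, 0 = fail; an evaluator's judgments compared against gold labels.
+judgments = [1, 0, 1, 1, 0, 1]
+labels = [1, 0, 0, 1, 0, 1]
+
+tp = sum(1 for j, y in zip(judgments, labels) if j == 1 and y == 1)
+fp = sum(1 for j, y in zip(judgments, labels) if j == 1 and y == 0)
+fn = sum(1 for j, y in zip(judgments, labels) if j == 0 and y == 1)
+
+precision = tp / (tp + fp) if (tp + fp) else 0.0
+recall = tp / (tp + fn) if (tp + fn) else 0.0
+f1 = 2 * precision * recall / (precision + recall) if (precision + recall) else 0.0
+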
+I find that in practice binary and categorical types generally cover the majority of use cases. They have the added benefit of
+being the most straightforward to source reliably. The more complex the judgment type, the more potential for ambiguity there is
+and the harder it becomes to make inferences.
+
+
+MODEL SOURCED JUDGMENTS ARE INCREASINGLY PROMISING
+
+Sourcing judgments is an area where there are new and evolving patterns around foundation models like LLMs. At Humanloop, we've
+standardised around the following canonical sources:
+
+ * Heuristic/Code - using simple deterministic rules based judgments against attributes like cost, token usage, latency, regex
+ rules on the output, etc. These are generally fast and cheap to run at scale.
+ * Model (or 'AI') - using other foundation models to provide judgments on the output of the component. This allows for more
+ qualitative and nuanced judgments for a fraction of the cost of human judgments.
+ * Human - getting gold standard judgments from either end users of your application, or internal domain experts. This can be the
+ most expensive and slowest option, but also the most reliable.
+
+![](https://humanloop.com/blog/evaluating-llm-apps/LLMEvals.jpg)
+
+Model judgments in particular are increasingly promising and an active research area. The paper Judging LLM-as-a-Judge
+[https://arxiv.org/abs/2306.05685] demonstrates that an appropriately prompted GPT-4 model achieves over 80% agreement with human
+judgments when rating LLM model responses to questions on a scale of 1-10; that's equivalent to the levels of agreement between
+humans.
+
+Such evaluators can be equally effective in evaluating the important non-LLM components, such as the retrieval component in RAG.
+In Automated Evaluation of Retrieval Augmented Generation [https://arxiv.org/pdf/2309.15217.pdf] a GPT-3 model is tasked with
+extracting the most relevant sentences from the retrieved context. A numeric judgment for relevance is then computed using the
+ratio of the number of relevant to irrelevant sentences, which was also found to be highly correlated with expert human judgments.
+
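+As a toy illustration of that ratio (the exact formulation in the paper may differ; this simply follows the description above):
+
+def relevance_score(relevant_sentences, irrelevant_sentences):
+    # Ratio of relevant to irrelevant sentences extracted from the retrieved context.
+    if not irrelevant_sentences:
+        return float(len(relevant_sentences))
+    return len(relevant_sentences) / len(irrelevant_sentences)
+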
+However, there are risks to consider. The same reasons that evaluating LLMs is hard apply to using them as evaluators. Recent
+research has also shown LLMs to have biases that can contaminate the evaluation process. In Benchmarking Cognitive Biases in Large
+Language Models as Evaluators [https://arxiv.org/pdf/2309.17012.pdf] they measure 6 cognitive biases across 15 different LLM
+variations. They find that simple details such as the order of the results presented to the model can have material impact on the
+evaluation.
+
+![](https://humanloop.com/blog/evaluating-llm-apps/BiasLLMEvals.png)
+
+The takeaway here is that it's important to still experiment with performance on your target use cases before trusting LLM
+evaluators - evaluate the evaluator! All the usual prompt engineering techniques such as including few-shot examples are just as
+applicable here. In addition, fine-tuning specialist, more economical evaluator models using human judgments can be a real
+unlock.
+
+I believe teams should consider shifting more of their human judgment efforts up a level to focus on helping improve model
+evaluators. This will ultimately lead to a more scalable, repeatable and cost-effective evaluation process. As well as one where
+the human expertise can be more targeted on the most important high value scenarios.
+
+
+JUDGMENT CRITERIA ARE WHERE MOST OF THE CUSTOMISATION HAPPENS
+
+The actual criteria for the judgment are what tend to be most specific to the needs of a particular use case. These will either be
+defined in code, in a prompt (or in the parameters of a model), or just in guidelines, depending on whether it's a code, model or
+human based evaluator.
+
+There are lots of broad themes to crib from. Humanloop, for example, provides templates for popular use cases and best practices,
+with the ability to experiment and customize. There are categories like general performance (latency, cost and error thresholds),
+behavioural (tone of voice, writing style, diversity, factuality, relevance, etc.), ethical (bias, safety, privacy, etc.) and user
+experience (engagement, satisfaction, productivity, etc.).
+
+Unsurprisingly, starting with a small set of evaluators that cover the most important criteria is wise. These can then be adapted
+and added to over time as requirements are clarified and new edge cases uncovered. Tradeoffs are often necessary between these
+criteria. For example, a more diverse set of responses might be more engaging, but also more likely to contain errors, and higher
+quality can often come at a cost in terms of latency.
+
+Thinking about these criteria upfront for your project can be a good hack to ensure your team deeply understands the end goals of
+the application.
+
+
+DIFFERENT STAGES OF EVALUATION ARE NECESSARY
+
+As discussed with the distinction between CI and observability, different stages of the app development lifecycle will have
+different evaluation needs. I've found this lifecycle to naturally still consist of some sort of planning and scoping exercise,
+followed by cycles of development, deployment and monitoring.
+
+These cycles are then repeated during the lifetime of the LLM app in order to intervene and improve performance. The stronger the
+teams, the more agile and continuous this process tends to be.
+
+Development here includes both the typical app development - orchestrating your LLM blocks in code, setting up your UIs, etc. - as
+well as more LLM-specific interventions and experimentation, including prompt engineering, context tweaking, tool integration
+updates and fine-tuning, to name a few. Both the choices and quality of interventions to optimize your LLM performance
+[https://humanloop.com/blog/optimizing-llms] are much improved if the right evaluation stages are in place. It facilitates a more
+data-driven, systematic approach.
+
+From my experience there are 3 complementary stages of evaluation that are highest ROI in supporting rapid iteration cycles of the
+LLM block related interventions:
+
+ 1. Interactive - it's useful to have an interactive playground-like editor environment that allows rapid experimentation with
+ components of the model and provides immediate evaluator feedback. This usually works best on a relatively small number of
+ scenarios. This allows teams (both technical and non-technical) to quickly explore the design space of the LLM app and get an
+ informal sense of what works well.
+
+ 2. Batch offline - benchmarking or regression testing the most promising variations over a larger curated set of scenarios to
+ provide a more systematic evaluation. Ideally a range of different evaluators for different components of the app can
+ contribute to this stage, some comparing against gold standard expected results for the task. This can fit naturally into
+ existing CI processes.
+
+ 3. Monitoring online - post deployment, real user interactions can be evaluated continuously to monitor the performance of the
+ model. This process can drive alerts, gather additional scenarios for offline evaluations and inform when to make further
+ interventions. Staging deployments through internal environments, or beta testing with selected cohorts of users first, are
+ usually super valuable.
+
+![](https://humanloop.com/blog/evaluating-llm-apps/EvalsStages.png)
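+
+To illustrate how the batch offline stage can slot into CI, here is a hedged sketch of a regression check; the dataset path, the
+pass-rate threshold and the generate/evaluate callables all stand in for your own app code and evaluators:
+
+import json
+
+PASS_RATE_THRESHOLD = 0.9  # illustrative quality bar
+
+def run_regression_suite(generate, evaluate, dataset_path="benchmark.jsonl"):
+    # Run the app over a curated dataset and score each output with an evaluator.
+    with open(dataset_path) as f:
+        datapoints = [json.loads(line) for line in f]
+    results = [evaluate(generate(dp["inputs"]), dp["target"]) for dp in datapoints]
+    pass_rate = sum(results) / len(results)
+    assert pass_rate >= PASS_RATE_THRESHOLD, f"Pass rate {pass_rate:.2%} is below the bar"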
+
+It's usually necessary to co-evolve to some degree the evaluation framework alongside the app development as more data becomes
+available and requirements are clarified. The ability to easily version control and share across stages and teams both the
+evaluators and the configuration of your app can significantly improve the efficiency of this process.
+
+
+HIGH QUALITY DATASETS ARE STILL PARAMOUNT
+
+Lack of access to high quality data will undermine any good evaluation framework. A good evaluation dataset should ideally be
+representative of the full distribution of behaviours you expect to see and care about in production, considering both the inputs
+and the expected outputs. It's also worth keeping in mind that coverage of the expected behaviours of the individual components of
+your app matters.
+
+Here are some strategies that I think are worth considering: leveraging public/academic benchmarks, collecting data from your own
+systems and creating synthetic data.
+
+
+PAY ATTENTION TO ACADEMIC AND PUBLIC BENCHMARKS
+
+There are well cited academic benchmarks that have been curated to evaluate the general capabilities of LLMs. For AI leaders,
+these can be helpful to reference when choosing which base models to build with originally, or to graduate to when things like
+scale and cost start to factor in. For example, the Large Model Systems Organization [https://lmsys.org/] maintains Chatbot Arena
+[https://chat.lmsys.org/] where they have crowd-sourced over 200k human preference votes to rank LLMs, both commercial and open
+source, as well as recording the performance on academic multi-task reasoning benchmarks like MMLU
+[https://arxiv.org/abs/2009.03300].
+
+![](https://humanloop.com/blog/evaluating-llm-apps/ChatbotArena.png)
+
+Another great resource in the same vein is Hugging Face datasets [https://huggingface.co/docs/datasets/index], where they also
+maintain a leaderboard of how all the latest OSS models perform across a range of tasks using the Eleuther LLM evaluation harness
+library [https://github.com/EleutherAI/lm-evaluation-harness].
+
+![](https://humanloop.com/blog/evaluating-llm-apps/HuggingFaceLeaderBoard.png)
+
+More domain specific academic datasets may also be particularly relevant for your target use case and can be used to warm start
+your evaluation efforts; for example if you were working on medical related tasks
+[https://huggingface.co/datasets/AdaptLLM/medicine-tasks].
+
+
+REAL PRODUCT INTERACTIONS ARE THE MOST VALUABLE SOURCE OF DATA
+
+Arguably the best form of dataset comes from real user interactions. Useful sources of this kind of data are actually the
+interactive and monitoring stages discussed above.
+
+With access to an interactive environment for prompt engineering (or a test version of your application), internal domain experts
+can synthesize examples of the kinds of interactions they expect to see in production. These interactions should be recorded
+throughout the course of initial experimentation to form a benchmark dataset for subsequent offline evaluations.
+
+For leveraging real end-user interactions, a tighter integration between observability data and the development environment that
+manages evaluations makes it easier to curate real scenarios into your benchmark datasets over time.
+
+![](https://humanloop.com/blog/evaluating-llm-apps/HLEditor.png)
+
+To maximise the impact of end-user interactions, it's worth setting up your application to capture rich feedback
+[https://docs.humanloop.com/docs/capture-user-feedback-using-the-sdk] from users from the start. This is an example
+of an online evaluator that relies on human judgments, which can be used to filter for particularly interesting scenarios to add
+to benchmark datasets.
+
+Feedback doesn't need to be only explicit from the user; it can be provided implicitly in the way they interact with the system.
+For example, GitHub Copilot reportedly
+[https://thakkarparth007.github.io/copilot-explorer/posts/copilot-internals.html#secret-sauce-3-telemetry] monitors whether the
+code suggestion was accepted at various time increments after the suggestion was made, as well as whether the user made any edits
+to the suggestion before accepting it.
+
+
+SYNTHETIC DATA IS ON THE RISE
+
+Once you have a small amount of high quality data, leveraging LLMs to generate additional input examples can help bootstrap
+larger datasets. By utilizing few-shot prompting and including a representative subset of your existing data within the prompt,
+you can guide the synthesizer model to generate a wide range of supplementary examples.
+
+A quick pointer here is to prompt the model to generate a batch of examples at a time, rather than one at a time, such that you
+can encourage characteristics like diversity between examples. Or, similarly, feed previously generated examples back into your
+prompt. For instance, for a customer service system, prompts could be designed to elicit responses across a variety of emotional
+states, from satisfaction to frustration.
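+
+A hedged sketch of that batch-style few-shot prompting with the OpenAI SDK (the prompt wording, batch size and model choice are
+all illustrative):
+
+from openai import OpenAI
+
+client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment
+
+def synthesize_batch(seed_examples, n=10):
+    # Show a representative subset of real data and ask for a whole batch of new
+    # examples in a single call, so the model can keep them diverse.
+    examples = "; ".join(seed_examples)
+    prompt = (
+        f"Here are some real customer support queries: {examples}. "
+        f"Write {n} new queries covering a range of emotional states, from satisfied "
+        f"to frustrated. Number them 1 to {n} and make each one clearly different from the others."
+    )
+    response = client.chat.completions.create(
+        model="gpt-4",  # illustrative
+        messages=[{"role": "user", "content": prompt}],
+    )
+    return response.choices[0].message.content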
+
+A specific example of this is model red-teaming, or synthesizing adversarial examples. This is where you use the synthesizer model
+to generate examples that are designed to break the system. For example, in Red Teaming Language Models with Language Models
+[https://arxiv.org/abs/2202.03286], they uncover offensive replies, data leakage and other vulnerabilities in an LLM chat-bot
+using variations of few-shot prompts to generate adversarial questions. They also leverage a pre-trained offensive classifier to
+help automate their evaluation process. However, it is worth noting that they too point out the limitations caused by LLM biases,
+which limit diversity. They ultimately need to generate and filter hundreds of thousands of synthetic examples.
+
+![](https://humanloop.com/blog/evaluating-llm-apps/RedLM.png)
+
+As with LLM evaluators, all the same rigour and tools should be applied to evaluating the quality of the synthetic data generator
+model before trusting it.
+
+
+LOOKING FORWARD...
+
+This is a rapidly evolving area of research and practice. Here's a few areas that I'm particularly excited about working more on
+at Humanloop over the coming months that we'll touch on further in future posts:
+
+ * Increasing adoption of AI based evaluators for all components of these systems, with improved support for fine-tuning and
+ specialisation happening at this level. The existence of OpenAI's Superalignment team
+ [https://openai.com/blog/introducing-superalignment] shows there is focus here on the research front.
+
+ * Supporting more multi-modal applications deployed in production, with more text, image, voice and even video based models
+ coming online.
+
+ * More complex agent-based workflows and experimenting with more multi-agent setups and how evaluation needs to adapt to
+ supervise these systems.
+
+ * Moving towards more end-to-end optimization for the components of these complex systems. A robust set of evaluators can provide
+ an objective to measure performance, coupled with data synthesization to simulate the system.
+
+At Humanloop, we've built an integrated solution for managing the development lifecycle of LLM apps from first principles, which
+includes some of the evaluation challenges discussed in this post. Please reach out [https://humanloop.com/demo] if you'd like to
+learn more.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/evaluation/overview",
+ "title": "Overview",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -22545,6 +34709,168 @@ In this guide, we will walk through creating a dataset and using it to run an of
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Evaluation and Monitoring",
+ ],
+ "content": "This feature is not available for the Free tier. Please contact us if you wish to learn more about our [Enterprise
+plan](https://humanloop.com/pricing)
+
+
+CREATE AN OFFLINE EVALUATOR
+
+
+PREREQUISITES
+
+ * You need to have access to Evaluations
+ * You also need to have a Prompt – if not, please follow our Prompt creation [./create-prompt] guide.
+ * Finally, you need at least a few Logs in your prompt. Use the Editor to generate some logs if you have none.
+
+You need logs for your project because we will use these as a source of test datapoints for the dataset we create. If you want to
+make arbitrary test datapoints from scratch, see our guide to doing this from the API. We will soon update the app to enable
+arbitrary test datapoint creation from your browser.
+
+For this example, we will evaluate a model responsible for extracting critical information from a customer service request and
+returning this information in JSON. In the image below, you can see the model config we've drafted on the left and an example of
+it running against a customer query on the right.
+
+[file:8080ba55-925f-47cd-b82f-dc8aa98571e8]
+
+
+SET UP A DATASET
+
+We will create a dataset based on existing logs in the project.
+
+### Navigate to the **Logs** tab
+
+### Select the logs you would like to convert into test datapoints
+
+### From the dropdown menu in the top right (see below), choose **Add to Dataset**
+
+Creating test datapoints from a selection of existing project datapoints. [file:655fb25a-9793-477b-9d5d-798d4ba1c5de]
+
+
+IN THE DIALOG BOX, GIVE THE NEW DATASET A NAME AND PROVIDE AN OPTIONAL DESCRIPTION. CLICK CREATE DATASET.
+
+[file:73655869-d91b-4f77-9810-fcc21aefdc76]
+
+You can add more datapoints to the same dataset later by clicking the 'add to existing dataset' button at the top.
+
+
+GO TO THE DATASETS TAB.
+
+
+CLICK ON THE NEWLY CREATED DATASET. ONE DATAPOINT WILL BE PRESENT FOR EACH LOG YOU SELECTED IN STEP 3
+
+The newly created dataset, containing datapoints converted from existing logs in the project.
+[file:9805a880-7620-4497-a769-07598dde39c2]
+
+
+CLICK ON A DATAPOINT TO INSPECT ITS PARAMETERS.
+
+A test datapoint contains inputs (the variables passed into your model config template), an optional sequence of messages (if used
+for a chat model) and a target representing the desired output.
+
+When existing logs are converted to datapoints, the datapoint target defaults to the output of the source Log.
+
+
+
+In our example, we created datapoints from existing logs. The default behaviour is that the original log's output becomes an
+output field in the target JSON.
+
+To access the feature field more efficiently in our evaluator, we'll modify the datapoint targets to be a raw JSON with a feature
+key.
+
+The original log was an LLM generation which outputted a JSON value. The conversion process has placed this into the \`output\`
+field of the testcase target. [file:2e2eb023-b89a-43c6-b5c2-e5b84fc15c1e]
+
+
+MODIFY THE DATAPOINT IF YOU NEED TO MAKE REFINEMENTS
+
+You can provide an arbitrary JSON object as the target.
+
+After editing, we have a clean JSON object recording the salient characteristics of the datapoint's expected output.
+[file:4ff8aa48-b589-4729-90ed-9faaf7f3ecca]
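+
+For concreteness, a hedged sketch of the edit (the values follow this guide's customer-support example and are illustrative):
+
+Before (the source log's generation nested under an output key):
+
+{
+  "output": "{\\"feature\\": \\"evaluations\\", \\"issue\\": \\"trouble understanding how to use the evaluations feature\\"}"
+}
+
+After (a raw JSON target with a top-level feature key):
+
+{
+  "feature": "evaluations",
+  "issue": "trouble understanding how to use the evaluations feature"
+}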
+
+
+CREATE AN OFFLINE EVALUATOR
+
+Having set up a dataset, we'll now create the evaluator. As with online evaluators, it's a Python function, but in offline mode it
+also takes a testcase parameter alongside the generated log.
+
+### Navigate to the evaluations section, and then the Evaluators tab
+
+### Select **+ New Evaluator** and choose **Offline Evaluation**
+
+### Choose **Start from scratch**
+
+For this example, we'll use the code below to compare the LLM generated output with what we expected for that testcase.
+
+import json
+from json import JSONDecodeError
+
+def it_extracts_correct_feature(log, testcase):
+ expected_feature = testcase["target"]["feature"]
+
+ try:
+ # The model is expected to produce valid JSON output
+ # but it could fail to do so.
+ output = json.loads(log["output"])
+ actual_feature = output.get("feature", None)
+ return expected_feature == actual_feature
+
+ except JSONDecodeError:
+ # If the model didn't even produce valid JSON, then
+ # we evaluate the output as bad.
+ return False
+
+
+
+USE THE DEBUG CONSOLE
+
+In the debug console at the bottom of the dialog, click Load data and then Datapoints from dataset. Select the dataset you created
+in the previous section. The console will be populated with its datapoints.
+
+The debug console. Use this to load test datapoints from a dataset and perform debug runs with any model config in your project.
+[file:7e43053b-b57c-4e36-b07b-6456b9392066]
+
+CHOOSE A MODEL CONFIG FROM THE DROPDOWN MENU.
+
+CLICK THE RUN BUTTON AT THE FAR RIGHT OF ONE OF THE TEST DATAPOINTS.
+
+A new debug run will be triggered, which causes an LLM generation using that datapoint's inputs and messages parameters. The
+generated log and the test datapoint will be passed to the evaluator, and the resulting evaluation will be displayed in the Result
+column.
+
+
+CLICK CREATE WHEN YOU ARE HAPPY WITH THE EVALUATOR.
+
+
+TRIGGER AN OFFLINE EVALUATION
+
+Now that you have an offline evaluator and a dataset, you can use them to evaluate the performance of any model config in your
+project.
+
+### Go to the **Evaluations** section.
+
+### In the **Runs** tab, click **Run Evaluation**
+
+### In the dialog box, choose a model config to evaluate and select your newly created dataset and evaluator.
+
+[file:d00f8623-7f27-4fdf-b06c-fde2c8b9efbb]
+
+
+CLICK BATCH GENERATE
+
+
+A NEW EVALUATION IS LAUNCHED. CLICK ON THE CARD TO INSPECT THE RESULTS.
+
+A batch generation has now been triggered. This means that the model config you selected will be used to generate a log for each
+datapoint in the dataset. It may take some time for the evaluation to complete, depending on how many test datapoints are in your
+dataset and what model config you are using. Once all the logs have been generated, the evaluator will execute for each in turn.
+
+
+INSPECT THE RESULTS OF THE EVALUATION.
+
+[file:aec29366-9b7e-4fdc-a157-4ea58d927f0f]",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/evaluation/evaluate-models-offline",
+ "title": "Run an evaluation",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -22801,6 +35127,379 @@ In this guide, we'll walk through an example of using our API to create dataset
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Evaluation and Monitoring",
+ ],
+ "content": "This feature is not available for the Free tier. Please contact us if you wish to learn more about our [Enterprise
+plan](https://humanloop.com/pricing) This guide uses our [Python SDK](/docs/api-reference/sdks). All of the endpoints used are
+available in our [TypeScript SDK](/docs/api-reference/sdks) and directly [via the API](/docs/reference/humanloop-api).
+
+
+PREREQUISITES:
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop TypeScript SDK:
+
+ npm install humanloop
+
+
+ 2. Import and initialize the SDK:
+
+ import { HumanloopClient, Humanloop } from "humanloop";
+
+ const humanloop = new HumanloopClient({ apiKey: "YOUR_API_KEY" });
+
+ // Check that the authentication was successful
+ console.log(await humanloop.prompts.list());
+
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop Python SDK:
+
+ pip install humanloop
+
+
+ 2. Start a Python interpreter:
+
+ python
+
+
+ 3. Initialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page
+ [https://app.humanloop.com/account/api-keys])
+
+ from humanloop import Humanloop
+ hl = Humanloop(api_key="")
+
+ # Check that the authentication was successful
+ print(hl.prompts.list())
+
+
+
+CREATE EVALUATION
+
+We'll go through how to use the SDK in a Python script to set up a project, create a dataset and then finally trigger an
+evaluation.
+
+
+SET UP A PROJECT
+
+### Import Humanloop and set your [Humanloop](https://app.humanloop.com/account/api-keys) and [OpenAI
+API](https://platform.openai.com/account/api-keys) keys.
+
+from humanloop import Humanloop
+
+HUMANLOOP_API_KEY = ""
+OPENAI_API_KEY = ""
+
+# Initialize the Humanloop client
+humanloop = Humanloop(
+ api_key=HUMANLOOP_API_KEY,
+ openai_api_key=OPENAI_API_KEY,
+)
+
+
+
+CREATE A PROJECT AND REGISTER YOUR FIRST MODEL CONFIG
+
+We'll use OpenAI's GPT-4 for extracting product feature names from customer queries in this example. The first model config
+created against the project is automatically deployed:
+
+
+# Create a project
+project = humanloop.projects.create(name="evals-guide")
+project_id = project.id
+
+# Create the first model config for the project, which will automatically be deployed
+model_config = humanloop.model_configs.register(
+ project_id=project_id,
+ model="gpt-4",
+ name="Entity extractor v0",
+ endpoint="chat",
+ chat_template=[
+ {
+ "role": "system",
+ "content": "Extract the name of the feature or issue the customer is describing. "
+ "Possible features are only: evaluations, experiments, fine-tuning \\n"
+ "Write your response in json format as follows:"
+ ' \\n {"feature": "feature requested", "issue": "description of issue"}',
+ }
+ ],
+)
+config_id = model_config.config.id
+
+
+If you log onto your Humanloop account you will now see your project with a single model config defined:
+
+[file:059b2c9d-dc60-4616-8bf6-8fa2b7bc5a54]
+
+
+CREATE A DATASET
+
+Follow the steps in our guide to Upload a Dataset via API [./create-a-dataset#upload-via-api].
+
+### Now test your model manually by generating a log for one of the datapoints' messages:
+
+# Generate a log
+log = humanloop.chat_deployed(
+ project_id=project_id,
+ messages=data[0]["messages"],
+ inputs={"features": "evaluations, experiments, fine-tuning"},
+).data[0]
+
+import json
+print(json.dumps(log))
+
+
+You can see from the output field in the response that the model has done a good job at extracting the mentioned features in the
+desired json format:
+
+{
+ "id": "data_aVUA2QZPHaQTnhoOCG7yS",
+ "model_config_id": "config_RbbfjXOkEnzYK6PS8cS96",
+ "messages": [
+ {
+ "role": "system",
+ "content": "Extract the name of the feature or issue the customer is describing. Possible features are only: evaluations, experiments, fine-tuning \\nWrite your response in json format as follows: \\n {\\"feature\\": \\"feature requested\\", \\"issue\\": \\"description of issue\\"}"
+ },
+ {
+ "role": "user",
+ "content": "Hi Humanloop support team, I'm having trouble understanding how to use the evaluations feature in your software. Can you provide a step-by-step guide or any resources to help me get started?"
+ }
+ ],
+ "output": "{\\"feature\\": \\"evaluations\\", \\"issue\\": \\"trouble understanding how to use the evaluations feature\\"}",
+ "finish_reason": "stop"
+}
+
+
+
+CREATE AN EVALUATOR
+
+Now that you have a project with a model config and a dataset defined, you can create an evaluator that will determine the success
+criteria for a log generated from the model using the target defined in the test datapoint.
+
+### Create an evaluator to determine if the extracted JSON is correct and test it against the generated log and the corresponding
+test datapoint:
+
+# Define an evaluator
+import json
+from json import JSONDecodeError
+
+
+def check_feature_json(log, testcase):
+ expected_feature = testcase["target"]["feature"]
+
+ try:
+ # The model is expected to produce valid JSON output but it could fail to do so.
+ output = json.loads(log["output"])
+ actual_feature = output.get("feature", None)
+ return expected_feature == actual_feature
+ except JSONDecodeError:
+ # If the model didn't even produce valid JSON, then it fails
+ return False
+
+# Try out the evaluator
+print(f"Test case result: {check_feature_json(log, data[0])}")
+
+
+Test case result: True
+
+
+
+SUBMIT THIS EVALUATOR TO HUMANLOOP
+
+This means it can be used for future evaluations triggered via the UI or the API:
+
+import inspect
+
+# The evaluator must be sent as a string, so we convert it first
+json_imports = "import json\\nfrom json import JSONDecodeError\\n"
+evaluator_code = json_imports + inspect.getsource(check_feature_json)
+
+# Send evaluator to Humanloop
+evaluator = humanloop.evaluators.create(
+ name="Feature request json",
+ description="Validate that the json returned by the model matches the target json",
+ code=evaluator_code,
+ arguments_type="target_required",
+ return_type="boolean",
+)
+evaluator_id = evaluator.id
+
+
+In your Humanloop project you will now see an evaluator defined:
+
+[file:049be476-19fa-46d5-8a89-ad4a55a5f65b]
+
+
+LAUNCH AN EVALUATION
+
+You can now trigger an evaluation of the model config using the dataset and evaluator. In practice you can include more than one
+evaluator:
+
+# Finally trigger an evaluation
+evaluation = humanloop.evaluations.create(
+ project_id=project_id,
+ evaluator_ids=[evaluator_id],
+ config_id=config_id,
+ dataset_id=dataset_id,
+)
+
+
+Navigate to your Humanloop account to see the evaluation results. Initially it will be in a pending state, but will quickly move
+to completed given the small number of test cases. The datapoints generated by your model as part of the evaluation will also be
+recorded in your project's logs table.
+
+[file:2c0093f5-6e1e-43ee-a902-5315e2c54dc6]
+
+
+CREATE EVALUATION - FULL SCRIPT
+
+Here is the full script you can copy and paste and run in your Python environment:
+
+from humanloop import Humanloop
+import inspect
+import json
+from json import JSONDecodeError
+
+
+HUMANLOOP_API_KEY = ""
+OPENAI_API_KEY = ""
+
+# Initialize the Humanloop client
+humanloop = Humanloop(
+ api_key=HUMANLOOP_API_KEY,
+ openai_api_key=OPENAI_API_KEY,
+)
+
+# Create a project
+project = humanloop.projects.create(name="evals-guide")
+project_id = project.id
+
+# Create the first model config for the project, which will automatically be deployed
+model_config = humanloop.model_configs.register(
+ project_id=project_id,
+ model="gpt-4",
+ name="Entity extractor v0",
+ chat_template=[
+ {
+ "role": "system",
+ "content": "Extract the name of the feature or issue the customer is describing. "
+ "Possible features are only: evaluations, experiments, fine-tuning \\n"
+ "Write your response in json format as follows:"
+ ' \\n {"feature": "feature requested", "issue": "description of issue"}',
+ }
+ ],
+ endpoint="chat",
+ temperature=0.5,
+)
+config_id = model_config.config.id
+
+# Example test case data
+data = [
+ {
+ "messages": [
+ {
+ "role": "user",
+ "content": "Hi Humanloop support team, I'm having trouble understanding how to use the evaluations feature in your software. Can you provide a step-by-step guide or any resources to help me get started?",
+ }
+ ],
+ "target": {"feature": "evaluations", "issue": "needs step-by-step guide"},
+ "inputs": {},
+ },
+ {
+ "messages": [
+ {
+ "role": "user",
+ "content": "Hi there, I'm interested in fine-tuning a language model using your software. Can you explain the process and provide any best practices or guidelines?",
+ }
+ ],
+ "target": {
+ "feature": "fine-tuning",
+ "issue": "process explanation and best practices",
+ },
+ "inputs": {},
+ },
+]
+
+# Create a dataset
+dataset = humanloop.datasets.create(
+ project_id=project_id,
+ name="Target feature requests",
+ description="Target feature request json extractions",
+)
+
+# Create test datapoints for the dataset
+datapoints = humanloop.datasets.create_datapoint(
+ dataset_id=dataset.id,
+ body=data,
+)
+
+# Generate a log
+log = humanloop.chat_deployed(
+ project_id=project_id,
+ messages=data[0]["messages"],
+).data[0]
+
+
+# Define an evaluator
+
+def check_feature_json(log, testcase):
+ expected_feature = testcase["target"]["feature"]
+
+ try:
+ # The model is expected to produce valid JSON output but it could fail to do so.
+ output = json.loads(log["output"])
+ actual_feature = output.get("feature", None)
+ return expected_feature == actual_feature
+
+ except JSONDecodeError:
+ # If the model didn't even produce valid JSON, then it fails
+ return False
+
+
+# Try out the evaluator
+print(f"Test case result: {check_feature_json(log, data[0])}")
+
+# The evaluator must be sent as a string, so we convert it first
+json_imports = "import json\\nfrom json import JSONDecodeError\\n"
+evaluator_code = json_imports + inspect.getsource(check_feature_json)
+
+# Send evaluator to Humanloop
+evaluator = humanloop.evaluators.create(
+ name="Feature request json",
+ description="Validate that the json returned by the model matches the target json",
+ code=evaluator_code,
+ arguments_type="target_required",
+ return_type="boolean",
+)
+
+# Finally trigger an evaluation
+evaluation = humanloop.evaluations.create(
+ project_id=project_id,
+ evaluator_ids=[evaluator.id],
+ config_id=config_id,
+ dataset_id=dataset.id,
+)
+
+# Now navigate to your project's evaluations tab on humanloop to inspect the results
+",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/evaluation/evaluations-using-api",
+ "title": "Set up evaluations using API",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -23332,6 +36031,102 @@ In this guide, we'll show how to set up LLM evaluations.",
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Evaluation and Monitoring",
+ ],
+ "content": "As well as using Python code to evaluate Logs, you can also create special-purpose prompts for LLMs to evaluate Logs too.
+
+In this guide, we'll show how to set up LLM evaluations.
+
+
+PREREQUISITES
+
+ * You need to have access to evaluations.
+ * You also need to have a Prompt – if not, please follow our Prompt creation [./create-prompt] guide.
+ * Finally, you need at least a few logs in your project. Use the Editor to generate some logs if you don't have any yet.
+
+
+SET UP AN LLM EVALUATOR
+
+### From the Evaluations page, click **New Evaluator** and select AI. [file:11a44ce1-398f-453b-8b66-c37554d5b900]
+
+
+FROM THE PRESETS MENU ON THE LEFT-HAND SIDE OF THE PAGE, SELECT PII.
+
+[file:62811b9d-eed1-46cd-a6a6-b7470d032d8a]
+
+
+SET THE EVALUATOR TO ONLINE MODE, AND TOGGLE AUTO-RUN TO ON. THIS WILL MAKE THE PII CHECKER RUN ON ALL NEW LOGS IN THE PROJECT.
+
+The **PII check** evaluator. [file:34010c77-07d5-4909-b783-9886a9f2b981]
+
+
+CLICK CREATE IN THE BOTTOM LEFT OF THE PAGE.
+
+
+GO TO EDITOR AND TRY GENERATING A COUPLE OF LOGS, SOME CONTAINING PII AND SOME WITHOUT.
+
+
+GO TO THE LOGS TABLE TO REVIEW THESE LOGS.
+
+The logs table, showing that the **PII check** evaluator ran on the latest logs. [file:b0e90cac-c79b-48cb-a65d-bdeee3fd7973]
+
+
+CLICK ONE OF THE LOGS TO SEE MORE DETAILS IN THE DRAWER.
+
+In our example below, you can see that the log did contain PII, and the PII check evaluator has correctly identified this and
+flagged it with False.
+
+[file:336509bf-529d-43d8-8463-1778b966c60a]
+
+
+CLICK VIEW SESSION AT THE TOP OF LOG DRAWER TO INSPECT IN MORE DETAIL THE LLM EVALUATOR'S GENERATION ITSELF.
+
+
+SELECT THE PII CHECK ENTRY IN THE SESSION TRACE
+
+In the Completed Prompt tab of the log, you'll see the full input and output of the LLM evaluator generation.
+
+The LLM evaluator produced an explanation reasoning why the underlying log did contain PII, and terminated with a final verdict of
+'False'. [file:71d8a56c-3289-405e-9eee-a90ced776e67]
+
+
+AVAILABLE VARIABLES
+
+In the prompt editor for an LLM evaluator, you have access to the underlying log you are evaluating as well as the testcase that
+gave rise to it in the case of offline evaluations. These are accessed with the standard {{ variable }} syntax, enhanced with a
+familiar dot notation to pick out specific values from inside the log and testcase objects. The log and testcase shown in the
+debug console correspond to the objects available in the context of the LLM evaluator prompt.
+
+For example, suppose you are evaluating a log object like this.
+
+{
+ "id": "data_B3RmIu9aA5FibdtXP7CkO",
+ "model_config": {...},
+ "inputs": {
+ "hello": "world",
+ },
+ "messages": []
+ "output": "This is what the AI responded with.",
+ ...etc
+}
+
+
+In the LLM evaluator prompt, if you write {{ log.inputs.hello }} it will be replaced with world in the final prompt sent to the
+LLM evaluator model.
+
+Note that in order to get access to the fully populated prompt that was sent in the underlying log, you can use {{ log_prompt }}.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/evaluation/use-llms-to-evaluate-logs",
+ "title": "Use LLMs to evaluate logs",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -23489,6 +36284,158 @@ In this guide, we'll show an example of setting up a simple script to run such a
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Evaluation and Monitoring",
+ ],
+ "content": "For some use cases, you may wish to run your evaluation process outside of Humanloop, as opposed to running the evaluators we
+offer in our Humanloop runtime.
+
+For example, you may have implemented an evaluator that uses your own custom model or which has to interact with multiple systems.
+In these cases, you can continue to leverage the datasets you have curated on Humanloop, as well as consolidate all of the results
+alongside the prompts you maintain in Humanloop.
+
+In this guide, we'll show an example of setting up a simple script to run such a self-hosted evaluation using our Python SDK.
+
+
+PREREQUISITES
+
+ * You need to have access to evaluations
+ * You also need to have a Prompt – if not, please follow our Prompt creation [./create-prompt] guide.
+ * You need to have a dataset in your project. See our dataset creation [./datasets] guide if you don't yet have one.
+ * You need to have a model config that you're trying to evaluate - create one in the Editor.
+
+
+SETTING UP THE SCRIPT
+
+### Install the latest version of the Humanloop Python SDK:
+
+pip install humanloop
+
+
+
+IN A NEW PYTHON SCRIPT, IMPORT THE HUMANLOOP SDK AND CREATE AN INSTANCE OF THE CLIENT:
+
+from humanloop import Humanloop
+
+humanloop = Humanloop(
+ api_key=YOUR_API_KEY, # Replace with your API key
+)
+
+
+
+RETRIEVE THE ID OF THE HUMANLOOP PROJECT YOU ARE WORKING IN - YOU CAN FIND THIS IN THE HUMANLOOP APP
+
+PROJECT_ID = ... # Replace with the project ID
+
+
+
+RETRIEVE THE DATASET YOU'RE GOING TO USE FOR EVALUATION FROM THE PROJECT
+
+# Retrieve a dataset
+DATASET_ID = ... # Replace with the dataset ID you are using for evaluation (this should be inside the project)
+datapoints = humanloop.datasets.list_datapoints(DATASET_ID).records
+
+
+
+CREATE AN EXTERNAL EVALUATOR
+
+# Create an external evaluator
+evaluator = humanloop.evaluators.create(
+ name="My External Evaluator",
+ description="An evaluator that runs outside of Humanloop runtime.",
+ type="external",
+ arguments_type="target_required",
+ return_type="boolean",
+)
+
+
+
+RETRIEVE THE MODEL CONFIG YOU'RE EVALUATING
+
+CONFIG_ID = ... # Replace with the model config ID you are evaluating (should be inside the project)
+model_config = humanloop.model_configs.get(CONFIG_ID)
+
+
+
+INITIATE AN EVALUATION RUN IN HUMANLOOP
+
+evaluation_run = humanloop.evaluations.create(
+ project_id=PROJECT_ID,
+ config_id=CONFIG_ID,
+ evaluator_ids=[evaluator.id],
+ dataset_id=DATASET_ID,
+)
+
+
+After this step, you'll see a new run in the Humanloop app, under the Evaluations tab of your project. It should have status
+running.
+
+
+ITERATE THROUGH THE DATAPOINTS IN YOUR DATASET AND USE THE MODEL CONFIG TO GENERATE LOGS FROM THEM
+
+logs = []
+for datapoint in datapoints:
+ log = humanloop.chat_model_config(
+ project_id=PROJECT_ID,
+ model_config_id=model_config.id,
+ inputs=datapoint.inputs,
+ messages=[
+ {key: value for key, value in dict(message).items() if value is not None}
+ for message in datapoint.messages
+ ],
+ source_datapoint_id=datapoint.id,
+ ).data[0]
+ logs.append((log, datapoint))
+
+
+
+EVALUATE THE LOGS USING YOUR OWN EVALUATION LOGIC AND POST THE RESULTS BACK TO HUMANLOOP
+
+In this example, we use an extremely simple evaluation function for clarity.
+
+for log, datapoint in logs:
+ # The datapoint's 'target' field tells us the correct answer for this datapoint
+ expected_answer = str(datapoint.target["answer"])
+
+ # The log output is what the model produced
+ model_output = log.output
+
+ # The evaluation is a boolean, indicating whether the model was correct.
+ result = expected_answer == model_output
+
+ # Post the result back to Humanloop.
+ evaluation_result_log = humanloop.evaluations.log_result(
+ log_id=log.id,
+ evaluator_id=evaluator.id,
+ evaluation_run_external_id=evaluation_run.id,
+ result=result,
+ )
+
+
+
+MARK THE EVALUATION RUN AS COMPLETED
+
+humanloop.evaluations.update_status(id=evaluation_run.id, status="completed")
+
+
+
+REVIEW THE RESULTS
+
+After running this script with the appropriate resource IDs (project, dataset, model config), you should see the results in the
+Humanloop app, right alongside any other evaluations you have performed using the Humanloop runtime.
+
+[file:02883bb7-c6f9-41ee-a4b5-7d03cd6cfdff]",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/evaluation/self-hosted-evaluations",
+ "title": "Self-hosted evaluations",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -23700,6 +36647,221 @@ This works with any evaluator - if you have configured a Humanloop-runtime evalu
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Evaluation and Monitoring",
+ ],
+ "content": "If running your infrastructure to generate logs, you can still leverage the Humanloop evaluations suite via our API. The workflow
+looks like this:
+
+ 1. Trigger the creation of an evaluation run
+ 2. Loop through the datapoints in your dataset and perform generations on your side
+ 3. Post the generated logs to the evaluation run
+
+This works with any evaluator - if you have configured a Humanloop-runtime evaluator, these will be automatically run on each log
+you post to the evaluation run; or, you can use self-hosted evaluators and post the results to the evaluation run yourself (see
+Self-hosted evaluations [./self-hosted-evaluations]).
+
+
+PREREQUISITES
+
+ * You need to have access to evaluations
+ * You also need to have a project created - if not, please first follow our project creation guides.
+ * You need to have a dataset in your project. See our dataset creation guide if you don't yet have one.
+ * You need a model configuration to evaluate, so create one in the Editor.
+
+
+SETTING UP THE SCRIPT
+
+
+INSTALL THE LATEST VERSION OF THE HUMANLOOP PYTHON SDK
+
+pip install humanloop
+
+
+
+IN A NEW PYTHON SCRIPT, IMPORT THE HUMANLOOP SDK AND CREATE AN INSTANCE OF THE CLIENT
+
+from humanloop import Humanloop
+
+humanloop = Humanloop(
+ api_key=YOUR_API_KEY, # Replace with your Humanloop API key
+)
+
+
+
+RETRIEVE THE ID OF THE HUMANLOOP PROJECT YOU ARE WORKING IN
+
+You can find this in the Humanloop app.
+
+PROJECT_ID = ... # Replace with the project ID
+
+
+
+RETRIEVE THE DATASET YOU'RE GOING TO USE FOR EVALUATION FROM THE PROJECT
+
+# Retrieve a dataset
+DATASET_ID = ... # Replace with the dataset ID you use for evaluation.
+ # This must be a dataset in the project you are working on.
+datapoints = humanloop.datasets.list_datapoints(DATASET_ID).records
+
+
+
+SET UP THE MODEL CONFIG YOU ARE EVALUATING
+
+If you constructed this in Humanloop, retrieve it by calling:
+
+config = humanloop.model_configs.get(id=CONFIG_ID)
+
+
+Alternatively, if your model config lives outside the Humanloop system, post it to Humanloop with the register model config
+endpoint [/api-reference/model-configs/model-configs-register].
+
+Either way, you need the ID of the config.
+
+CONFIG_ID =
+
+
+
+IN THE HUMANLOOP APP, CREATE AN EVALUATOR
+
+We'll create a Valid JSON checker for this guide.
+
+ 1. Visit the Evaluations tab, and select Evaluators
+ 2. Click + New Evaluator and choose Code from the options.
+ 3. Select the Valid JSON preset on the left.
+ 4. Choose the mode Offline in the settings panel on the left.
+ 5. Click Create.
+ 6. Copy your new evaluator's ID from the address bar. It starts with evfn_.
+
+EVALUATOR_ID =
+
+
+
+CREATE AN EVALUATION RUN WITH HL_GENERATED SET TO FALSE
+
+This tells the Humanloop runtime that it should not generate the logs itself, but instead wait for them to be posted via the API.
+
+evaluation_run = humanloop.evaluations.create(
+ project_id=PROJECT_ID,
+ config_id=CONFIG_ID,
+ dataset_id=DATASET_ID,
+ evaluator_ids=[EVALUATOR_ID],
+ hl_generated=False,
+)
+
+
+By default, the evaluation status after creation is pending. Before sending the generation logs, set the status to running.
+
+humanloop.evaluations.update_status(id=evaluation_run.id, status="running")
+
+
+
+ITERATE THROUGH THE DATAPOINTS IN THE DATASET, PRODUCE A GENERATION AND POST THE EVALUATION
+
+for datapoint in datapoints:
+ # Use the datapoint to produce a log with the model config you are testing.
+ # This will depend on whatever model calling setup you are using on your side.
+ # For simplicity, we simply log a hardcoded output here.
+ log = {
+ "project_id": PROJECT_ID,
+ "config_id": CONFIG_ID,
+ "messages": [*config.chat_template, *datapoint.messages],
+ "output": "Hello World!",
+ }
+
+ print(f"Logging generation for datapoint {datapoint.id}")
+ humanloop.evaluations.log(
+ evaluation_id=evaluation_run.id,
+ log=log,
+ datapoint_id=datapoint.id,
+ )
+
+
+RUN THE FULL SCRIPT ABOVE.
+
+If everything goes well, you should now have posted a new evaluation run to Humanloop and logged all the generations derived from
+the underlying datapoints.
+
+The Humanloop evaluation runtime will now iterate through those logs and run the Valid JSON evaluator on each. To check progress:
+
+
+VISIT YOUR PROJECT IN THE HUMANLOOP APP AND GO TO THE EVALUATIONS TAB.
+
+You should see the run you recently created; click through to it, and you'll see rows in the table showing the generations.
+
+[file:39edb66d-3992-49ee-a731-7113e9b1c161]
+
+In this case, all the evaluations returned False because the "Hello World!" string wasn't valid JSON. Try logging some valid JSON
+to check that everything works as expected.
+
+
+FULL SCRIPT
+
+For reference, here's the full script to get started quickly.
+
+from humanloop import Humanloop
+
+API_KEY =
+
+humanloop = Humanloop(
+ api_key=API_KEY,
+)
+
+PROJECT_ID =
+DATASET_ID =
+CONFIG_ID =
+EVALUATOR_ID =
+
+# Retrieve the datapoints in the dataset.
+datapoints = humanloop.datasets.list_datapoints(dataset_id=DATASET_ID).records
+
+# Retrieve the model config
+config = humanloop.model_configs.get(id=CONFIG_ID)
+
+# Create the evaluation run
+evaluation_run = humanloop.evaluations.create(
+ project_id=PROJECT_ID,
+ config_id=CONFIG_ID,
+ dataset_id=DATASET_ID,
+ evaluator_ids=[EVALUATOR_ID],
+ hl_generated=False,
+)
+print(f"Started evaluation run {evaluation_run.id}")
+
+# Set the status of the run to running.
+humanloop.evaluations.update_status(id=evaluation_run.id, status="running")
+
+# Iterate the datapoints and log a generation for each one.
+for i, datapoint in enumerate(datapoints):
+ # Produce the log somehow. This is up to you and your external setup!
+ log = {
+ "project_id": PROJECT_ID,
+ "config_id": CONFIG_ID,
+ "messages": [*config.chat_template, *datapoint.messages],
+ "output": "Hello World!", # Hardcoded example for demonstration
+ }
+
+ print(f"Logging generation for datapoint {datapoint.id}")
+ humanloop.evaluations.log(
+ evaluation_id=evaluation_run.id,
+ log=log,
+ datapoint_id=datapoint.id,
+ )
+
+print(f"Completed evaluation run {evaluation_run.id}")
+
+
+It's also good practice to wrap the above code in a try-except block and to mark the evaluation run as failed (using
+\`update_status\`) if an exception causes something to fail. A minimal sketch of that wrapper is below; the "failed" status string
+is an assumption, so check the \`update_status\` reference for the exact accepted values.
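+
+try:
+    for datapoint in datapoints:
+        log = {
+            "project_id": PROJECT_ID,
+            "config_id": CONFIG_ID,
+            "messages": [*config.chat_template, *datapoint.messages],
+            "output": "Hello World!",  # produce the real generation here
+        }
+        humanloop.evaluations.log(
+            evaluation_id=evaluation_run.id,
+            log=log,
+            datapoint_id=datapoint.id,
+        )
+except Exception:
+    # Mark the run as failed so it doesn't stay stuck in a running state.
+    humanloop.evaluations.update_status(id=evaluation_run.id, status="failed")
+    raise",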
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/evaluation/evaluating-externally-generated-logs",
+ "title": "Evaluating externally generated Logs",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -23968,6 +37130,75 @@ This guide demonstrates how to run a batch generation and collect manual human f
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Evaluation and Monitoring",
+ ],
+ "content": "PREREQUISITES
+
+ * You need to have access to evaluations.
+ * You also need to have a Prompt – if not, please follow our Prompt creation [./create-prompt] guide.
+ * Finally, you need at least a few logs in your project. Use the Editor to generate some logs if you don't have any yet.
+
+
+SET UP AN EVALUATOR TO COLLECT HUMAN FEEDBACK
+
+
+CREATE A 'HUMAN' EVALUATOR
+
+From the Evaluations page, click New Evaluator and select Human.
+
+[file:11a44ce1-398f-453b-8b66-c37554d5b900]
+
+
+GIVE THE EVALUATOR A NAME AND DESCRIPTION AND CLICK CREATE IN THE TOP-RIGHT.
+
+
+RETURN TO THE EVALUATIONS PAGE AND SELECT RUN EVALUATION.
+
+
+CHOOSE THE MODEL CONFIG YOU ARE EVALUATING, A DATASET YOU WOULD LIKE TO EVALUATE AGAINST AND THEN SELECT THE NEW HUMAN EVALUATOR.
+
+[file:bf679b98-0e5b-4e99-ad07-3254acf99099]
+
+
+CLICK BATCH GENERATE AND FOLLOW THE LINK IN THE BOTTOM-RIGHT CORNER TO SEE THE EVALUATION RUN.
+
+[file:e5e9a9ae-65a2-4d62-8119-ef9520694764]
+
+
+VIEW THE DETAILS
+
+As the rows populate with the generated output from the model, you can review those outputs and apply feedback in the rating
+column. Click a row to see the full details of the Log in a drawer.
+
+
+APPLY YOUR FEEDBACK EITHER DIRECTLY IN THE TABLE, OR FROM THE DRAWER.
+
+[file:c25a30e5-1043-44f5-ad8b-3b39608f016c]
+
+
+ONCE YOU'VE FINISHED PROVIDING FEEDBACK FOR ALL THE LOGS IN THE RUN, CLICK MARK AS COMPLETE IN THE TOP RIGHT OF THE PAGE.
+
+
+YOU CAN REVIEW THE AGGREGATED FEEDBACK RESULTS IN THE STATS SECTION ON THIS PAGE.
+
+
+CONFIGURING THE FEEDBACK SCHEMA
+
+If you need a more complex feedback schema, visit the Settings page in your project and follow the link to Feedbacks. Here, you
+can add more categories to the default feedback types. If you need more control over feedback types, you can create new ones via
+the API [/api-reference/projects/createfeedbacktype].",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/evaluation/evaluating-with-human-feedback",
+ "title": "Evaluating with human feedback",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -24093,6 +37324,119 @@ In this guide, we will demonstrate how to create and use online evaluators to ob
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Evaluation and Monitoring",
+ ],
+ "content": "This feature is not available for the Free tier. Please contact us if you wish to learn more about our [Enterprise
+plan](https://humanloop.com/pricing)
+
+
+CREATE AN ONLINE EVALUATOR
+
+
+PREREQUISITES
+
+ * You need to have access to evaluations.
+ * You also need to have a Prompt – if not, please follow our Prompt creation [./create-prompt] guide.
+ * Finally, you need at least a few logs in your project. Use the Editor to generate some logs if you don't have any yet.
+
+To set up an online Python evaluator:
+
+### Go to the **Evaluations** page in one of your projects and select the **Evaluators** tab
+
+### Select **+ New Evaluator** and choose **Code Evaluator** in the dialog
+
+Selecting the type of a new evaluator [file:ae416e3c-b35e-44ac-8f1b-df468e180299]
+
+
+FROM THE LIBRARY OF PRESETS ON THE LEFT-HAND SIDE, WE'LL CHOOSE VALID JSON FOR THIS GUIDE. YOU'LL SEE A PRE-POPULATED EVALUATOR
+WITH PYTHON CODE THAT CHECKS THE OUTPUT OF OUR MODEL IS VALID JSON GRAMMAR.
+
+The evaluator editor after selecting **Valid JSON** preset [file:0926fe33-2c96-4b99-922a-aa777f7590fe]
+
+
+IN THE DEBUG CONSOLE AT THE BOTTOM OF THE DIALOG, CLICK RANDOM LOGS FROM PROJECT. THE CONSOLE WILL BE POPULATED WITH FIVE
+DATAPOINTS FROM YOUR PROJECT.
+
+The debug console (you can resize this area to make it easier to view the logs) [file:6a1798be-fa63-4dc5-b13d-0aceac0500f9]
+
+
+CLICK THE RUN BUTTON AT THE FAR RIGHT OF ONE OF THE LOG ROWS. AFTER A MOMENT, YOU'LL SEE THE RESULT COLUMN POPULATED WITH A TRUE
+OR FALSE.
+
+The **Valid JSON** evaluator returned \`True\` for this particular log, indicating the text output by the model was grammatically
+correct JSON. [file:7080ea4b-4bea-4871-b9a4-a234eb3b9d5d]
+
+
+EXPLORE THE LOG DICTIONARY IN THE TABLE TO HELP UNDERSTAND WHAT IS AVAILABLE ON THE PYTHON OBJECT PASSED INTO THE EVALUATOR.
+
+
+CLICK CREATE ON THE LEFT SIDE OF THE PAGE.
+
+
+ACTIVATE AN EVALUATOR FOR A PROJECT
+
+### On the new **Valid JSON** evaluator in the Evaluations tab, toggle the switch to **on** - the evaluator is now activated for
+the current project.
+
+Activating the new evaluator to run automatically on your project. [file:a3a01a65-2832-43bf-a0e1-fec1f1b3157e]
+
+
+GO TO THE EDITOR, AND GENERATE SOME FRESH LOGS WITH YOUR MODEL.
+
+
+OVER IN THE LOGS TAB YOU'LL SEE THE NEW LOGS. THE VALID JSON EVALUATOR RUNS AUTOMATICALLY ON THESE NEW LOGS, AND THE RESULTS ARE
+DISPLAYED IN THE TABLE.
+
+The **Logs** table includes a column for each activated evaluator in your project. Each activated evaluator runs on any new logs
+in the project. [file:ac285d9d-e0ef-4c41-b57f-0be0d0f2bddc]
+
+
+TRACK THE PERFORMANCE OF MODELS
+
+
+PREREQUISITES
+
+ * A Humanloop project with a reasonable amount of data.
+ * An Evaluator activated in that project.
+
+To track the performance of different model configs in your project:
+
+
+GO TO THE DASHBOARD TAB.
+
+In the table of model configs at the bottom, choose a subset of the project's model configs.
+
+
+USE THE GRAPH CONTROLS
+
+Use the controls at the top of the page to select the date range and time granularity of interest.
+
+
+REVIEW THE RELATIVE PERFORMANCE
+
+For each activated Evaluator shown in the graphs, you can see the relative performance of the model configs you selected.
+
+[file:5b3dbcef-c44a-44ed-84c1-b6f3f7f7dd8a]
+
+The following Python modules are available to be imported in your code evaluators:
+ * re
+ * math
+ * random
+ * datetime
+ * json (useful for validating JSON grammar as per the example above)
+ * jsonschema (useful for more fine-grained validation of JSON output - see the in-app example)
+ * sqlglot (useful for validating SQL query grammar)
+ * requests (useful to make further LLM calls as part of your evaluation - see the in-app example for a suggestion of how to get
+ started).",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/evaluation/monitoring",
+ "title": "Set up Monitoring",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -24292,6 +37636,39 @@ Datasets can be created via CSV upload, converting from existing Logs in your pr
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Datasets",
+ ],
+ "content": "Datasets are pre-defined collections of input-output pairs that you can use within Humanloop to define fixed examples for your
+projects.
+
+A datapoint consists of three things:
+
+ * Inputs: a collection of prompt variable values which are interpolated into the prompt template of your model config at
+ generation time (i.e. they replace the {{ variables }} you define in the prompt template).
+ * Messages: for chat models, as well as the prompt template, you may have a history of prior chat messages from the same
+ conversation forming part of the input to the next generation. Datapoints can have these messages included as part of the
+ input.
+ * Target: data representing the expected or intended output of the model. In the simplest case, this can simply be a string
+ representing the exact output you hope the model produces for the example represented by the datapoint. In more complex cases,
+ you can define an arbitrary JSON object for target with whatever fields are necessary to help you specify the intended
+ behaviour. You can then use our evaluations [./evaluate-your-model] feature to run the necessary code to compare the actual
+ generated output with your target data to determine whether the result was as expected.
+
+Datapoints are pre-defined input-output pairs. [file:250dded1-5204-4b31-934d-14f2cef065e1]
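+
+Putting those three pieces together, here is a hedged sketch of a single datapoint expressed as a Python dict (the field names
+mirror the description above; the values are purely illustrative):
+
+datapoint = {
+    "inputs": {"user_query": "How do I reset my password?"},
+    "messages": [
+        {"role": "user", "content": "I can't get into my account."}
+    ],
+    "target": {"feature": "account access", "issue": "password reset"},
+}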
+
+Datasets can be created via CSV upload, converting from existing Logs in your project, or by API requests.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/overview",
+ "title": "Overview",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -24317,6 +37694,196 @@ You can currently create Datasets in Humanloop in three ways: from existing **lo
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Datasets",
+ ],
+ "content": "You can currently create Datasets in Humanloop in three ways: from existing logs, by uploading a CSV or via the API.
+
+
+CREATE A DATASET FROM LOGS
+
+Prerequisites:
+
+ * A Prompt [/docs/prompts] in Humanloop
+ * Some Logs [./generate-and-log-with-the-sdk] available in that Prompt
+
+To create a Dataset from existing Logs:
+
+
+GO TO THE LOGS TAB
+
+
+SELECT A SUBSET OF THE LOGS
+
+
+CHOOSE ADD TO DATASET
+
+In the menu in the top right of the page, select Add to dataset.
+
+Select some logs and then click **Add to Dataset** [file:7c8f572c-ca0e-4ba4-aae4-8641628bcba0]
+
+
+ADD TO A NEW OR EXISTING DATASET
+
+Provide a name for the new dataset and click Create, or click Add to existing dataset to append the selected logs to a dataset you
+already have.
+
+
+UPLOAD DATA FROM CSV
+
+Prerequisites:
+
+ * A Prompt [/docs/prompts] in Humanloop
+
+To create a dataset from a CSV file, we'll first create a CSV in Google Sheets and then upload it to a dataset in Humanloop.
+
+### Create a CSV file.
+
+ * In our Google Sheets example below, we have a column called \`user_query\` which is an input to a prompt variable of that name.
+   So in our model config, we'll need to include \`{{ user_query }}\` somewhere, and that placeholder will be populated with the
+   value from the \`user_query\` input in the datapoint at generation-time.
+ * You can include as many columns of prompt variables as you need for your model configs.
+ * There is additionally a column called \`target\` which will populate the target of the datapoint. In this case, we use simple
+   strings to define the target.
+ * Note: \`messages\` are harder to incorporate into a CSV file as they tend to be verbose and hard-to-read JSON. If you want a
+   dataset with messages, consider using the API to upload, or convert from existing logs.
+
+A CSV file in Google Sheets defining a collection of 9 datapoints. [file:614d15f5-ae0a-416c-bd14-d7cd3ced8594]
+
+
+EXPORT THE GOOGLE SHEET TO CSV
+
+Choose File → Download → Comma-separated values (.csv)
+
+
+CREATE A NEW DATASET FILE
+
+
+CLICK UPLOAD CSV
+
+Upload the CSV file from step 2 by drag-and-drop or using the file explorer.
+
+Uploading a CSV file to create a dataset. [file:d801c45b-910b-40a3-a244-5dbd15c2864e]
+
+
+CLICK UPLOAD DATASET FROM CSV
+
+You should see a new dataset appear in the datasets tab. You can explore it by clicking in.
+
+
+FOLLOW THE LINK IN THE POP-UP TO INSPECT THE DATASET THAT WAS CREATED IN THE UPLOAD.
+
+You'll see a column with the input key-value pairs for each datapoint, a messages column (in our case we didn't use messages, so
+they're all empty) and a target column with the expected model output.
+
+[file:dd05870c-8436-4ce0-9eab-2b1206d5d3ef]
+
+
+UPLOAD VIA API
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop TypeScript SDK:
+
+ npm install humanloop
+
+
+ 2. Import and initialize the SDK:
+
+ import { HumanloopClient, Humanloop } from "humanloop";
+
+ const humanloop = new HumanloopClient({ apiKey: "YOUR_API_KEY" });
+
+ // Check that the authentication was successful
+ console.log(await humanloop.prompts.list());
+
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop Python SDK:
+
+ pip install humanloop
+
+
+ 2. Start a Python interpreter:
+
+ python
+
+
+ 3. Initialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page
+ [https://app.humanloop.com/account/api-keys])
+
+ from humanloop import Humanloop
+ hl = Humanloop(api_key="")
+
+ # Check that the authentication was successful
+ print(hl.prompts.list())
+
+
+### First define some sample data
+
+This should consist of user messages and target extraction pairs. This is where you could load up any existing data you wish to
+use for your evaluation:
+
+# Example test case data
+data = [
+ {
+ "messages": [
+ {
+ "role": "user",
+ "content": "Hi Humanloop support team, I'm having trouble understanding how to use the evaluations feature in your software. Can you provide a step-by-step guide or any resources to help me get started?",
+ }
+ ],
+ "target": {"feature": "evaluations", "issue": "needs step-by-step guide"},
+ "inputs": {},
+ },
+ {
+ "messages": [
+ {
+ "role": "user",
+ "content": "Hi there, I'm interested in fine-tuning a language model using your software. Can you explain the process and provide any best practices or guidelines?",
+ }
+ ],
+ "target": {
+ "feature": "fine-tuning",
+ "issue": "process explanation and best practices",
+ },
+ "inputs": {},
+ },
+]
+
+
+
+THEN DEFINE A DATASET AND UPLOAD THE DATAPOINTS
+
+# Create a dataset
+dataset = humanloop.datasets.create(
+ project_id=project_id,
+ name="Sample dataset",
+ description="Examples of featue requests extracted from user messages",
+)
+dataset_id = dataset.id
+
+# Create datapoints for the dataset
+datapoints = humanloop.datasets.create_datapoint(
+ dataset_id=dataset_id,
+ body=data,
+)
+
+
+On the datasets tab in your Humanloop project you will now see the dataset you just uploaded via the API.
+
+[file:985a94eb-b14e-45ab-b3f6-34b881be3db6]",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/create-dataset",
+ "title": "Create a dataset",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -24587,6 +38154,86 @@ This guide demonstrates how to run a batch generation across all the datapoints
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Datasets",
+ ],
+ "content": "This guide demonstrates how to run a batch generation across all the datapoints in a dataset.
+
+Prerequisites
+
+ * A Prompt [/docs/prompts] in Humanloop
+ * A dataset [/docs/datasets] in that project
+
+
+CREATE A MODEL CONFIG
+
+It's important that the model config we use to perform the batch generation is consistent with the dataset. We're going to use the
+simple customer support dataset that we uploaded in the previous Create a dataset guide [./create-a-dataset]. As a reminder, the
+dataset looks like this
+
+The underlying data for our \`customer_queries\` dataset. [file:614d15f5-ae0a-416c-bd14-d7cd3ced8594]
+
+We want to get the model to classify the customer support query into the appropriate category. For this dataset, we have specified
+the correct category for each datapoint, so we'll be able to know easily if the model produced the correct output.
+
+### In Editor, create a simple completion model config as below. [file:d11083f1-60c0-4936-a5d9-2e55777796c4]
+
+We've used the following prompt:
+
+You are a customer support classifier for Humanloop, a platform for building applications with LLMs.
+
+Please classify the following customer support query into one of these categories: [datasets, docs, evaluators, feedback,
+fine-tuning, model configs, model providers]
+
+{{user_query}}
+
+The most important thing here is that we have included a prompt variable - {{ user_query }} which corresponds to the input key on
+all the datapoints in our dataset. This was the first column header in the CSV file we used to upload the dataset.
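+
+To make the substitution concrete, here is a small Python sketch (the datapoint value is made up) of how the {{ user_query }}
+placeholder is filled from a datapoint's inputs at generation time:
+
+# A single datapoint's inputs, keyed by the CSV column header
+inputs = {"user_query": "How do I export my evaluation results?"}
+
+template = "Please classify the following customer support query: {{user_query}}"
+
+# The prompt variable is replaced by the matching input value for each datapoint
+prompt = template.replace("{{user_query}}", inputs["user_query"])
+print(prompt)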
+
+
+SAVE THE MODEL CONFIG BY CLICKING THE SAVE BUTTON. CALL THE CONFIG SUPPORT_CLASSIFIER.
+
+
+GO TO THE DATASETS TAB
+
+
+CLICK THE MENU ICON IN THE TOP-RIGHT CORNER OF THE DATASET YOU WANT TO PERFORM A BATCH GENERATION ACROSS.
+
+
+IN THAT MENU, CHOOSE BATCH GENERATE & EVAL
+
+Trigger a batch generation on a dataset from this menu. [file:0656e01f-3445-4248-940c-2b9b89e88b23]
+
+
+IN THE DIALOG WINDOW, CHOOSE THE SUPPORT_CLASSIFIER MODEL CONFIG CREATED IN STEP 2.
+
+
+YOU CAN ALSO OPTIONALLY SELECT AN EVALUATOR TO USE TO COMPARE THE MODEL'S GENERATION OUTPUT TO THE TARGET OUTPUT IN EACH
+DATAPOINT. WE SET UP THE EXACT MATCH OFFLINE EVALUATOR IN OUR PROJECT (IT'S ONE OF THE BUILTINS AND REQUIRES NO FURTHER
+CONFIGURATION).
+
+
+CLICK BATCH GENERATE
+
+
+FOLLOW THE LINK IN THE POP-UP TO THE BATCH GENERATION RUN WHICH IS UNDER THE EVALUATIONS TAB.
+
+The batch generate output view, including an **exact match** evaluator. [file:e3f6d021-567c-443e-86bf-8f06ede42b3e]
+
+The output the model produced is shown in the output column, and the exact match column shows that the model produced the expected
+(target) output in most cases. From here, we could inspect the failing cases and iterate on our model config before testing again
+to see if the accuracy across the whole dataset has improved.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/batch-generate",
+ "title": "Batch generate",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -24688,6 +38335,28 @@ This enables you to try out alternative prompts or models and use the feedback f
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Experiments",
+ ],
+ "content": "Experiments allow you to set up A/B test between multiple different Prompts [/docs/prompts].
+
+Experiments can be used to compare different prompt templates, different parameter combinations (such as temperature and presence
+penalties) and even different base models.
+
+This enables you to try out alternative prompts or models and use the feedback from your users to determine which works better.
+
+[file:61aa4893-1eae-4422-9577-157a9ca75b37]",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/run-an-experiment",
+ "title": "Overview",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -24713,6 +38382,97 @@ Experiments can be used to compare different prompt templates, parameter combina
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Experiments",
+ ],
+ "content": "Experiments can be used to compare different prompt templates, parameter combinations (such as temperature and presence
+penalties), and even base models.
+
+
+PREREQUISITES
+
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/guides/create-prompt] guide first.
+ * You have integrated humanloop.complete_deployed() or the humanloop.chat_deployed() endpoints, along with the
+ humanloop.feedback() with the API [https://www.postman.com/humanloop/workspace/humanloop] or Python SDK
+ [./generate-and-log-with-the-sdk].
+
+This guide assumes you're using an OpenAI model. If you want to use other providers or your model, refer to the [guide for running
+an experiment with your model provider](./use-your-own-model-provider).
+
+
+CREATE AN EXPERIMENT
+
+
+NAVIGATE TO THE EXPERIMENTS TAB OF YOUR PROMPT
+
+
+CLICK THE CREATE NEW EXPERIMENT BUTTON
+
+ 1. Give your experiment a descriptive name.
+ 2. Select a list of feedback labels to be considered as positive actions - this will be used to calculate the performance of each
+ of your model configs during the experiment.
+ 3. Select which of your project’s model configs to compare.
+ 4. Then click the Create button.
+
+[file:b1775cb8-a4df-4efb-889c-185ad6ccb300]
+
+
+SET THE EXPERIMENT LIVE
+
+Now that you have an experiment, you need to set it as the project’s active experiment:
+
+### Navigate to the **Experiments** tab of your Prompt.
+
+
+CHOOSE THE EXPERIMENT CARD YOU WANT TO DEPLOY.
+
+
+CLICK THE DEPLOY BUTTON
+
+Next to the Environments label, click the Deploy button.
+
+
+SELECT THE ENVIRONMENT TO DEPLOY THE EXPERIMENT.
+
+We only have one environment by default so select the 'production' environment.
+
+[file:0336740b-fe2e-4415-9e71-556b9ab19d06]
+
+Now that your experiment is active, any SDK or API calls to generate will sample model configs from the list you provided when
+creating the experiment, and any subsequent feedback captured using feedback will contribute to the experiment performance.
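+
+As a rough sketch of what this looks like in code (the project name is illustrative and parameter names may differ slightly
+between SDK versions), a deployed chat call followed by a feedback call might look like:
+
+from humanloop import Humanloop
+
+humanloop = Humanloop(api_key="")
+
+# Calls to the deployed Prompt sample a model config from the active experiment
+chat_response = humanloop.chat_deployed(
+    project="Assistant",
+    messages=[{"role": "user", "content": "How do I create a dataset?"}],
+)
+data_id = chat_response.data[0].id
+
+# Feedback recorded against the returned data ID contributes to the experiment results
+humanloop.feedback(type="rating", value="good", data_id=data_id)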
+
+
+MONITOR EXPERIMENT PROGRESS
+
+Now that an experiment is live, the data flowing through your generate and feedback calls will update the experiment progress in
+real-time:
+
+### Navigate back to the **Experiments** tab.
+
+
+SELECT THE EXPERIMENT CARD
+
+Here you will see the performance of each model config with a measure of confidence based on how much feedback data has been
+collected so far:
+
+[file:0336740b-fe2e-4415-9e71-556b9ab19d06]
+
+You can toggle on and off existing model configs and choose to add new model configs from your project over the lifecycle of an
+experiment.
+
+[file:78117389-e302-4b91-a663-e7559d9f0093]
+
+🎉 Your experiment can now give you insight into which of the model configs your users prefer.
+
+How quickly you can draw conclusions depends on how much traffic you have flowing through your project.
+
+Generally, you should be able to draw some initial conclusions after on the order of hundreds of examples.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/experiments-from-the-app",
+ "title": "Run an experiment",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -24888,6 +38648,113 @@ Experiments can be used to compare different prompt templates, different paramet
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Experiments",
+ ],
+ "content": "Experiments can be used to compare different prompt templates, different parameter combinations (such as temperature and presence
+penalties) and even different base models.
+
+This guide focuses on the case where you wish to manage your own model provider calls.
+
+
+PREREQUISITES
+
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/guides/create-prompt] guide first.
+ * You have integrated humanloop.complete_deployed() or the humanloop.chat_deployed() endpoints, along with the
+ humanloop.feedback() with the API [https://www.postman.com/humanloop/workspace/humanloop] or Python SDK
+ [./generate-and-log-with-the-sdk].
+
+This guide assumes you're using an OpenAI model. If you want to use other providers or your own model, please also look at the
+[guide for running an experiment with your own model provider](./use-your-own-model-provider).
+
+Support for other model providers on Humanloop is coming soon.
+
+
+CREATE AN EXPERIMENT
+
+### Navigate to the **Experiments** tab of your project.
+
+### Click the **Create new experiment** button:
+
+ 1. Give your experiment a descriptive name.
+ 2. Select a list of feedback labels to be considered as positive actions - this will be used to calculate the performance of
+    each of your model configs during the experiment.
+ 3. Select which of your project’s model configs you wish to compare.
+
+Then click the **Create** button.
+
+[file:df4eb837-bc03-4d5c-9bd1-b4bec7339b47]
+
+
+LOG TO YOUR EXPERIMENT
+
+In order to log data for your experiment without using humanloop.complete_deployed() or humanloop.chat_deployed(), you must first
+determine which model config to use for your LLM provider calls. This is where the humanloop.experiments.get_model_config()
+function comes in.
+
+
+GO TO YOUR PROMPT DASHBOARD
+
+
+SET THE EXPERIMENT AS THE ACTIVE DEPLOYMENT.
+
+To do so, find the default environment in the Deployments bar. Click the dropdown menu from the default environment and from those
+options select Change deployment. In the dialog that opens select the experiment you created.
+
+[file:e26e6630-4fa1-4e0f-8a05-8f6da3b73b2f]
+
+
+COPY YOUR PROJECT_ID
+
+From the URL, https://app.humanloop.com/projects//dashboard. The project ID starts with pr_.
+
+
+ALTER YOUR EXISTING LOGGING CODE
+
+To now first sample a model_config from your experiment to use when making your call to OpenAI:
+
+from humanloop import Humanloop
+import openai
+
+# Initialize the SDK with your Humanloop API key
+humanloop = Humanloop(api_key="")
+
+# Sample a model_config from your experiment.
+model_config_response = humanloop.projects.get_active_config(id=project_id)
+model_config = model_config_response.config
+
+# Make a generation using OpenAI using the parameters from the sampled model_config.
+response = openai.Completion.create(
+ prompt="Answer the following question like Paul Graham from YCombinator:\\n"
+ "How should I think about competition for my startup?",
+ model=model_config["model"],
+ temperature=model_config["temperature"],
+)
+
+# Parse the output from the OpenAI response.
+output = response.choices[0].text
+
+# Log the inputs and outputs to the experiment trial associated to the sampled model_config.
+log_response = humanloop.log(
+ project_id=project_id,
+ inputs={"question": "How should I think about competition for my startup?"},
+ output=output,
+ trial_id=model_config["trial_id"],
+)
+
+# Use this ID to associate feedback received later to this log.
+data_id = log_response.id
+
+
+You can also run multiple experiments within a single project. In this case, first navigate to the Experiments tab of your project
+and select your Experiment card. Then, retrieve your experiment_id from the experiment summary:
+
+[file:5c67f5cd-4a65-4064-9e18-392a4020fe03]
+
+Then, retrieve your model config from your experiment by calling humanloop.experiments.sample(experiment_id=experiment_id).",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/run-an-experiment-with-your-own-model-provider",
+ "title": "Run experiments managing your own model",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -25054,6 +38921,109 @@ Humanloop's Editor supports the usage of [OpenAI function calling](https://platf
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Tools",
+ ],
+ "content": "Humanloop's Editor supports the usage of OpenAI function calling
+[https://platform.openai.com/docs/guides/function-calling/function-calling], which we refer to as JSON Schema tools. JSON Schema
+tools follow the universal JSON Schema syntax [https://json-schema.org/] definition, similar to OpenAI function calling. You can
+define inline JSON Schema tools as part of your model configuration in the editor. These tools allow you to define a structure for
+OpenAI to follow when responding. In this guide, we'll walk through the process of using tools in the editor to interact with
+gpt-4.
+
+----------------------------------------------------------------------------------------------------------------------------------
+
+
+CREATE A TOOL
+
+
+PREREQUISITES
+
+ * A Humanloop account - you can create one by going to our sign up page.
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/guides/create-prompt] guide first.
+
+To view the list of models that support Tool calling, see the [Models page](/docs/supported-models#models).
+
+To create and use a tool follow the following steps:
+
+### **Open the editor**
+
+Start by opening the Humanloop Editor in your web browser. You can access this directly from your Humanloop account dashboard.
+
+
+SELECT THE MODEL
+
+In the editor, you'll see an option to select the model. Choose gpt-4 from the dropdown list.
+
+
+DEFINE THE TOOL
+
+To define a tool, you'll need to use the universal JSON Schema syntax [https://json-schema.org/]. For the purpose of this
+guide, let's select one of our preloaded example tools get_current_weather. In practice this would correspond to a function you
+have defined locally, in your own code, and you are defining the parameters and structure that you want OpenAI to respond with to
+integrate with that function.
+
+[file:0d148432-70f4-4c8b-91aa-23d2854e8331]
+
+
+INPUT USER TEXT
+
+Let's input some user text relevant to our tool to trigger OpenAI to respond with the corresponding parameters. Since we're using
+a weather-related tool, type in: What's the weather in Boston?.
+
+It should be noted that a user can ask a non-weather related question such as 'how are you today?' and it likely wouldn't trigger
+the model to respond in a format relevant to the tool.
+
+
+CHECK ASSISTANT RESPONSE
+
+If correctly set up, the assistant should respond with a prompt to invoke the tool, including the name of the tool and the data it
+requires. For our get_current_weather tool, it might respond with the relevant tool name as well as the fields you requested, such
+as:
+
+get_current_weather
+
+{
+ "location": "Boston"
+}
+
+
+
+INPUT TOOL PARAMETERS
+
+The response can be used locally, or for prototyping you can pass in any relevant values. In the case of our get_current_weather
+tool, we might respond with parameters such as temperature (e.g., 22) and weather condition (e.g., sunny). To do this, in the tool
+response add the parameters in the format { "temperature": 22, "condition": "sunny" }. Note that the response format is also
+flexible; inputting 22, sunny likely also works and might help you iterate more quickly in your experimentation.
+
+
+SUBMIT TOOL RESPONSE
+
+After defining the parameters, click on the 'Run' button to send the Tool message to OpenAI.
+
+
+REVIEW ASSISTANT RESPONSE
+
+The assistant should now respond using your parameters. For example, it might say: The current weather in Boston is sunny with a
+temperature of 22 degrees.
+
+[file:638fd12b-40d5-4e3a-845a-ad4a6c767438]
+
+
+SAVE THE MODEL CONFIG
+
+If you are happy with your tool, you can save the model config. The tool will be saved on that model config and can be used again
+in the future by loading the model config again in the editor or by calling the model config via our SDK.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/tool-calling",
+ "title": "Tool Calling in Editor",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -25170,6 +39140,386 @@ The Humanloop SDK provides an easy way for you to integrate the functionality of
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Tools",
+ ],
+ "content": "The Humanloop SDK provides an easy way for you to integrate the functionality of OpenAI function calling
+[https://platform.openai.com/docs/guides/function-calling/function-calling], which we refer to as JSON Schema tools, into your
+existing projects. Tools follow the same universal JSON Schema syntax [https://json-schema.org/] definition as OpenAI function
+calling. In this guide, we'll walk you through the process of using tools with the Humanloop SDK via the chat endpoint.
+
+----------------------------------------------------------------------------------------------------------------------------------
+
+
+CREATING A TOOL
+
+
+PREREQUISITES
+
+ * A Humanloop account - you can create one by going to our sign up page.
+ * Python installed - you can download and install Python by following the steps on the Python download page
+ [https://www.python.org/downloads/].
+
+This guide assumes you're using OpenAI with the \`gpt-4\` model. Only specific models from OpenAI are supported for function
+calling.
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop TypeScript SDK:
+
+ npm install humanloop
+
+
+ 2. Import and initialize the SDK:
+
+ import { HumanloopClient, Humanloop } from "humanloop";
+
+ const humanloop = new HumanloopClient({ apiKey: "YOUR_API_KEY" });
+
+ // Check that the authentication was successful
+ console.log(await humanloop.prompts.list());
+
+
+First you need to install and initialize the SDK. If you have already done this, skip to the next section. Otherwise, open up your
+terminal and follow these steps:
+
+ 1. Install the Humanloop Python SDK:
+
+ pip install humanloop
+
+
+ 2. Start a Python interpreter:
+
+ python
+
+
+ 3. Initialize the SDK with your Humanloop API key (get your API key from your Organisation Settings page
+ [https://app.humanloop.com/account/api-keys])
+
+ from humanloop import Humanloop
+ hl = Humanloop(api_key="")
+
+ # Check that the authentication was successful
+ print(hl.prompts.list())
+
+
+
+INSTALL AND INITIALIZE THE SDK
+
+The SDK requires Python 3.8 or greater.
+
+### **Import the Humanloop SDK**: If you haven't done so already, you'll need to install and import the Humanloop SDK into your
+Python environment. You can do this using pip:
+
+pip install humanloop
+
+
+Note, this guide was built with Humanloop==0.5.18.
+
+Then import the SDK in your script:
+
+from humanloop import Humanloop
+
+
+
+INITIALIZE THE SDK
+
+Initialize the Humanloop SDK with your API key:
+
+from humanloop import Humanloop
+
+hl = Humanloop(api_key="")
+
+
+
+CREATE A CHAT WITH THE TOOL
+
+We'll start with the general chat endpoint format.
+
+from humanloop import Humanloop
+
+hl = Humanloop(api_key="")
+
+
+def run_conversation():
+ # Step 1: send the conversation and available functions to GPT
+ messages = [{"role": "user", "content": "What's the weather like in Boston?"}]
+
+ # TODO - Add tools definition here
+
+ response = hl.chat(
+ project="Assistant",
+ model_config={"model": "gpt-4", "max_tokens": 100},
+ messages=messages,
+ )
+ response = response.data[0]
+
+
+
+DEFINE THE TOOL
+
+Define a tool using the universal JSON Schema syntax [https://json-schema.org/]. Let's assume we've defined a get_current_weather
+tool, which returns the current weather for a specified location. We'll add it in via a "tools": tools field. We've also defined
+a dummy get_current_weather method at the top. This can be replaced by your own function to fetch real values; for now we're
+hardcoding it to return a random temperature and cloudy for this example.
+
+from humanloop import Humanloop
+import random
+import json
+
+hl = Humanloop(api_key="")
+
+def get_current_weather(location, unit):
+ # Your own function call logic
+ # We will return dummy values in this example
+
+ # Generate random temperature between 0 and 20
+ temperature = random.randint(0, 20)
+
+ return {"temperature": temperature, "other": "cloudy"}
+
+
+
+def run_conversation():
+ # Step 1: send the conversation and available functions to GPT
+ messages = [
+ {
+ "role": "user",
+ "content": "What's the weather like in both Boston AND London tonight?",
+ }
+ ]
+ tools = [
+ {
+ "name": "get_current_weather",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "The city and state, e.g. San Francisco, CA",
+ },
+ "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+ },
+ "required": ["location"],
+ },
+ },
+ ]
+
+ response = hl.chat(
+ project="Assistant",
+ model_config={"model": "gpt-3.5-turbo-1106", "tools": tools, "max_tokens": 100},
+ messages=messages,
+ )
+ response = response.body
+ output_message = response["data"][0]["output_message"]
+
+ # Remove the deprecated tool_call field (not necessary for SDK rc versions >0.6)
+ del output_message["tool_call"]
+
+ # Add the output message from the previous chat to the messages
+ messages.append(output_message)
+
+ # TODO - Add assistant response logic
+
+
+
+CHECK ASSISTANT RESPONSE
+
+The code above makes the call to OpenAI with the tool, but it does nothing to handle the assistant response. When the assistant
+responds with a tool call, the response will have a tool_calls field. Fetch that value and pass it to your own function. An
+example of this can be seen below. Replace the TODO - Add assistant response logic in your code from above with the following.
+Multiple tool calls can be returned with the latest OpenAI models gpt-4-1106-preview and gpt-3.5-turbo-1106, so below we loop
+through the tool_calls and populate the response accordingly.
+
+ # Step 2: check if GPT wanted to call a tool
+ if output_message.get("tool_calls"):
+ # Step 3: call the function
+ # Note: the JSON response may not always be valid; be sure to handle errors
+ available_functions = {
+ "get_current_weather": get_current_weather,
+ }
+
+ for tool_call in output_message["tool_calls"]:
+ function_name = tool_call["function"]["name"]
+ function_args = json.loads(tool_call["function"]["arguments"])
+ function_to_call = available_functions[function_name]
+ function_response = function_to_call(
+ location=function_args.get("location"),
+ unit=function_args.get("unit"),
+ )
+
+ # TODO - return the tool response back to OpenAI
+
+
+
+RETURN THE TOOL RESPONSE
+
+We can then return the tool response to OpenAI. This is done by appending a tool message containing the function response (and
+the ID of the corresponding tool call) to the messages, as seen below.
+
+ # Step 2: check if GPT wanted to call a tool
+ if output_message.get("tool_calls"):
+ # Step 3: call the function
+ # Note: the JSON response may not always be valid; be sure to handle errors
+ available_functions = {
+ "get_current_weather": get_current_weather,
+ }
+
+ for tool_call in output_message["tool_calls"]:
+ function_name = tool_call["function"]["name"]
+ function_args = json.loads(tool_call["function"]["arguments"])
+ function_to_call = available_functions[function_name]
+ function_response = function_to_call(
+ location=function_args.get("location"),
+ unit=function_args.get("unit"),
+ )
+
+ # Step 4: send the response back to the model per function call
+ messages.append(
+ {
+ "role": "tool",
+ "content": json.dumps(function_response),
+ "tool_call_id": tool_call["id"],
+ }
+ )
+
+ second_response = hl.chat(
+ project="Assistant",
+ model_config={
+ "model": "gpt-3.5-turbo-1106",
+ "tools": tools,
+ "max_tokens": 500,
+ },
+ messages=messages,
+ )
+ return second_response
+
+
+
+REVIEW ASSISTANT RESPONSE
+
+The assistant should respond with a message that incorporates the parameters you provided, for example: The current weather in
+Boston is 22 degrees and cloudy. The above can be run by adding the Python handling logic at the bottom of your file:
+
+if __name__ == "__main__":
+ response = run_conversation()
+ response = response.data[0].output
+ # Print to console the response from OpenAI with the formatted message
+ print(response)
+
+
+The full code from this example can be seen below:
+
+from humanloop import Humanloop
+import random
+import json
+
+hl = Humanloop(
+ api_key="",
+)
+
+
+def get_current_weather(location, unit):
+ # Your own function call logic
+ # We will return dummy values in this example
+
+ # Generate random temperature between 0 and 20
+ temperature = random.randint(0, 20)
+
+ return {"temperature": temperature, "other": "cloudy"}
+
+
+def run_conversation():
+ # Step 1: send the conversation and available functions to GPT
+ messages = [
+ {
+ "role": "user",
+ "content": "What's the weather like in both Boston AND London tonight?",
+ }
+ ]
+ tools = [
+ {
+ "name": "get_current_weather",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "description": "The city and state, e.g. San Francisco, CA",
+ },
+ "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+ },
+ "required": ["location"],
+ },
+ },
+ ]
+
+ response = hl.chat(
+ project="Assistant",
+ model_config={"model": "gpt-3.5-turbo-1106", "tools": tools, "max_tokens": 100},
+ messages=messages,
+ )
+ response = response.body
+ output_message = response["data"][0]["output_message"]
+
+ # Remove the deprecated tool_call field (not necessary for SDK rc versions >0.6)
+ del output_message["tool_call"]
+
+ # Add the output message from the previous chat to the messages
+ messages.append(output_message)
+
+ # Step 2: check if GPT wanted to call a tool
+ if output_message.get("tool_calls"):
+ # Step 3: call the function
+ # Note: the JSON response may not always be valid; be sure to handle errors
+ available_functions = {
+ "get_current_weather": get_current_weather,
+ }
+
+ for tool_call in output_message["tool_calls"]:
+ function_name = tool_call["function"]["name"]
+ function_args = json.loads(tool_call["function"]["arguments"])
+ function_to_call = available_functions[function_name]
+ function_response = function_to_call(
+ location=function_args.get("location"),
+ unit=function_args.get("unit"),
+ )
+
+ # Step 4: send the response back to the model per function call
+ messages.append(
+ {
+ "role": "tool",
+ "content": json.dumps(function_response),
+ "tool_call_id": tool_call["id"],
+ }
+ )
+
+ second_response = hl.chat(
+ project="Assistant",
+ model_config={
+ "model": "gpt-3.5-turbo-1106",
+ "tools": tools,
+ "max_tokens": 500,
+ },
+ messages=messages,
+ )
+ return second_response
+
+
+if __name__ == "__main__":
+ response = run_conversation()
+ response = response.data[0].output
+ # Print to console the response from OpenAI with the formatted message
+ print(response)
+
+",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/create-a-tool-with-the-sdk",
+ "title": "Tool Calling with the SDK",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -25604,6 +39954,180 @@ Importantly, updates to the \`get_current_weather\` \`JSON Schema\` tool defined
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Tools",
+ ],
+ "content": "It's possible to re-use tool definitions them across multiple Prompts. You achieve this by having a Prompt file which defines a
+JSON schema, and linking them to your Prompt.
+
+You can achieve this by first defining an instance of a JSON Schema tool in your global Tools tab. Here you can define a tool
+once, such as get_current_weather(location: string, unit: 'celsius' | 'fahrenheit'), and then link that to as many model configs
+as you need within the Editor as shown below.
+
+Importantly, updates to the get_current_weather JSON Schema tool defined here will then propagate automatically to all the model
+configs you've linked it to, without having to publish new versions of the prompt.
+
+
+PREREQUISITES
+
+ * A Humanloop account - you can create one by going to our sign up page.
+ * Be on a paid plan - your organization has been upgraded from the Free tier.
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/guides/create-prompt] guide first.
+
+To create a JSON Schema tool that can be reusable across your organization, follow the following steps:
+
+
+CREATING AND LINKING A JSON SCHEMA TOOL
+
+This feature is not available for the Free tier. Please contact us if you wish to learn more about our [Enterprise
+plan](https://humanloop.com/pricing)
+
+### Create a Tool file
+
+Click the 'New File' button on the homepage or in the sidebar.
+
+
+SELECT THE JSON SCHEMA TOOL TYPE
+
+
+DEFINE YOUR TOOL
+
+Set the name, description, and parameters values. Our guide for using Tool Calling in the Prompt Editor
+[./create-a-tool-in-the-editor] can be a useful reference in this case. We can use the get_current_weather schema in this case.
+Paste the following into the dialog:
+
+{
+ "name": "get_current_weather",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "name": "Location",
+ "description": "The city and state, e.g. San Francisco, CA"
+ },
+ "unit": {
+ "type": "string",
+ "name": "Unit",
+ "enum": ["celsius", "fahrenheit"]
+ }
+ },
+ "required": ["location"]
+ }
+}
+
+
+
+PRESS THE CREATE BUTTON.
+
+
+NAVIGATE TO THE EDITOR
+
+Make sure you are using a model that supports tool calling, such as gpt-4o.
+
+See the Models page [/docs/supported-models] for a list of models that support tool calling.
+
+
+ADD TOOL TO THE PROMPT DEFINITION.
+
+
+SELECT 'LINK EXISTING TOOL'
+
+In the dropdown, go to the Link existing tool option. You should see your get_current_weather tool, click on it to link it to your
+editor.
+
+[file:82b8db60-27bd-4436-bb3c-8f1da79407e9]
+
+
+TEST THAT THE PROMPT IS WORKING WITH THE TOOL
+
+Now that your tool is linked you can start using it as you would normally use an inline tool. In the Chat section, in the User
+input, enter "What is the weather in london?"
+
+Press the Run button.
+
+You should see the Assistant respond with the tool response and a new Tool field inserted to allow you to insert an answer. In
+this case, put in 22 into the tool response and press Run.
+
+[file:1835f4ab-748e-4a64-8764-f69adb82d602]
+
+The model will respond with The current weather in London is 22 degrees.
+
+
+SAVE THE PROMPT
+
+You've linked a tool to your model config, now let's save it. Press the Save button and name your model config
+weather-model-config.
+
+
+(OPTIONAL) UPDATE THE TOOL
+
+Now that we've linked your get_current_weather tool to your model config, let's try updating the base tool and see how it
+propagates the changes down into your saved weather-model-config config. Navigate back to the Tools in the sidebar and go to the
+Editor.
+
+
+CHANGE THE TOOL.
+
+Let's update both the name, as well as the required fields. For the name, update it to get_current_weather_updated and for the
+required fields, add unit as a required field. The tool should look like this now:
+
+{
+ "name": "get_current_weather_updated",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "name": "Location",
+ "description": "The city and state, e.g. San Francisco, CA"
+ },
+ "unit": {
+ "type": "string",
+ "name": "Unit",
+ "enum": ["celsius", "fahrenheit"]
+ }
+ },
+ "required": ["location", "unit"]
+ }
+}
+
+
+
+SAVE THE TOOL
+
+Press the Save button, then the following Continue button to confirm.
+
+Your tool is now updated.
+
+
+TRY THE PROMPT AGAIN
+
+Navigate back to your previous project, and open the editor. You should see the weather-model-config loaded as the active config.
+You should also be able to see the name of your previously linked tool in the Tools section now says get_current_weather_updated.
+
+In the Chat section enter in again, What is the weather in london?, and press Run again.
+
+
+CHECK THE RESPONSE
+
+You should see the updated tool response, and how it now contains the unit field. Congratulations, you've successfully linked a
+JSON Schema tool to your model config.
+
+[file:d564f7b0-6b6c-4c89-b1ee-fab1311b93a1]
+
+When updating your organization-level JSON Schema tools, remember that the change will affect all the places you've previously
+linked the tool. Be careful when making updates to not inadvertently change something you didn't intend.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/link-jsonschema-tool",
+ "title": "Link a JSON Schema Tool",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -25813,6 +40337,130 @@ Instead of needing to copy and paste between your editor sessions and keep track
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Tools",
+ ],
+ "content": "The Humanloop Snippet tool supports managing common text 'snippets' (or 'passages', or 'chunks') that you want to reuse across
+your different prompts. A Snippet tool acts as a simple key/value store, where the key is the name of the common re-usable text
+snippet and the value is the corresponding text.
+
+For example, you may have some common persona descriptions that you found to be effective across a range of your LLM features. Or
+maybe you have some specific formatting instructions that you find yourself re-using again and again in your prompts.
+
+Instead of needing to copy and paste between your editor sessions and keep track of which projects you edited, you can instead
+inject the text into your prompt using the Snippet tool.
+
+----------------------------------------------------------------------------------------------------------------------------------
+
+
+CREATE AND USE A SNIPPET TOOL
+
+
+PREREQUISITES
+
+ * A Humanloop account - you can create one by going to our sign up page.
+ * Be on a paid plan - your organization has been upgraded from the Free tier.
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/guides/create-prompt] guide first.
+
+The Snippet tool is not available for the Free tier. Please contact us if you wish to learn more about our [Enterprise
+plan](https://humanloop.com/pricing)
+
+To create and use a snippet tool, follow the following steps:
+
+### Navigate to the [tools tab](https://app.humanloop.com/hl-test/tools) in your organisation and select the Snippet tool card.
+[file:f864c584-0082-4422-b02c-8af2372e4458]
+
+
+NAME THE TOOL
+
+Name it assistant-personalities and give it the description Useful assistant personalities.
+
+
+ADD A SNIPPET CALLED "HELPFUL-ASSISTANT"
+
+In the initial box add helpful-assistant and give it a value of You are a helpful assistant. You like to tell jokes and if anyone
+asks your name is Sam.
+
+
+ADD ANOTHER SNIPPET CALLED "GRUMPY-ASSISTANT"
+
+Let's add another key-value pair, so press the Add a key/value pair button and add a new key of grumpy-assistant and give it a
+value of You are a grumpy assistant. You rarely try to help people and if anyone asks your name is Freddy..
+
+[file:0297c12d-7572-4b93-8204-d9553cfc7afe]
+
+
+PRESS CREATE TOOL.
+
+Now your Snippet tool is set up, you can use it to populate strings in your prompt templates across your projects.
+
+
+NAVIGATE TO THE EDITOR
+
+Go to the Editor of your previously created project.
+
+
+ADD {{ ASSISTANT-PERSONALITIES(KEY) }} TO YOUR PROMPT
+
+Delete the existing prompt template and add {{ assistant-personalities(key) }} to your prompt.
+
+Double curly bracket syntax is used to call a tool in the editor. Inside the curly brackets you put the tool name, e.g. \`{{ (key)
+}}\`.
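+
+For example, a minimal prompt template using this Snippet tool might look like the following (the wording around the tool call is
+purely illustrative):
+
+{{ assistant-personalities(key) }}
+
+Answer the user's question in the persona described above.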
+
+
+ENTER THE KEY AS AN INPUT
+
+In the input area set the value to helpful-assistant. The tool requires an input value to be provided for the key. When adding the
+tool an inputs field will appear in the top right of the editor where you can specify your key.
+
+
+PRESS THE RUN BUTTON
+
+Start the chat with the LLM and you can see the response of the LLM, as well as the key you previously defined, in the Chat on
+the right.
+
+[file:52c5db5b-1863-41e9-a5da-f86b9219505b]
+
+
+CHANGE THE KEY TO GRUMPY-ASSISTANT.
+
+If you want to see the snippet corresponding to the key, you need to first run the conversation to fetch the string and see it in
+the preview.
+
+
+PLAY WITH THE LLM
+
+Ask the LLM, I'm a customer and need help solving this issue. Can you help?'. You should see a grumpy response from "Freddy" now.
+
+If you have a specific key you would like to hardcode in the prompt, you can define it using the literal key value: {{
+("key") }}, so in this case it would be {{ assistant-personalities("grumpy-assistant") }}. Delete the
+grumpy-assistant field and add it into your chat template.
+
+
+SAVE YOUR PROMPT.
+
+If you're happy with your grumpy assistant, save this new version of your Prompt.
+
+[file:79b12d9b-b906-4b77-9ae3-6e49da4ba952]
+
+The Snippet tool is particularly useful because you can define passages of text once in a Snippet tool and reuse them across
+multiple prompts, without needing to copy/paste them and manually keep them all in sync. Editing the values in your tool allows
+the changes to automatically propagate to the model configs when you update them, as long as the key is the same.
+
+Since the values for a Snippet are saved on the Tool, not the Prompt, changing the values (or keys) defined in your Snippet tools
+could affect the corresponding prompt's behaviour in a way that won't be captured by the Prompt's version. This could be exactly
+what you intend; however, caution should still be used to make sure the changes are expected.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/snippet-tool",
+ "title": "Use the Snippet Tool",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -25950,6 +40598,311 @@ In this guide we will set up a Humanloop Pinecone tool and use it to enrich a pr
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ "Tools",
+ ],
+ "content": "In this guide we will set up a Humanloop Pinecone tool and use it to enrich a prompt with the relevant context from a data source
+of documents. This tool combines Pinecone's [https://www.pinecone.io/] semantic search [./key-concepts#semantic-search] with
+OpenAI's embedding models [https://platform.openai.com/docs/guides/embeddings].
+
+----------------------------------------------------------------------------------------------------------------------------------
+
+
+PREREQUISITES
+
+ * A Humanloop account - you can create one by going to our sign up page [https://app.humanloop.com/signup].
+ * A Pinecone account - you can create one by going to their sign up page [https://app.pinecone.io/?sessionType=signup].
+ * Python installed - you can download and install Python by following the steps on the Python download page
+ [https://www.python.org/downloads/].
+
+If you have an existing Pinecone index that was created using one of [OpenAI's embedding
+models](https://platform.openai.com/docs/guides/embeddings), you can skip to section: **Setup Humanloop**
+
+----------------------------------------------------------------------------------------------------------------------------------
+
+
+SET UP PINECONE
+
+
+INSTALL THE PINECONE SDK
+
+If you already have the Pinecone SDK installed, skip to the next section.
+
+### Install the Pinecone Python SDK in your terminal:
+
+pip install pinecone-client
+
+### Start a Python interpreter:
+
+python
+
+### Go to the [Pinecone console](https://app.pinecone.io/) API Keys tab and create an API key - copy the key \`value\` and the
+\`environment\`.
+
+### Test your Pinecone API key and environment by initialising the SDK:
+
+>>> import pinecone
+>>> pinecone.init(api_key="", environment="")
+
+***
+
+
+CREATE A PINECONE INDEX
+
+Now we'll initialise a Pinecone index, which is where we'll store our vector embeddings. We will be using OpenAI's ada model
+[https://platform.openai.com/docs/guides/embeddings/what-are-embeddings] to create vectors to save to Pinecone, which has an
+output dimension of 1536 that we need to specify upfront when creating the index:
+
+import pinecone
+
+# Initialise the SDK
+pinecone.init(api_key="", environment="")
+
+# Create index
+# We can reference the dimension of the embeddings on OpenAI
+# https://platform.openai.com/docs/guides/embeddings/what-are-embeddings
+pinecone.create_index('humanloop-demo', dimension=1536)
+
+# Connect to the index
+index = pinecone.Index('humanloop-demo')
+
+
+----------------------------------------------------------------------------------------------------------------------------------
+
+
+PREPROCESS THE DATA
+
+Now that you have a Pinecone index, we need some data to put in it. In this section we'll pre-process some data ready for
+embedding and storing to the index in the next section.
+
+We'll use the awesome Hugging Face datasets [https://huggingface.co/docs/datasets/load_hub] to source a demo dataset (following
+the Pinecone quick-start guide [https://docs.pinecone.io/docs/semantic-text-search]). In practice you will customise this step to
+your own use case.
+
+### First install Hugging Face datasets using pip:
+
+pip install datasets
+
+
+
+NEXT DOWNLOAD THE QUORA DATASET:
+
+from datasets import load_dataset
+
+dataset = load_dataset('quora', split='train')
+
+
+
+NOW WE CAN PREVIEW THE DATASET - IT CONTAINS ~400K PAIRS OF NATURAL LANGUAGE QUESTIONS FROM QUORA:
+
+print(dataset[:5])
+
+
+{'questions': [{'id': [1, 2],
+ 'text': ['What is the step by step guide to invest in share market in india?',
+ 'What is the step by step guide to invest in share market?']},
+ {'id': [3, 4],
+ 'text': ['What is the story of Kohinoor (Koh-i-Noor) Diamond?',
+ 'What would happen if the Indian government stole the Kohinoor (Koh-i-Noor) diamond back?']},
+ {'id': [5, 6],
+ 'text': ['How can I increase the speed of my internet connection while using a VPN?',
+ 'How can Internet speed be increased by hacking through DNS?']},
+ {'id': [7, 8],
+ 'text': ['Why am I mentally very lonely? How can I solve it?',
+ 'Find the remainder when [math]23^{24}[/math] is divided by 24,23?']},
+ {'id': [9, 10],
+ 'text': ['Which one dissolve in water quikly sugar, salt, methane and carbon di oxide?',
+ 'Which fish would survive in salt water?']}],
+ 'is_duplicate': [False, False, False, False, False]}
+
+
+
+EXTRACT THE TEXT FROM THE QUESTIONS INTO A SINGLE LIST READY FOR EMBEDDING:
+
+questions = []
+
+for record in dataset['questions']:
+ questions.extend(record['text'])
+
+# remove duplicates
+questions = list(set(questions))
+print('\\n'.join(questions[:5]))
+print(f"Number of questions: {len(questions)}")
+
+
+I am currently training at IBM in .NET. What are the probable locations IBM has to offer for this domain?
+Can someone suggest some songs like this one?
+How do sodium bicarbonate and HCL react?
+Who inspires you most and why?
+
+
+***
+
+
+POPULATE PINECONE
+
+Now that you have a Pinecone index and a dataset of text chunks, we can populate the index with embeddings before moving on to
+Humanloop. We'll use one of OpenAI's embedding models to create the vectors for storage.
+
+
+INSTALL AND INITIALISE OPEN AI SDK
+
+If you already have your OpenAI key and the SDK installed, skip to the next section.
+
+### Install the OpenAI SDK using pip:
+
+$ pip install openai
+
+
+
+INITIALISE THE SDK (YOU'LL NEED AN OPENAI KEY FROM YOUR OPENAI ACCOUNT [https://platform.openai.com/account/api-keys])
+
+import openai
+
+openai.api_key = ""
+
+
+
+POPULATE THE INDEX
+
+If you already have a Pinecone index set up, skip to the next section.
+
+### Embed the questions and store them in Pinecone with the corresponding text as metadata:
+
+# For the sake of the demo we just use a small subset of the data
+embed_questions = questions[:100]
+
+for i, question in enumerate(embed_questions):
+ # Embed the question
+ embedding = openai.Embedding.create(input=question, model="text-embedding-ada-002").data[0].embedding
+
+ # Upsert to Pinecone - expects tuples of (id, vector, metadata to associate to vector)
+ index.upsert([(str(i), embedding, {"text": question})])
+
+# check number of records in the index
+index.describe_index_stats()
+
+
+
+YOU CAN NOW TRY OUT THE SEMANTIC SEARCH WITH A TEST QUESTION:
+
+test_query = "What is the first law of Thermodynamics?"
+
+# create the query vector
+test_query = openai.Embedding.create(
+ input=test_query, model="text-embedding-ada-002"
+ ).data[0].embedding
+
+# run the query
+result = index.query(test_query, top_k=3, include_metadata=True)
+print(result)
+
+
+You should see semantically similar questions retrieved with the corresponding similarity scores:
+
+{'matches': [{'id': '72',
+ 'metadata': {'text': 'Is kinetic energy gained when it is moving '
+ 'at a constant speed or when it is '
+ 'accelerating?'},
+ 'score': 0.792976439,
+ 'values': []},
+ {'id': '28',
+ 'metadata': {'text': 'Is energy in vacuum real? How do we know '
+ 'that this energy that can be borrowed and '
+ 'returned immediately is real if virtual '
+ "particles didn't exist then?"},
+ 'score': 0.787870169,
+ 'values': []},
+ {'id': '425',
+ 'metadata': {'text': 'What is the most intriguing scientific '
+ 'paradox?'},
+ 'score': 0.78692925,
+ 'values': []}],
+ 'namespace': ''}
+
+
+***
+
+
+SET UP HUMANLOOP
+
+
+CONFIGURE PINECONE
+
+You're now ready to configure a Pinecone tool in Humanloop:
+
+
+CREATE A NEW TOOL
+
+From the Humanloop dashboard or the sidebar, click 'New File' and select Tool.
+
+
+SELECT PINECONE SEARCH
+
+Select the Pinecone Search option
+
+
+CONFIGURE PINECONE AND OPENAI
+
+These should be the same values you used when setting up your Pinecone index in the previous sections. All these values are
+editable later.
+
+ 1. For Pinecone: populate values for Name (use quora_search), pinecone_key, pinecone_environment, pinecone_index (note: we named
+ our index humanloop-demo). The name will be used to create the signature for the tool that you will use in your prompt
+ templates in the next section.
+ 2. For OpenAI: populate the openai_key and openai_model (note: we used the text-embedding-ada-002 model above)
+
+
+SAVE THE TOOL
+
+By selecting Save.
+
+An active tool for quora_search will now appear on the tools tab and you're ready to use it within a prompt template.
+
+[file:8022b779-3ede-44f3-84d8-00cf4d0bd61b]
+
+
+ENHANCE YOUR PROMPT TEMPLATE
+
+Now that we have a Pinecone tool configured we can use this to pull relevant context into your prompts.
+
+This is an effective way to enrich your LLM applications with knowledge from your own internal documents and also help fix
+hallucinations.
+
+
+NAVIGATE TO THE EDITOR OF YOUR PROMPT
+
+
+COPY AND PASTE THE FOLLOWING TEXT INTO THE PROMPT TEMPLATE BOX:
+
+You are a helpful intern.
+Very succinctly summarise the types of questions people are asking on Quora about: {{topic}}
+
+Reference the following search results of Quora questions {{quora_search(topic, 10)}}:
+
+Summary:
+
+
+
+ON THE RIGHT HAND SIDE UNDER COMPLETIONS, ENTER THE FOLLOWING THREE EXAMPLES OF TOPICS: GOOGLE, PHYSICS AND EXERCISE.
+
+
+PRESS THE RUN ALL BUTTON BOTTOM RIGHT (OR USE THE KEYBOARD SHORTCUT COMMAND + ENTER).
+
+On the right hand side the results from calling the Pinecone tool for the specific topic will be shown highlighted in purple and
+the final summary provided by the LLM that uses these results will be highlighted in green.
+
+[file:6da813b7-4915-419a-82ae-9037c0f7685f]
+
+Each active tool in your organisation will have a unique signature that you can use to specify the tool within a prompt template.
+
+You can find the signature in the pink box on each tool card on the Tools page.
+
+You can also use double curly brackets - {{ - within the prompt template in the Prompt Editor to see a dropdown of available
+tools.
+
+In the case of Pinecone tools, the signature takes two positional arguments: query (the query text passed to Pinecone) and
+top_k (the number of similar chunks to retrieve from Pinecone for the query).
+
+[file:b06ff5bb-97ab-4d18-b53b-a54a846d00c2]",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/set-up-semantic-search",
+ "title": "Set up semantic search (RAG)",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -26466,6 +41419,86 @@ In this guide we will demonstrate how to use Humanloop’s fine-tuning workflow
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ ],
+ "content": "This feature is not available for the Free tier. Please contact us if you wish to learn more about our [Enterprise
+plan](https://humanloop.com/pricing)
+
+
+PREREQUISITES
+
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/guides/create-prompt] guide first.
+ * You have integrated humanloop.complete_deployed() or the humanloop.chat_deployed() endpoints, along with the
+ humanloop.feedback() with the API [https://www.postman.com/humanloop/workspace/humanloop] or Python SDK
+ [./generate-and-log-with-the-sdk].
+
+A common question is how much data do I need to fine-tune effectively? Here we can reference the OpenAI guidelines
+[https://beta.openai.com/docs/guides/fine-tuning]:
+
+> The more training examples you have, the better. We recommend having at least a couple hundred examples. In general, we've found
+> that each doubling of the dataset size leads to a linear increase in model quality.
+
+
+FINE-TUNING
+
+The first part of fine-tuning is to select the data you wish to fine-tune on.
+
+### Go to your Humanloop project and navigate to the **Logs** tab.
+
+
+CREATE A FILTER
+
+Using the + Filter button above the table of the logs you would like to fine-tune on.
+
+For example, all the logs that have received a positive upvote in the feedback captured from your end users.
+
+[file:1e0a900f-a8f2-4cac-b55d-40a23bf11a59]
+
+
+CLICK THE ACTIONS BUTTON, THEN CLICK THE NEW FINE-TUNED MODEL BUTTON TO SET UP THE FINETUNING PROCESS.
+
+
+ENTER THE APPROPRIATE PARAMETERS FOR THE FINE-TUNED MODEL.
+
+ 1. Enter a Model name. This will be used as the suffix parameter in OpenAI’s fine-tune interface. For example, a suffix of
+ "custom-model-name" would produce a model name like ada:ft-your-org:custom-model-name-2022-02-15-04-21-04.
+ 2. Choose the Base model to fine-tune. This can be ada, babbage, curie, or davinci.
+ 3. Select a Validation split percentage. This is the proportion of data that will be used for validation. Metrics will be
+ periodically calculated against the validation data during training.
+ 4. Enter a Data snapshot name. Humanloop associates a data snapshot to every fine-tuned model instance so it is easy to keep
+ track of what data is used (you can see your existing data snapshots on the Settings/Data snapshots page)
+
+[file:61f1b725-5f64-454c-8b84-3599656a52db]
+
+
+CLICK CREATE
+
+The fine-tuning process runs asynchronously and may take up to a couple of hours to complete depending on your data snapshot size.
+
+
+SEE THE PROGRESS
+
+Navigate to the Fine-tuning tab to see the progress of the fine-tuning process.
+
+Coming soon - notifications for when your fine-tuning jobs have completed.
+
+[file:e017f187-c465-4a9c-b5a4-0de964ae2222]
+
+
+WHEN THE STATUS OF THE FINE-TUNED MODEL IS MARKED AS SUCCESSFUL, THE MODEL IS READY TO USE.
+
+🎉 You can now use this fine-tuned model in a Prompt and evaluate its performance.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/finetune-a-model",
+ "title": "Fine-tune a model",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -26570,6 +41603,66 @@ API keys allow you to access the Humanloop API programmatically in your app.",
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ ],
+ "content": "CREATE A NEW API KEY
+
+
+GO TO YOUR ORGANIZATION'S API KEYS PAGE [https://app.humanloop.com/account/api-keys].
+
+
+CLICK THE CREATE NEW API KEY BUTTON.
+
+
+ENTER A NAME FOR YOUR API KEY.
+
+Choose a name that helps you identify the key's purpose. You can't change the name of an API key after it's created.
+
+
+CLICK CREATE.
+
+[file:efda5ed0-a0a2-449c-8f26-4c2e092e2917]
+
+
+COPY THE GENERATED API KEY
+
+Save it in a secure location. You will not be shown the full API key again.
+
+[file:5043e675-df30-4288-89c0-06d414a9c896]
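+
+Once stored (for example in an environment variable), the key can be passed to the SDK when initializing the client. A minimal
+Python sketch, assuming the key has been exported as HUMANLOOP_API_KEY:
+
+import os
+from humanloop import Humanloop
+
+# Read the key from the environment rather than hardcoding it
+humanloop = Humanloop(api_key=os.environ["HUMANLOOP_API_KEY"])
+
+# Quick check that the key authenticates successfully
+print(humanloop.prompts.list())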
+
+
+REVOKE AN API KEY
+
+You can revoke an existing API key if it is no longer needed.
+
+When an API key is revoked, future API requests that use this key will be rejected. Any systems that are dependent on this key
+will no longer work.
+
+### Go to API keys page
+
+Go to your Organization's API Keys page [https://app.humanloop.com/account/api-keys].
+
+
+IDENTIFY THE API KEY
+
+Find the key you wish to revoke by its name or by the displayed trailing characters.
+
+
+CLICK 'REVOKE'
+
+Click the three dots button on the right of its row to open its menu. Click Revoke. A confirmation dialog will be displayed. Click
+Remove.
+
+[file:1c5d15e7-cd82-4ab2-ad35-5da6c8548c5f]",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/create-and-revoke-api-keys",
+ "title": "Manage API keys",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -26673,6 +41766,48 @@ Inviting people to your organization allows them to interact with your Humanloop
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ ],
+ "content": "Inviting people to your organization allows them to interact with your Humanloop projects:
+
+ * Teammates will be able to create new model configs and experiments
+ * Developers will be able to get an API key to interact with projects through the SDK
+ * Annotators may provide feedback on logged datapoints using the Data tab (in addition to feedback captured from your end-users
+ via the SDK feedback integration)
+
+
+INVITE USERS
+
+To invite users to your organization:
+
+
+GO TO YOUR ORGANIZATION'S MEMBERS PAGE [https://app.humanloop.com/account/members]
+
+
+ENTER THE EMAIL ADDRESS
+
+Enter the email of the person you wish to invite into the Invite members box.
+
+[file:a9d909b7-eac2-4ccb-b828-e160721c9b94]
+
+
+CLICK SEND INVITE.
+
+An email will be sent to the entered email address, inviting them to the organization. If the entered email address is not already
+a Humanloop user, they will be prompted to create an account before being added to the organization.
+
+🎉 Once they create an account, they can view your projects at the same URL to begin collaborating.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/invite-collaborators",
+ "title": "Invite collaborators",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -26729,6 +41864,140 @@ In this guide we will demonstrate how to create and use environments.
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Guides",
+ ],
+ "content": "Environments [/docs/environments] enable you to deploy model configurations and experiments, making them accessible via API, while
+also maintaining a streamlined production workflow. These environments are created at the organizational level and can be utilized
+on a per-project basis.
+
+
+CREATE AN ENVIRONMENT
+
+GO TO YOUR ORGANIZATION'S ENVIRONMENTS [https://app.humanloop.com/account/environments] PAGE.
+
+
+CLICK THE + ENVIRONMENT BUTTON TO OPEN THE NEW ENVIRONMENT DIALOG.
+
+
+ASSIGN A CUSTOM NAME TO THE ENVIRONMENT.
+
+
+CLICK CREATE.
+
+[file:3175c307-fd5c-4178-8488-940700d92042]
+
+----------------------------------------------------------------------------------------------------------------------------------
+
+
+DEPLOYING TO AN ENVIRONMENT
+
+
+PREREQUISITES
+
+ * You already have a Prompt — if not, please follow our Prompt creation [/docs/guides/create-prompt] guide first.
+ * Ensure that your project has existing model configs that you wish to use.
+
+To deploy a model config to an environment:
+
+NAVIGATE TO THE DASHBOARD OF YOUR PROJECT.
+
+
+CLICK THE DROPDOWN MENU OF THE ENVIRONMENT.
+
+[file:a13e72ab-9366-4763-96a7-bccd57ada8b9]
+
+
+CLICK THE CHANGE DEPLOYMENT BUTTON
+
+
+SELECT A VERSION
+
+From the model configs or experiments within that project, click on the one that you wish to deploy to the target environment
+
+[file:42640269-c870-4228-873b-d40d0842d33d]
+
+
+CLICK THE DEPLOY BUTTON.
+
+----------------------------------------------------------------------------------------------------------------------------------
+
+
+CALLING THE MODEL IN THE ENVIRONMENT
+
+
+PREREQUISITES
+
+ * You have already deployed either a chat or completion model config - if not, please follow the steps in either the Generate
+ chat responses [./chat-using-the-sdk] or Generate completions [./completion-using-the-sdk] guides.
+ * You have multiple environments, with a model config deployed in a non-default environment. See the Deploying to an environment
+ section above.
+
+The following steps assume you're using an OpenAI model and that you're calling a \`chat\` workflow. The steps needed to target a
+specific environment for a \`completion\` workflow are similar.
+
+
+NAVIGATE TO THE MODELS TAB OF YOUR HUMANLOOP PROJECT.
+
+
+CLICK THE DROPDOWN MENU OF THE ENVIRONMENT YOU WISH TO USE.
+
+
+CLICK THE USE API MENU OPTION.
+
+A dialog will open with code snippets. Select the language you wish to use (e.g. Python, TypeScript). The value of the
+\`environment\` parameter is the name of the environment you wish to target via the chat-deployed call. An example of this can be
+seen in the code below.
+
+import os
+from humanloop import Humanloop
+
+HUMANLOOP_API_KEY = os.getenv("HUMANLOOP_API_KEY")
+
+humanloop = Humanloop(api_key=HUMANLOOP_API_KEY)
+
+response = humanloop.chat_deployed(
+ project="YOUR_PROJECT_NAME",
+ inputs={},
+ messages=[{ "role": "user", "content": "Tell a joke" }],
+ provider_api_keys={
+ "openai": "OPENAI_KEY_HERE"
+ },
+ environment="YOUR_ENVIRONMENT_NAME"
+)
+
+print(response.data[0].output)
+
+
+[file:504054fc-1798-436c-922a-052fcc9e0d59]
+
+
+UPDATING THE DEFAULT ENVIRONMENT
+
+Only Enterprise customers can update their default environment
+
+
+PREREQUISITES
+
+ * You have multiple environments - if not, first go through the Create an environment [./deploy-to-an-environment] section.
+
+Every organization will have a default environment. This can be updated as follows:
+
+
+GO TO YOUR ORGANIZATION'S ENVIRONMENTS [https://app.humanloop.com/account/environments] PAGE.
+
+
+CLICK ON THE DROPDOWN MENU OF AN ENVIRONMENT THAT IS NOT ALREADY THE DEFAULT.
+
+
+CLICK THE MAKE DEFAULT OPTION
+
+A dialog will open asking you if you are certain this is a change you want to make. If so, click the Make default button.
+
+
+VERIFY THE DEFAULT TAG HAS MOVED TO THE ENVIRONMENT YOU SELECTED.
+
+[file:bd81b4d2-f2e7-49e9-8beb-82d5d9818e38]",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/guides/deploy-to-an-environment",
+ "title": "Deploy to environments",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -26986,6 +42255,117 @@ endpoint: chat
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Core concepts",
+ ],
+ "content": "[file:eaa91cac-b21c-408b-b20c-0cc3794f34fd]
+
+A Prompt on Humanloop encapsulates the instructions and other configuration for how a large language model should perform a
+specific task. Each change in any of the following properties creates a new version of the Prompt:
+
+ * the template such as Write a song about {{topic}}
+ * the model e.g. gpt-4o
+ * all the parameters to the model such as temperature, max_tokens, top_p etc.
+ * any tools available to the model
+
+A Prompt is callable in that if you supply the necessary inputs, it will return a response from the model.
+
+Inputs are defined in the template through the double-curly bracket syntax e.g. {{topic}} and the value of the variable will need
+to be supplied when you call the Prompt to create a generation.
+
+This separation of concerns, keeping configuration separate from the query time data, is crucial for enabling you to experiment
+with different configurations and evaluate any changes. The Prompt stores the configuration, while the query time data is stored
+in Logs [./logs], which can then be re-used in Datasets.
+
+FYI: Prompts have recently been renamed from 'Projects'. The Project's "Model Configs" are now just each version of a Prompt. Some
+of the documentation and APIs may still refer to Projects and Model Configs. Note that we use a capitalized
+"[Prompt](/docs/prompts)" to refer to the entity in Humanloop, and a lowercase "prompt" to refer to the general concept of input
+to the model.
+
+---
+model: gpt-4
+temperature: 1.0
+max_tokens: -1
+provider: openai
+endpoint: chat
+---
+
+ Write a song about {{topic}}
+
+
+
+
+VERSIONING
+
+A Prompt file will have multiple versions as you try out different models, params or templates, but they should all be doing the
+same task, and in general should be swappable with one another.
+
+By versioning your Prompts, you can track how adjustments to the template or parameters influence the LLM's responses. This is
+crucial for iterative development, as you can pinpoint which versions produce the most relevant or accurate outputs for your
+specific use case.
+
+
+WHEN TO CREATE A NEW PROMPT
+
+You should create a new Prompt for every different ‘task to be done’ with the LLM. For example, each of these tasks can be done by
+an LLM and should be a separate Prompt File: extractive summary, title creator, outline generator, etc.
+
+We've seen people find it useful to also create a Prompt called 'Playground' where they can experiment freely without concern of
+breaking anything or making a mess of their other Prompts.
+
+
+USING PROMPTS
+
+Prompts are callable as an API. You supply any query-time data such as input values or user messages, and the model will respond
+with its text output.
+
+const chatResponse = await humanloop.chatDeployed({
+ project: "song writer",
+ inputs: {
+ topic: "debugging compiler errors",
+ },
+});
+
+
+You can also use Prompts without proxying all requests through Humanloop.
+
+
+SERIALIZATION (.PROMPT FILE)
+
+Our .prompt file format is a serialized version of a model config that is designed to be human-readable and suitable for checking
+into your version control systems alongside your code. See the .prompt files reference [./prompt-file-format] for more details.
+
+
+FORMAT
+
+The .prompt file is heavily inspired by MDX [https://mdxjs.com/], with model and hyperparameters specified in a YAML header
+alongside a JSX-inspired format for your Chat Template.
+
+
+BASIC EXAMPLES
+
+\`\`\`jsx Chat
+---
+model: gpt-4
+temperature: 1.0
+max_tokens: -1
+provider: openai
+endpoint: chat
+---
+You are a friendly assistant.
+\`\`\`
+
+\`\`\`jsx Completion
+---
+model: claude-2
+temperature: 0.7
+max_tokens: 256
+top_p: 1.0
+provider: anthropic
+endpoint: complete
+---
+Autocomplete the sentence.
+
+Context: {{context}}
+
+{{sentence}}
+\`\`\`
+",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/prompts",
+ "title": "Prompts",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -27177,6 +42557,89 @@ Some Tools are executable within Humanloop, and these offer the greatest utility
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Core concepts",
+ ],
+ "content": "[file:b0cea7a9-cf40-41fb-91ce-5085cf7b4bf2]
+
+Tools are functions that can extend your LLMs with access to external data sources, enabling them to take actions.
+
+Humanloop Tools can be used in multiple ways:
+
+ * by the LLM via OpenAI function calling [https://platform.openai.com/docs/guides/function-calling]
+ * within the Prompt template
+ * as part of a chain of events such as a Retrieval Tool in a RAG pipeline
+
+Some Tools are executable within Humanloop, and these offer the greatest utility and convenience. For example, Humanloop has
+pre-built integrations for Google search and Pinecone, so these Tools can be executed and the results inserted into the API or
+Editor automatically.
+
+
+TOOL USE (FUNCTION CALLING)
+
+Certain large language models support tool use or "function calling". For these models, you can supply the description of
+functions and the model can choose to call one or more of them by providing the values to call the functions with.
+
+[file:b950fee9-1b89-4bcc-8a7a-cd3f097f57cf]
+
+
+Tools all have a functional interface that can be supplied as the JSONSchema needed for function calling. Additionally, if the
+Tool is executable on Humanloop, the result of any tool will automatically be inserted into the response in the API and in the
+Editor.
+
+Tools for function calling can be defined inline in our Editor or centrally managed for an organization.
+
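+As a minimal, hypothetical sketch (the function name and fields below are illustrative, not a Humanloop-specific interface), such a
+function definition is just a JSON schema, which in Python can be declared as a plain dict:
+
+# Illustrative only: a function schema the model can choose to call.
+get_current_weather_tool = {
+    "name": "get_current_weather",
+    "description": "Get the current weather in a given location",
+    "parameters": {
+        "type": "object",
+        "properties": {
+            "location": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"},
+            "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
+        },
+        "required": ["location"],
+    },
+}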
+
+TOOLS IN A PROMPT TEMPLATE
+
+You can add a tool call in a prompt template and the result will be inserted into the prompt sent to the model. This allows you to
+insert retrieved information into your LLM calls.
+
+For example, if you have {{ google("population of india") }} in your template, this Google tool will get executed and replaced
+with the resulting text “1.42 billion (2024)” before the prompt is sent to the model. Additionally, if your template contains a
+Tool call that uses an input variable e.g. {{ google(query) }} this will take the value of the input supplied in the request,
+compute the output of the Google tool, and insert that result into the resulting prompt that is sent to the model.
+
+Example of a Tool being used within a Prompt template. This example will mean that this Prompt needs two inputs to be supplied
+(\`query\`, and \`top_k\`) [file:62d3d155-1f83-458a-b9f1-b103fc3ba544]
+
+Example of a Tool being used within a Prompt template. This example will mean that this Prompt needs two inputs to be supplied
+(query, and top_k)
+
+
+TOOLS WITHIN A CHAIN
+
+You can call a Tool within a session of events and post the result to Humanloop. For example, in a RAG pipeline, instrumenting
+your retrieval function as a Tool enables you to trace through the full sequence of events. The retrieval Tool will be versioned
+and the logs will be available in the Humanloop UI, enabling you to independently improve that step in the pipeline.
+
+
+SUPPORTED TOOLS
+
+
+THIRD-PARTY INTEGRATIONS
+
+ * Pinecone Search - Vector similarity search using Pinecone vector DB and OpenAI embeddings.
+ * Google Search - API for searching Google: https://serpapi.com/ [https://serpapi.com/].
+ * GET API - Send a GET request to an external API.
+
+
+HUMANLOOP TOOLS
+
+ * Snippet Tool - Create reusable key/value pairs for use in prompts - see how to use the Snippet Tool
+ [/docs/guides/snippet-tool].
+ * JSON Schema - JSON schema that can be used across multiple Prompts - see how to link a JSON Schema Tool
+ [/docs/guides/link-jsonschema-tool].",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/tools",
+ "title": "Tools",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -27323,6 +42786,38 @@ Datasets can be created via CSV upload, converting from existing Logs in your pr
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Core concepts",
+ ],
+ "content": "[file:0eb61a54-2ea0-4644-aad6-6f7ff921b2f2]
+
+A datapoint consists of three things:
+
+ * Inputs: a collection of prompt variable values which are interpolated into the prompt template of your model config at
+ generation time (i.e. they replace the {{ variables }} you define in the prompt template).
+ * Messages: for chat models, as well as the prompt template, you may have a history of prior chat messages from the same
+ conversation forming part of the input to the next generation. Datapoints can have these messages included as part of the
+ input.
+ * Target: data representing the expected or intended output of the model. In the simplest case, this can simply be a string
+ representing the exact output you hope the model produces for the example represented by the datapoint. In more complex cases,
+ you can define an arbitrary JSON object for target with whatever fields are necessary to help you specify the intended
+ behaviour. You can then use our evaluations feature to run the necessary code to compare the actual generated output with your
+ target data to determine whether the result was as expected.
+
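+Concretely, a single datapoint could be represented as follows (a hypothetical sketch of the shape described above, not a required
+schema):
+
+datapoint = {
+    "inputs": {"topic": "debugging compiler errors"},            # interpolated into the prompt template
+    "messages": [{"role": "user", "content": "Make it rhyme"}],  # optional prior chat history
+    "target": {"must_mention": ["compiler", "error"]},           # arbitrary object used by evaluations
+}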
+
+[file:6f78b557-39a8-4aa7-9f8e-7422366a8670]
+
+Datasets can be created via CSV upload, converting from existing Logs in your project, or by API requests.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/datasets",
+ "title": "Datasets",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -27348,6 +42843,87 @@ Evaluators can be used for monitoring live data as well as running evaluations."
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Core concepts",
+ ],
+ "content": "[file:bbb4a5dd-7cb0-491c-90e0-db33d16cd18f]
+
+Evaluators are functions which take an LLM-generated Log as an argument and return an evaluation. The evaluation is typically
+either a boolean or a number, indicating how well the model performed according to criteria you determine based on your use case.
+
+Evaluators can be used for monitoring live data as well as running evaluations.
+
+
+TYPES OF EVALUATORS
+
+There are three types of Evaluators: AI, code, and human.
+
+ * Python - using our in-browser editor, define simple Python functions to act as evaluators
+ * AI - use a large language model to evaluate another LLM! Our evaluator editor allows you to define a special-purpose prompt
+ which passes data from the underlying log to a language model. This type of evaluation is particularly useful for more
+ subjective evaluation such as verifying appropriate tone-of-voice or factuality given an input set of facts.
+ * Human - collate human feedback against the logs
+
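+As a minimal sketch of the Python type (the log and datapoint shapes below are simplified assumptions, not a prescribed Humanloop
+interface), an evaluator is just a function that receives a Log and returns a boolean or a number:
+
+# Illustrative sketch only: judge a generated log against its datapoint's target.
+def exact_match(log: dict, testcase: dict) -> bool:
+    target = testcase.get("target")
+    expected = target.get("output") if isinstance(target, dict) else target
+    return log.get("output") == expected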
+
+MODES: MONITORING VS. TESTING
+
+Evaluation is useful for both testing new model configs as you develop them and for monitoring live deployments that are already
+in production.
+
+To handle these different use cases, there are two distinct modes of evaluators - online and offline.
+
+
+ONLINE
+
+Online evaluators are for use on logs generated in your project, including live in production. Typically, they are used to monitor
+deployed model performance over time.
+
+Online evaluators can be set to run automatically whenever logs are added to a project. The evaluator takes the log as an
+argument.
+
+
+OFFLINE
+
+Offline evaluators are for use with predefined test datasets [./datasets] in order to evaluate models as you iterate in your
+prompt engineering workflow, or to test for regressions in a CI environment.
+
+A test dataset is a collection of datapoints, which are roughly analogous to unit tests or test cases in traditional programming.
+Each datapoint specifies inputs to your model and (optionally) some target data.
+
+When you run an offline evaluation, Humanloop iterates through each datapoint in the dataset and triggers a fresh LLM generation
+using the inputs of the testcase and the model config being evaluated. For each test case, your evaluator function will be called,
+taking as arguments the freshly generated log and the testcase datapoint that gave rise to it. Typically, you would write your
+evaluator to perform some domain-specific logic to determine whether the model-generated log meets your desired criteria (as
+specified in the datapoint 'target').
+
+
+HUMANLOOP-HOSTED VS. SELF-HOSTED
+
+Conceptually, evaluation runs have two components:
+
+ 1. Generation of logs from the datapoints
+ 2. Evaluating those logs.
+
+Using the Evaluations API, Humanloop offers the ability to generate logs either within the Humanloop runtime, or self-hosted.
+Similarly, evaluations of the logs can be performed in the Humanloop runtime (using evaluators that you can define in-app) or
+self-hosted (see our guide on self-hosted evaluations [./self-hosted-evaluations]).
+
+In fact, it's possible to mix-and-match self-hosted and Humanloop-runtime generations and evaluations in any combination you wish.
+When creating an evaluation via the API, set the hl_generated flag to False to indicate that you are posting the logs from your
+own infrastructure (see our guide on evaluating externally-generated logs [./evaluating-externally-generated-logs]). Include an
+evaluator of type External to indicate that you will post evaluation results from your own infrastructure. You can include
+multiple evaluators on any run, and these can include any combination of External (i.e. self-hosted) and Humanloop-runtime
+evaluators.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/evaluators",
+ "title": "Evaluators",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -27489,6 +43065,25 @@ For the example of a Prompt above, the Log would have one \`input\` called ‘to
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Core concepts",
+ ],
+ "content": "All Prompts [./prompts], Tools [./tools] and Evaluators [./evaluators] produce Logs. A Log contains the inputs and the outputs and
+tracks which version of Prompt/Tool/Evaluator was used.
+
+For the example of a Prompt above, the Log would have one input called ‘topic’ and the output would be the completion.
+
+A Log which contains an input query [file:7b05abc5-c1bd-46e2-806c-70edf6fab22a]",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/logs",
+ "title": "Logs",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -27510,6 +43105,74 @@ Environments enable you to deploy your model configurations to specific environm
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Core concepts",
+ ],
+ "content": "Environments enable you to deploy your model configurations to specific environments, allowing you to separately manage the
+deployment workflow between testing and production. With environments, you have the control required to manage the full LLM
+deployment lifecycle.
+
+
+MANAGING YOUR ENVIRONMENTS
+
+Every organisation automatically receives a default production environment. You can create additional environments with custom
+names by visiting your organisation's environments page [https://app.humanloop.com/account/environments].
+
+Only Enterprise customers can create more than one environment
+
+The environments you define for your organisation will be available for each project and can be viewed in the project dashboard
+once created.
+
+[file:a780c738-2da6-432c-95bb-158ea103d44d]
+
+THE DEFAULT ENVIRONMENT
+
+By default, the production environment is marked as the Default environment. This means that all API calls targeting the "Active
+Deployment," such as Get Active Config [/doc/reference/projects_getactiveconfig] or Chat Deployed
+[/api-reference/chats/createdeployed] will use this environment. You can rename the default environment on the organisation's
+environments [https://app.humanloop.com/account/environments] page.
+
+Renaming the environments will take immediate effect, so ensure that this change is planned and does not disrupt your production
+workflows.
+
+
+USING ENVIRONMENTS
+
+Once created on the environments page, environments can be used for each project and are visible in the respective project
+dashboards.
+
+You can deploy directly to a specific environment by selecting it in the Deployments section.
+
+[file:1d3bf28c-5591-47b6-817a-b10238bd7935]
+
+Alternatively, you can deploy to multiple environments simultaneously by deploying a Model Config from either the Editor or the
+Model Configs table.
+
+
+USING ENVIRONMENTS VIA API
+
+[file:3e7ce42e-e625-49cd-abbd-51965ca1d3f4]
+
+For v4.0 API endpoints that support Active Deployments, such as Get Active Config [/api-reference/projects/getactiveconfig] or
+Chat Deployed [/api-reference/chats/createdeployed], you can now point to a model configuration deployed in a specific environment
+by including an optional environment field.
+
+You can find this information in our v4.0 API Documentation or within the environment card in the Project Dashboard under the "Use
+API" option.
+
+Clicking on the "Use API" option will provide code snippets that demonstrate the usage of the environment variable in practice.
+
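+For example, a minimal sketch based on the deployed-chat snippet shown earlier in these docs (the project and environment names
+are placeholders):
+
+import os
+from humanloop import Humanloop
+
+humanloop = Humanloop(api_key=os.getenv("HUMANLOOP_API_KEY"))
+
+response = humanloop.chat_deployed(
+    project="YOUR_PROJECT_NAME",
+    messages=[{"role": "user", "content": "Tell a joke"}],
+    environment="YOUR_ENVIRONMENT_NAME",  # omit to target the default environment
+)
+print(response.data[0].output)
+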
+[file:67da63be-577c-4fe7-a35a-78522f699c41]",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/environments",
+ "title": "Environments",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -27626,6 +43289,126 @@ Clicking on the "Use API" option will provide code snippets that demonstrate the
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Core concepts",
+ ],
+ "content": "PROJECTS
+
+Projects are now [Prompts](./prompts) (and we've added [Tools](./tools) and [Evaluators](./evaluators) as special types). The V4
+API, however, still refers to projects as the main way to interact with your Prompts.
+
+A project groups together the data, prompts and models that are all achieving the same task to be done using the large language
+model.
+
+For example, if you have a task of ‘generate google ad copy’, that should be a project. If you have a summarization that works on
+top of tweets, that should be a project. You should have a separate project for each of your tasks on top of the LLM.
+
+Screenshot from Peppertype AI Copywriting assistant, each of these ‘apps’ corresponds to a project within Humanloop for managing
+the best way to get generations from large language models. [file:330669f5-88fb-4989-aae5-31e14ebc1e2d]
+
+
+MODELS
+
+The Humanloop platform gives you the ability to use and improve large language models like GPT‑3. There are many different models
+from multiple providers. The models may be different sizes, may have been trained differently, and are likely to perform
+differently. Humanloop gives you the ability to find the best model for your situation and optimise performance and cost.
+
+Model Provider is where the model is from. For example, ‘OpenAI’, or ‘AI21’ etc.
+
+Model refers to the actual AI model that should be used, such as text-davinci-002 (a large, relatively expensive, highly capable
+model trained to follow instructions), babbage (smaller, cheaper, and faster, but worse at creative tasks), or gpt-j (an open
+source model – coming soon!).
+
+Fine-tuned model - finetuning takes one of the existing models and specialises it for a specific task by further training it with
+some task-specific data.
+
+Finetuning lets you get more out of the models by providing:
+
+ 1. Higher quality results than prompt design
+ 2. Ability to train on more examples than can fit in a prompt
+ 3. Token savings due to shorter prompts
+ 4. Lower latency requests
+
+
+MODEL CONFIG
+
+This is the prompt template, the model (e.g. text-davinci-002) and the various parameters such as temperature that define how the
+model will generate text.
+
+A new model config is generated for each unique set of parameters used within that project. This is so you can compare different
+model configs to see which performs better when you vary things like the prompt, or settings such as temperature or stop sequences.
+
+
+PROMPT TEMPLATES
+
+This is the prompt that is fed to the model, which also allows the use of variables. This allows you to track how the same prompt
+is being used with different input values.
+
+The variables are surrounded by {{ and }} like this:
+
+The input name is ‘topic’ and the value will be inserted into the prompt at runtime. [file:988221b0-e80d-4348-b93c-8c1555a2314e]
+
+
+INPUT VARIABLES
+
+Variables are used in prompts to allow you to insert different values into the prompt at runtime. For example, in the prompt Write
+a song about {{topic}}, {{topic}} is a variable that can be replaced with different values at runtime.
+
+Variables in a prompt template are called Inputs.
+
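+As a toy illustration of that substitution (this is not Humanloop's actual templating engine, just the idea):
+
+template = "Write a song about {{topic}}"
+inputs = {"topic": "debugging compiler errors"}
+
+prompt = template
+for name, value in inputs.items():
+    prompt = prompt.replace("{{" + name + "}}", value)
+
+print(prompt)  # Write a song about debugging compiler errors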
+
+LOG
+
+All Prompts [./prompts], Tools [./tools] and Evaluators [./evaluators] produce Logs. A Log contains the inputs and the outputs and
+tracks which version of Prompt/Tool/Evaluator was used.
+
+For the example of a Prompt above, the Log would have one input called ‘topic’ and the output would be the completion.
+
+An example Log [file:9bdb5c1e-e140-412d-b4ba-50f06e742745]
+
+
+DATAPOINT
+
+A datapoint is an input-output pair that is used to evaluate the performance of a model. It is different to a Log in that it is
+not tied to any specific version of a Prompt (or Tool or Evaluator), and that the target is an arbitrary object that can be used
+to evaluate the output of the model. See Datasets [./datasets] for more information.
+
+
+FEEDBACK
+
+Human feedback is crucial to help you understand how your models are performing and to direct you toward ways to improve them.
+
+Explicit feedback – these are purposeful actions to review the generations. For example, ‘thumbs up/down’ button presses.
+
+Implicit feedback – actions taken by your users may signal whether the generation was good or bad, for example, whether the user
+‘copied’ the generation, ‘saved it’ or ‘dismissed it’ (which is negative feedback).
+
+You can also capture corrections as feedback.
+
+
+EXPERIMENT
+
+Experiments help remove the guesswork from working with large language models. They allow you to set up A/B tests between
+multiple different model configs. This enables you to try out alternative prompts or models and use the feedback from your users
+to determine which works better.
+
+
+SEMANTIC SEARCH
+
+Semantic search is an effective way to retrieve the most relevant information for a query from a large dataset of documents. The
+documents are typically split into small chunks of text that are stored as vector embeddings which are numerical representations
+for the meaning of text. Retrieval is carried out by first embedding the query and then using some measure of vector similarity to
+find the most similar embeddings from the dataset and return the associated chunks of text.",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/key-concepts",
+ "title": "Key Concepts",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -27869,6 +43652,27 @@ Visit our [Github examples repo](https://github.com/humanloop/examples) for a co
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "Examples",
+ ],
+ "content": "Visit our Github examples repo [https://github.com/humanloop/examples] for a collection of usage examples of Humanloop.
+
+
+CONTENTS
+
+Github Description SDK Chat Logging Tool Calling Streaming chatbot-starter [https://github.com/humanloop/chatbot-starter/] An
+open-source AI chatbot app template built with Next.js, the Vercel AI SDK, OpenAI, and Humanloop. TypeScript ✔️ ✔️ ✔️ asap
+[https://github.com/humanloop/asap] CLI assistant for solving dev issues in your projects or the command line. TypeScript ✔️ ✔️ ✔️",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/examples",
+ "title": "Example Projects",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -27908,6 +43712,79 @@ Humanloop supports all the major large language model providers, including OpenA
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "References",
+ ],
+ "content": "Humanloop supports all the major large language model providers, including OpenAI, Anthropic, Google, Azure, and more.
+Additionally, you can use your own custom models with the API and still benefit from the Humanloop platform.
+
+
+PROVIDERS
+
+Here is a summary of which providers are supported, and what information is available for each provider automatically.
+
+| Provider | Models | Cost information | Token information |
+| --- | --- | --- | --- |
+| OpenAI | ✅ | ✅ | ✅ |
+| Anthropic | ✅ | ✅ | ✅ |
+| Google | ✅ | ✅ | ✅ |
+| Azure | ✅ | ✅ | ✅ |
+| Cohere | ✅ | ✅ | ✅ |
+| Llama | ✅ | | |
+| Groq | ✅ | | |
+| AWS Bedrock | Anthropic, Llama | | |
+| Custom | ✅ | User-defined | User-defined |
+
+Adding in more providers is driven by customer demand. If you have a specific provider or model you would like to see supported,
+please reach out to us at support@humanloop.com [support@humanloop.com].
+
+
+MODELS
+
+The following are models that are integrated with Humanloop. This means that they can be used in the Prompt Editor and are
+callable through the Humanloop API. If you have a specific model you would like to see supported, please reach out to us at
+support@humanloop.com [support@humanloop.com].
+
+Remember, you can always use any model you want including your own self-hosted models, if you orchestrate the API calls yourself
+and log the data to Humanloop.
+
+Provider Model Max Prompt Tokens Max Output Tokens Cost per Prompt Token Cost per Output Token Tool Support Image Support openai
+gpt-4o 128000 4096 $0.000005 $0.000015 ✅ ✅ openai gpt-4o-mini 128000 4096 $0.00000015 $0.0000006 ✅ ✅ openai gpt-4 8192 4096
+$0.00003 $0.00006 ✅ ❌ openai gpt-4-turbo 128000 4096 $0.00001 $0.00003 ✅ ✅ openai gpt-4-turbo-2024-04-09 128000 4096 $0.00001
+$0.00003 ✅ ❌ openai gpt-4-32k 32768 4096 $0.00003 $0.00003 ✅ ❌ openai gpt-4-1106-preview 128000 4096 $0.00001 $0.00003 ✅ ❌ openai
+gpt-4-0125-preview 128000 4096 $0.00001 $0.00003 ✅ ❌ openai gpt-4-vision 128000 4096 $0.00001 $0.00003 ✅ ✅ openai
+gpt-4-1106-vision-preview 16385 4096 $0.0000015 $0.000002 ✅ ❌ openai gpt-3.5-turbo 16385 4096 $0.0000015 $0.000002 ✅ ❌ openai
+gpt-3.5-turbo-instruct 8192 4097 $0.0000015 $0.000002 ✅ ❌ openai babbage-002 16384 16384 $0.0000004 $0.0000004 ✅ ❌ openai
+davinci-002 16384 16384 $0.000002 $0.000002 ✅ ❌ openai ft:gpt-3.5-turbo 4097 4096 $0.000003 $0.000006 ✅ ❌ openai ft:davinci-002
+16384 16384 $0.000002 $0.000002 ✅ ❌ openai text-moderation 32768 32768 $0.000003 $0.000004 ✅ ❌ anthropic
+claude-3-5-sonnet-20240620 200000 4096 $0.000003 $0.000015 ✅ ✅ anthropic claude-3-opus-20240229 200000 4096 $0.000015 $0.000075 ✅
+❌ anthropic claude-3-sonnet-20240229 200000 4096 $0.000003 $0.000015 ✅ ❌ anthropic claude-3-haiku-20240307 200000 4096 $0.00000025
+$0.00000125 ✅ ❌ anthropic claude-2.1 100000 4096 $0.00000025 $0.000024 ❌ ❌ anthropic claude-2 100000 4096 $0.000008 $0.000024 ❌ ❌
+anthropic claude-instant-1.2 100000 4096 $0.000008 $0.000024 ❌ ❌ anthropic claude-instant-1 100000 4096 $0.0000008 $0.0000024 ❌ ❌
+google gemini-pro-vision 16384 2048 $0.00000025 $0.0000005 ❌ ✅ google gemini-1.0-pro-vision 16384 2048 $0.00000025 $0.0000005 ❌ ✅
+google gemini-pro 32760 8192 $0.00000025 $0.0000005 ❌ ❌ google gemini-1.0-pro 32760 8192 $0.00000025 $0.0000005 ❌ ❌ google
+gemini-1.5-pro-latest 1000000 8192 $0.00000025 $0.0000005 ❌ ❌ google gemini-1.5-pro 1000000 8192 $0.00000025 $0.0000005 ❌ ❌ google
+gemini-experimental 1000000 8192 $0.00000025 $0.0000005 ❌ ❌ openai_azure gpt-4o 128000 4096 $0.000005 $0.000015 ✅ ✅ openai_azure
+gpt-4o-2024-05-13 128000 4096 $0.000005 $0.000015 ✅ ✅ openai_azure gpt-4-turbo-2024-04-09 128000 4096 $0.00003 $0.00006 ✅ ✅
+openai_azure gpt-4 8192 4096 $0.00003 $0.00006 ✅ ❌ openai_azure gpt-4-0314 8192 4096 $0.00003 $0.00006 ✅ ❌ openai_azure gpt-4-32k
+32768 4096 $0.00006 $0.00012 ✅ ❌ openai_azure gpt-4-0125 128000 4096 $0.00001 $0.00003 ✅ ❌ openai_azure gpt-4-1106 128000 4096
+$0.00001 $0.00003 ✅ ❌ openai_azure gpt-4-0613 8192 4096 $0.00003 $0.00006 ✅ ❌ openai_azure gpt-4-turbo 128000 4096 $0.00001
+$0.00003 ✅ ❌ openai_azure gpt-4-turbo-vision 128000 4096 $0.000003 $0.000004 ✅ ✅ openai_azure gpt-4-vision 128000 4096 $0.000003
+$0.000004 ✅ ✅ openai_azure gpt-35-turbo-1106 16384 4096 $0.0000015 $0.000002 ✅ ❌ openai_azure gpt-35-turbo-0125 16384 4096
+$0.0000005 $0.0000015 ✅ ❌ openai_azure gpt-35-turbo-16k 16384 4096 $0.000003 $0.000004 ✅ ❌ openai_azure gpt-35-turbo 4097 4096
+$0.0000015 $0.000002 ✅ ❌ openai_azure gpt-3.5-turbo-instruct 4097 4096 $0.0000015 $0.000002 ✅ ❌ openai_azure gpt-35-turbo-instruct
+4097 4097 $0.0000015 $0.000002 ✅ ❌ cohere command-r 128000 4000 $0.0000005 $0.0000015 ❌ ❌ cohere command-light 4096 4096 $0.000015
+$0.000015 ❌ ❌ cohere command-r-plus 128000 4000 $0.000003 $0.000015 ❌ ❌ cohere command-nightly 4096 4096 $0.000015 $0.000015 ❌ ❌
+cohere command 4096 4096 $0.000015 $0.000015 ❌ ❌ cohere command-medium-beta 4096 4096 $0.000015 $0.000015 ❌ ❌ cohere
+command-xlarge-beta 4096 4096 $0.000015 $0.000015 ❌ ❌ groq mixtral-8x7b-32768 32768 32768 $0.0 $0.0 ❌ ❌ groq llama3-8b-8192 8192
+8192 $0.0 $0.0 ❌ ❌ groq llama3-70b-8192 8192 8192 $0.0 $0.0 ❌ ❌ groq llama2-70b-4096 4096 4096 $0.0 $0.0 ❌ ❌ groq gemma-7b-it 8192
+8192 $0.0 $0.0 ❌ ❌ replicate llama-3-70b-instruct 8192 8192 $0.00000065 $0.00000275 ❌ ❌ replicate llama-3-70b 8192 8192
+$0.00000065 $0.00000275 ❌ ❌ replicate llama-3-8b-instruct 8192 8192 $0.00000005 $0.00000025 ❌ ❌ replicate llama-3-8b 8192 8192
+$0.00000005 $0.00000025 ❌ ❌ replicate llama-2-70b 4096 4096 $0.00003 $0.00006 ❌ ❌ replicate llama70b-v2 4096 4096 N/A N/A ❌ ❌
+replicate mixtral-8x7b 4096 4096 N/A N/A ❌ ❌",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/supported-models",
+ "title": "Supported Models",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -28062,6 +43939,40 @@ A user can be one of the following rolws:
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "References",
+ ],
+ "content": "Everyone invited to the organization can access all projects currently (controlling project access coming soon).
+
+A user can be one of the following roles:
+
+Admin: The highest level of control. They can manage, modify, and oversee the organization's settings and have full functionality
+across all projects.
+
+Developer: (Enterprise tier only) Can deploy prompts, manage environments, create and add API keys, but lacks the ability to
+access billing or invite others.
+
+Member: (Enterprise tier only) The basic level of access. Can create and save prompts and run evaluations, but not deploy. Cannot
+see any org-wide API keys.
+
+
+RBACS SUMMARY
+
+Here is the full breakdown of roles and access:
+
+| Action | Member | Developer | Admin |
+| --- | --- | --- | --- |
+| Create and manage Prompts | ✔️ | ✔️ | ✔️ |
+| Inspect logs and feedback | ✔️ | ✔️ | ✔️ |
+| Create and manage evaluators | ✔️ | ✔️ | ✔️ |
+| Run evaluations | ✔️ | ✔️ | ✔️ |
+| Create and manage datasets | ✔️ | ✔️ | ✔️ |
+| Create and manage API keys | | ✔️ | ✔️ |
+| Manage prompt deployments | | ✔️ | ✔️ |
+| Create and manage environments | | ✔️ | ✔️ |
+| Send invites | | | ✔️ |
+| Set user roles | | | ✔️ |
+| Manage billing | | | ✔️ |
+| Change organization settings | | | ✔️ |
+ "indexSegmentId": "0",
+ "slug": "docs/v4/access-roles",
+ "title": "Access Roles",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -28115,6 +44026,134 @@ Our \`.prompt\` file format is a serialized version of a model config that is de
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "References",
+ ],
+ "content": "Our .prompt file format is a serialized version of a model config that is designed to be human-readable and suitable for checking
+into your version control systems alongside your code.
+
+
+FORMAT
+
+The .prompt file is heavily inspired by MDX [https://mdxjs.com/], with model and hyperparameters specified in a YAML header
+alongside a JSX-inspired format for your Chat Template.
+
+
+BASIC EXAMPLES
+
+\`\`\`jsx Chat
+---
+model: gpt-4
+temperature: 1.0
+max_tokens: -1
+provider: openai
+endpoint: chat
+---
+You are a friendly assistant.
+\`\`\`
+
+\`\`\`jsx Completion
+---
+model: claude-2
+temperature: 0.7
+max_tokens: 256
+top_p: 1.0
+provider: anthropic
+endpoint: complete
+---
+Autocomplete the sentence.
+
+Context: {{context}}
+
+{{sentence}}
+\`\`\`
+
+### Multi-modality and Images
+
+Images can be specified using nested \`\` tags within a \`\` message. To specify text alongside the image, use a \`\` tag.
+
+\`\`\`jsx Image and Text
+---
+model: gpt-4-vision-preview
+temperature: 0.7
+max_tokens: 256
+provider: openai
+endpoint: chat
+tools: []
+---
+
+ You are a friendly assistant.
+
+
+
+
+ What is in this image?
+
+
+
+\`\`\`
+
+### Tools, tool calls and tool responses
+
+Specify the tools available to the model as a JSON list in the YAML header.
+
+Tool calls in assistant messages can be added with nested \`\` tags. A \`\` tag within an \`\` tag denotes a tool call of \`type: "function"\`, and requires the attributes \`name\` and \`id\`. The text wrapped in a \`\` tag should be a JSON-formatted string containing the tool call's arguments.
+
+Tool call responses can then be added with \`\` tags after the \`\` message.
+
+\`\`\`jsx
+---
+model: gpt-4
+temperature: 0.7
+max_tokens: 256
+top_p: 1.0
+presence_penalty: 0.0
+frequency_penalty: 0.0
+provider: openai
+endpoint: chat
+tools: [
+ {
+ "name": "get_current_weather",
+ "description": "Get the current weather in a given location",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "location": {
+ "type": "string",
+ "name": "Location",
+ "description": "The city and state, e.g. San Francisco, CA"
+ },
+ "unit": {
+ "type": "string",
+ "name": "Unit",
+ "enum": [
+ "celsius",
+ "fahrenheit"
+ ]
+ }
+ },
+ "required": [
+ "location"
+ ]
+ }
+ }
+]
+---
+
+ You are a friendly assistant.
+
+
+
+ What is the weather in SF?
+
+
+
+
+ {
+ "location": "San Francisco, CA"
+ }
+
+
+
+
+
+ Cloudy with a chance of meatballs.
+
+\`\`\`
+",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/prompt-file-format",
+ "title": ".prompt files",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -28333,6 +44372,73 @@ To help with direct API integrations, we maintain a [Postman Workspace](https://
"slug": "docs/v4/overview",
},
},
+ {
+ "breadcrumbs": [
+ "References",
+ ],
+ "content": "In our various guides we assumed the use of our Python SDK [https://pypi.org/project/humanloop/]. There are some use cases where
+this is not appropriate. For example, if you are integrating Humanloop from a non-Python backend, such as Node.js, or using a
+no-or-low-code builder such as Bubble [https://bubble.io/] or Zapier [https://zapier.com/]. In these cases, you can leverage our
+RESTful APIs [/api-reference/projects/get] directly.
+
+To help with direct API integrations, we maintain a Postman Workspace [https://www.postman.com/humanloop/workspace/humanloop] with
+various worked examples for the main endpoints you will need.
+
+
+PREREQUISITES
+
+ * A Humanloop account. If you don't have one, you can create an account now by going to the Sign up page
+ [https://app.humanloop.com/signup].
+
+
+SET YOUR API KEYS IN POSTMAN
+
+ * Navigate to your Humanloop profile page [https://app.humanloop.com/account/settings] and copy your Humanloop API key.
+ * Navigate to our Postman Workspace [https://www.postman.com/humanloop/workspace/humanloop/overview] and set the environment to
+ Production in the dropdown in the top right where it says No Environment
+ * Select the Environment quick look button beside the environment dropdown and paste your Humanloop API key into the CURRENT
+ VALUE of the user_api_key variable:
+
+[file:2d3fb35b-bf80-4f7c-b20a-89043ecf323e]
+ * Navigate to your OpenAI profile [https://beta.openai.com/account/api-keys] and copy the API key.
+ * Navigate back to our Postman Workspace and paste your OpenAI key into the CURRENT VALUE of the global open_ai_key variable:
+
+[file:64f81610-85f7-40ba-b939-375c79c3451b]
+
+You are now all set to use Postman to interact with the APIs with real examples!
+
+
+TRY OUT THE POSTMAN COLLECTIONS
+
+A **collection** is a set of executable API specifications that are grouped together in Postman.
+
+There are 4 executable collections provided to check out.
+
+The Chat collection is the best place to start to get a project setup and sending chat messages. To try it out:
+
+ * Expand the V4 Chat collection on the left hand side.
+ * Select Create chat sending model-config from the list
+ * Execute the POST calls in order from top to bottom by selecting them under the collection on the left hand side and pressing
+ the Send button on the right hand side. You should see the resulting response body appearing in the box below the request body.
+ * Try editing the request body and resending - you can reference the corresponding API guides
+ [https://humanloop.readme.io/reference] for a full spec of the request schema.
+
+[file:79960ffb-e62d-41ca-82e0-e1c658d2a286]
+ * If you now navigate to your Humanloop projects page [https://app.humanloop.com], you will see a new project called assistant
+ with logged data.
+ * You can now generate populated code snippets across a range of languages by selecting the code icon on the right hand side
+ beside the request and response bodies:
+
+[file:cafed71f-a689-4e1d-af6d-3bfb11d16e36]",
+ "indexSegmentId": "0",
+ "slug": "docs/v4/postman-workspace",
+ "title": "Postman Workspace",
+ "type": "page-v3",
+ "version": {
+ "id": "v4.0",
+ "slug": "docs/v4/overview",
+ },
+ },
{
"breadcrumbs": [
{
@@ -28415,16 +44521,10 @@ The **Chat** collection is the best place to start to get a project setup and se
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/prompts/log",
- "title": "Prompts",
- },
+ "Humanloop API",
+ "Prompts",
],
- "description": "Log to a Prompt.
+ "content": "Log to a Prompt.
You can use query parameters \`version_id\`, or \`environment\`, to target
an existing version of the Prompt. Otherwise, the default deployed version will be chosen.
@@ -28432,7 +44532,90 @@ an existing version of the Prompt. Otherwise, the default deployed version will
Instead of targeting an existing version explicitly, you can instead pass in
Prompt details in the request body. In this case, we will check if the details correspond
to an existing version of the Prompt. If they do not, we will create a new version. This is helpful
-in the case where you are storing or deriving your Prompt details in code.",
+in the case where you are storing or deriving your Prompt details in code.
+## Path Parameters
+
+- /prompts/log
+## Query Parameters
+
+- version_id=string? A specific Version ID of the Prompt to log to.
+- environment=string? Name of the Environment identifying a deployed version to log to.
+## Request
+
+### Body
+
+- evaluation_id=string? Unique identifier for the Evaluation Report to associate the Log to.
+- path=string? Path of the Prompt, including the name. This locates the Prompt in the Humanloop filesystem and is used as a unique identifier.
+Example: folder/name or just name.
+- id=string? ID for an existing Prompt.
+- output_message=type_:ChatMessage: content=unknown The content of the message.
+name=unknown Optional name of the message author.
+tool_call_id=unknown Tool call that this message is responding to.
+role=unknown Role of the message author.
+tool_calls=unknown A list of tool calls requested by the assistant. ? The message returned by the provider.
+- prompt_tokens=integer? Number of tokens in the prompt used to generate the output.
+- output_tokens=integer? Number of tokens in the output generated by the model.
+- prompt_cost=double? Cost in dollars associated to the tokens in the prompt.
+- output_cost=double? Cost in dollars associated to the tokens in the output.
+- finish_reason=string? Reason the generation finished.
+- messages=List? The messages passed to the provider chat endpoint.
+- tool_choice=type_prompts:PromptLogRequestToolChoice: unknown | unknown | unknown | unknown Tool Choice Controls how the model uses tools. The following options are supported:
+
+- \`'none'\` means the model will not call any tool and instead generates a message; this is the default when no tools are provided as part of the Prompt.
+- \`'auto'\` means the model can decide to call one or more of the provided tools; this is the default when tools are provided as part of the Prompt.
+- \`'required'\` means the model can decide to call one or more of the provided tools.
+- \`{'type': 'function', 'function': {name': }}\` forces the model to use the named function.? Controls how the model uses tools. The following options are supported:
+
+ * 'none' means the model will not call any tool and instead generates a message; this is the default when no tools are provided
+ as part of the Prompt.
+ * 'auto' means the model can decide to call one or more of the provided tools; this is the default when tools are provided as
+ part of the Prompt.
+ * 'required' means the model can decide to call one or more of the provided tools.
+ * {'type': 'function', 'function': {name': }} forces the model to use the named function.
+- prompt=type_:PromptKernelRequest: model=unknown The model instance used, e.g. \`gpt-4\`. See [supported models](https://humanloop.com/docs/supported-models)
+endpoint=unknown The provider model endpoint used.
+template=unknown For chat endpoint, provide a Chat template. For completion endpoint, provide a Prompt template. Input variables within the template should be specified with double curly bracket syntax: {{INPUT_NAME}}.
+provider=unknown The company providing the underlying model service.
+max_tokens=unknown The maximum number of tokens to generate. Provide max_tokens=-1 to dynamically calculate the maximum number of tokens to generate given the length of the prompt
+temperature=unknown What sampling temperature to use when making a generation. Higher values means the model will be more creative.
+top_p=unknown An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
+stop=unknown The string (or list of strings) after which the model will stop generating. The returned text will not contain the stop sequence.
+presence_penalty=unknown Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the generation so far.
+frequency_penalty=unknown Number between -2.0 and 2.0. Positive values penalize new tokens based on how frequently they appear in the generation so far.
+other=unknown Other parameter values to be passed to the provider call.
+seed=unknown If specified, model will make a best effort to sample deterministically, but it is not guaranteed.
+response_format=unknown The format of the response. Only \`{"type": "json_object"}\` is currently supported for chat.
+tools=unknown The tool specification that the model can choose to call if Tool calling is supported.
+linked_tools=unknown The IDs of the Tools in your organization that the model can choose to call if Tool calling is supported. The default deployed version of that tool is called.
+attributes=unknown Additional fields to describe the Prompt. Helpful to separate Prompt versions from each other with details on how they were created or used. ? Details of your Prompt. A new Prompt version will be created if the provided details are new.
+- start_time=datetime? When the logged event started.
+- end_time=datetime? When the logged event ended.
+- output=string? Generated output from your model for the provided inputs. Can be None if logging an error, or if creating a parent Log with the
+intention to populate it later.
+- created_at=datetime? User defined timestamp for when the log was created.
+- error=string? Error message if the log is an error.
+- provider_latency=double? Duration of the logged event in seconds.
+- stdout=string? Captured log and debug statements.
+- provider_request=Map? Raw request sent to provider.
+- provider_response=Map? Raw response received from the provider.
+- inputs=Map? The inputs passed to the prompt template.
+- source=string? Identifies where the model was called from.
+- metadata=Map? Any additional metadata to record.
+- source_datapoint_id=string? Unique identifier for the Datapoint that this Log is derived from. This can be used by Humanloop to associate Logs to Evaluations.
+If provided, Humanloop will automatically associate this Log to Evaluations that require a Log for this Datapoint-Version pair.
+- trace_parent_id=string? The ID of the parent Log to nest this Log under in a Trace.
+- batches=List? Array of Batch Ids that this log is part of. Batches are used to group Logs together for offline Evaluations
+- user=string? End-user ID related to the Log.
+- environment=string? The name of the Environment the Log is associated to.
+- save=boolean? Whether the request/response payloads will be stored on Humanloop.
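+As a rough sketch of such a request (the base URL and auth header below are assumptions, not taken from this reference; the path,
+query parameters and body fields are as documented above):
+
+import requests
+
+resp = requests.post(
+    "https://api.humanloop.com/v5/prompts/log",       # assumed base URL
+    headers={"X-API-KEY": "YOUR_HUMANLOOP_API_KEY"},  # assumed auth header
+    json={
+        "path": "folder/name",             # locates the Prompt in the Humanloop filesystem
+        "inputs": {"topic": "debugging"},  # inputs passed to the prompt template
+        "output": "A song about debugging...",
+        "save": True,
+    },
+)
+print(resp.json()["id"])  # String ID of the created Log
+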
+## Response
+
+### Body
+
+type_:CreatePromptLogResponse: id=string String ID of log.
+prompt_id=string ID of the Prompt the log belongs to.
+version_id=string ID of the specific version of the Prompt.
+session_id=string? String ID of session the log belongs to. : ",
"endpointPath": [
{
"type": "literal",
@@ -28444,7 +44627,7 @@ in the case where you are storing or deriving your Prompt details in code.",
"method": "POST",
"slug": "docs/v5/api-reference/prompts/log",
"title": "Log",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -33446,18 +49629,173 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/prompts/log",
- "title": "Prompts",
- },
+ "Humanloop API",
+ "Prompts",
],
- "description": "Update a Log.
+ "content": "Update a Log.
+
-Update the details of a Log with the given ID.",
+Update the details of a Log with the given ID.
+## Path Parameters
+
+- /prompts/
+- id
+- /log/
+- log_id
+## Request
+
+### Body
+
+- output_message=type_:ChatMessage: content=unknown The content of the message.
+name=unknown Optional name of the message author.
+tool_call_id=unknown Tool call that this message is responding to.
+role=unknown Role of the message author.
+tool_calls=unknown A list of tool calls requested by the assistant. ? The message returned by the provider.
+- prompt_tokens=integer? Number of tokens in the prompt used to generate the output.
+- output_tokens=integer? Number of tokens in the output generated by the model.
+- prompt_cost=double? Cost in dollars associated to the tokens in the prompt.
+- output_cost=double? Cost in dollars associated to the tokens in the output.
+- finish_reason=string? Reason the generation finished.
+- messages=List? The messages passed to the provider chat endpoint.
+- tool_choice=type_prompts:PromptLogUpdateRequestToolChoice: unknown | unknown | unknown | unknown Tool Choice Controls how the model uses tools. The following options are supported:
+
+- \`'none'\` means the model will not call any tool and instead generates a message; this is the default when no tools are provided as part of the Prompt.
+- \`'auto'\` means the model can decide to call one or more of the provided tools; this is the default when tools are provided as part of the Prompt.
+- \`'required'\` means the model can decide to call one or more of the provided tools.
+- \`{'type': 'function', 'function': {name': }}\` forces the model to use the named function.? Controls how the model uses tools. The following options are supported:
+
+ * 'none' means the model will not call any tool and instead generates a message; this is the default when no tools are provided
+ as part of the Prompt.
+ * 'auto' means the model can decide to call one or more of the provided tools; this is the default when tools are provided as
+ part of the Prompt.
+ * 'required' means the model can decide to call one or more of the provided tools.
+ * {'type': 'function', 'function': {name': }} forces the model to use the named function.
+- output=string? Generated output from your model for the provided inputs. Can be None if logging an error, or if creating a parent Log with the
+intention to populate it later.
+- created_at=datetime? User defined timestamp for when the log was created.
+- error=string? Error message if the log is an error.
+- provider_latency=double? Duration of the logged event in seconds.
+- stdout=string? Captured log and debug statements.
+- provider_request=Map? Raw request sent to provider.
+- provider_response=Map? Raw response received from the provider.
+- inputs=Map? The inputs passed to the prompt template.
+- source=string? Identifies where the model was called from.
+- metadata=Map? Any additional metadata to record.
+- start_time=datetime? When the logged event started.
+- end_time=datetime? When the logged event ended.
+## Response
+
+### Body
+
+type_:LogResponse: type_:PromptLogResponse: output_message=unknown The message returned by the provider.
+prompt_tokens=unknown Number of tokens in the prompt used to generate the output.
+output_tokens=unknown Number of tokens in the output generated by the model.
+prompt_cost=unknown Cost in dollars associated to the tokens in the prompt.
+output_cost=unknown Cost in dollars associated to the tokens in the output.
+finish_reason=unknown Reason the generation finished.
+messages=unknown The messages passed to the provider chat endpoint.
+tool_choice=unknown Controls how the model uses tools. The following options are supported:
+
+- \`'none'\` means the model will not call any tool and instead generates a message; this is the default when no tools are provided as part of the Prompt.
+- \`'auto'\` means the model can decide to call one or more of the provided tools; this is the default when tools are provided as part of the Prompt.
+- \`'required'\` means the model can decide to call one or more of the provided tools.
+- \`{'type': 'function', 'function': {name': }}\` forces the model to use the named function.
+prompt=unknown Prompt used to generate the Log.
+start_time=unknown When the logged event started.
+end_time=unknown When the logged event ended.
+output=unknown Generated output from your model for the provided inputs. Can be \`None\` if logging an error, or if creating a parent Log with the intention to populate it later.
+created_at=unknown User defined timestamp for when the log was created.
+error=unknown Error message if the log is an error.
+provider_latency=unknown Duration of the logged event in seconds.
+stdout=unknown Captured log and debug statements.
+provider_request=unknown Raw request sent to provider.
+provider_response=unknown Raw response received from the provider.
+inputs=unknown The inputs passed to the prompt template.
+source=unknown Identifies where the model was called from.
+metadata=unknown Any additional metadata to record.
+source_datapoint_id=unknown Unique identifier for the Datapoint that this Log is derived from. This can be used by Humanloop to associate Logs to Evaluations. If provided, Humanloop will automatically associate this Log to Evaluations that require a Log for this Datapoint-Version pair.
+trace_parent_id=unknown The ID of the parent Log to nest this Log under in a Trace.
+batches=unknown Array of Batch Ids that this log is part of. Batches are used to group Logs together for offline Evaluations
+user=unknown End-user ID related to the Log.
+environment=unknown The name of the Environment the Log is associated to.
+save=unknown Whether the request/response payloads will be stored on Humanloop.
+id=unknown Unique identifier for the Log.
+evaluator_logs=unknown List of Evaluator Logs associated with the Log. These contain Evaluator judgments on the Log.
+trace_flow_id=unknown Identifier for the Flow that the Trace belongs to.
+trace_id=unknown Identifier for the Trace that the Log belongs to.
+trace_children=unknown Logs nested under this Log in the Trace. General request for creating a Log Prompt Log Response | type_:ToolLogResponse: start_time=unknown When the logged event started.
+end_time=unknown When the logged event ended.
+output=unknown Generated output from your model for the provided inputs. Can be \`None\` if logging an error, or if creating a parent Log with the intention to populate it later.
+created_at=unknown User defined timestamp for when the log was created.
+error=unknown Error message if the log is an error.
+provider_latency=unknown Duration of the logged event in seconds.
+stdout=unknown Captured log and debug statements.
+provider_request=unknown Raw request sent to provider.
+provider_response=unknown Raw response received from the provider.
+inputs=unknown The inputs passed to the prompt template.
+source=unknown Identifies where the model was called from.
+metadata=unknown Any additional metadata to record.
+source_datapoint_id=unknown Unique identifier for the Datapoint that this Log is derived from. This can be used by Humanloop to associate Logs to Evaluations. If provided, Humanloop will automatically associate this Log to Evaluations that require a Log for this Datapoint-Version pair.
+trace_parent_id=unknown The ID of the parent Log to nest this Log under in a Trace.
+batches=unknown Array of Batch Ids that this log is part of. Batches are used to group Logs together for offline Evaluations
+user=unknown End-user ID related to the Log.
+environment=unknown The name of the Environment the Log is associated to.
+save=unknown Whether the request/response payloads will be stored on Humanloop.
+id=unknown Unique identifier for the Log.
+evaluator_logs=unknown List of Evaluator Logs associated with the Log. These contain Evaluator judgments on the Log.
+trace_flow_id=unknown Identifier for the Flow that the Trace belongs to.
+trace_id=unknown Identifier for the Trace that the Log belongs to.
+trace_children=unknown Logs nested under this Log in the Trace.
+tool=unknown Tool used to generate the Log. General request for creating a Log Tool Log Response | type_:EvaluatorLogResponse: start_time=unknown When the logged event started.
+end_time=unknown When the logged event ended.
+output=unknown Generated output from your model for the provided inputs. Can be \`None\` if logging an error, or if creating a parent Log with the intention to populate it later.
+created_at=unknown User defined timestamp for when the log was created.
+error=unknown Error message if the log is an error.
+provider_latency=unknown Duration of the logged event in seconds.
+stdout=unknown Captured log and debug statements.
+provider_request=unknown Raw request sent to provider.
+provider_response=unknown Raw response received from the provider.
+inputs=unknown The inputs passed to the prompt template.
+source=unknown Identifies where the model was called from.
+metadata=unknown Any additional metadata to record.
+parent_id=unknown Identifier of the evaluated Log. The newly created Log will have this one set as parent.
+source_datapoint_id=unknown Unique identifier for the Datapoint that this Log is derived from. This can be used by Humanloop to associate Logs to Evaluations. If provided, Humanloop will automatically associate this Log to Evaluations that require a Log for this Datapoint-Version pair.
+trace_parent_id=unknown The ID of the parent Log to nest this Log under in a Trace.
+batches=unknown Array of Batch Ids that this log is part of. Batches are used to group Logs together for offline Evaluations
+user=unknown End-user ID related to the Log.
+environment=unknown The name of the Environment the Log is associated to.
+save=unknown Whether the request/response payloads will be stored on Humanloop.
+judgment=unknown Evaluator assessment of the Log.
+id=unknown Unique identifier for the Log.
+evaluator_logs=unknown List of Evaluator Logs associated with the Log. These contain Evaluator judgments on the Log.
+trace_flow_id=unknown Identifier for the Flow that the Trace belongs to.
+trace_id=unknown Identifier for the Trace that the Log belongs to.
+trace_children=unknown Logs nested under this Log in the Trace.
+evaluator=unknown Evaluator used to generate the judgment.
+parent=unknown The Log that was evaluated. Only provided if the ?include_parent query parameter is set for the General request for creating a Log Evaluator Log Response | type_:FlowLogResponse: start_time=unknown When the logged event started.
+end_time=unknown When the logged event ended.
+output=unknown Generated output from your model for the provided inputs. Can be \`None\` if logging an error, or if creating a parent Log with the intention to populate it later.
+created_at=unknown User defined timestamp for when the log was created.
+error=unknown Error message if the log is an error.
+provider_latency=unknown Duration of the logged event in seconds.
+stdout=unknown Captured log and debug statements.
+provider_request=unknown Raw request sent to provider.
+provider_response=unknown Raw response received from the provider.
+inputs=unknown The inputs passed to the Flow Log.
+source=unknown Identifies where the model was called from.
+metadata=unknown Any additional metadata to record.
+source_datapoint_id=unknown Unique identifier for the Datapoint that this Log is derived from. This can be used by Humanloop to associate Logs to Evaluations. If provided, Humanloop will automatically associate this Log to Evaluations that require a Log for this Datapoint-Version pair.
+trace_parent_id=unknown The ID of the parent Log to nest this Log under in a Trace.
+batches=unknown Array of Batch Ids that this log is part of. Batches are used to group Logs together for offline Evaluations
+user=unknown End-user ID related to the Log.
+environment=unknown The name of the Environment the Log is associated to.
+save=unknown Whether the request/response payloads will be stored on Humanloop.
+id=unknown Unique identifier for the Log.
+evaluator_logs=unknown List of Evaluator Logs associated with the Log. These contain Evaluator judgments on the Log.
+trace_flow_id=unknown Identifier for the Flow that the Trace belongs to.
+trace_id=unknown Identifier for the Trace that the Log belongs to.
+trace_children=unknown Logs nested under this Log in the Trace.
+flow=unknown Flow used to generate the Log.
+trace_status=unknown Status of the Trace. When a Trace is marked as \`complete\`, no more Logs can be added to it. Monitoring Evaluators will only run on completed Traces. General request for creating a Log Flow Log Response : ",
"endpointPath": [
{
"type": "literal",
@@ -33481,7 +49819,7 @@ Update the details of a Log with the given ID.",
"method": "PATCH",
"slug": "docs/v5/api-reference/prompts/update",
"title": "Update Log",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -52911,16 +69249,10 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/prompts/log",
- "title": "Prompts",
- },
+ "Humanloop API",
+ "Prompts",
],
- "description": "Call a Prompt.
+ "content": "Call a Prompt.
Calling a Prompt calls the model provider before logging
the request, responses and metadata to Humanloop.
@@ -52931,7 +69263,145 @@ an existing version of the Prompt. Otherwise the default deployed version will b
Instead of targeting an existing version explicitly, you can instead pass in
Prompt details in the request body. In this case, we will check if the details correspond
to an existing version of the Prompt. If they do not, we will create a new version. This is helpful
-in the case where you are storing or deriving your Prompt details in code.",
+in the case where you are storing or deriving your Prompt details in code.
+## Path Parameters
+
+- /prompts/call
+## Query Parameters
+
+- version_id=string? A specific Version ID of the Prompt to log to.
+- environment=string? Name of the Environment identifying a deployed version to log to.
+## Request
+
+### Body
+
+- path=string? Path of the Prompt, including the name. This locates the Prompt in the Humanloop filesystem and is used as a unique identifier.
+Example: folder/name or just name.
+- id=string? ID for an existing Prompt.
+- messages=List? The messages passed to the provider chat endpoint.
+- tool_choice=type_prompts:PromptsCallRequestToolChoice: unknown | unknown | unknown | unknown Tool Choice Controls how the model uses tools. The following options are supported:
+
+- \`'none'\` means the model will not call any tool and instead generates a message; this is the default when no tools are provided as part of the Prompt.
+- \`'auto'\` means the model can decide to call one or more of the provided tools; this is the default when tools are provided as part of the Prompt.
+- \`'required'\` means the model can decide to call one or more of the provided tools.
+- \`{'type': 'function', 'function': {'name': }}\` forces the model to use the named function.? Controls how the model uses tools. The following options are supported:
+
+ * 'none' means the model will not call any tool and instead generates a message; this is the default when no tools are provided
+ as part of the Prompt.
+ * 'auto' means the model can decide to call one or more of the provided tools; this is the default when tools are provided as
+ part of the Prompt.
+ * 'required' means the model can decide to call one or more of the provided tools.
+ * {'type': 'function', 'function': {'name': }} forces the model to use the named function.
+- prompt=type_:PromptKernelRequest: model=unknown The model instance used, e.g. \`gpt-4\`. See [supported models](https://humanloop.com/docs/supported-models)
+endpoint=unknown The provider model endpoint used.
+template=unknown For chat endpoint, provide a Chat template. For completion endpoint, provide a Prompt template. Input variables within the template should be specified with double curly bracket syntax: {{INPUT_NAME}}.
+provider=unknown The company providing the underlying model service.
+max_tokens=unknown The maximum number of tokens to generate. Provide max_tokens=-1 to dynamically calculate the maximum number of tokens to generate given the length of the prompt
+temperature=unknown What sampling temperature to use when making a generation. Higher values means the model will be more creative.
+top_p=unknown An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
+stop=unknown The string (or list of strings) after which the model will stop generating. The returned text will not contain the stop sequence.
+presence_penalty=unknown Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the generation so far.
+frequency_penalty=unknown Number between -2.0 and 2.0. Positive values penalize new tokens based on how frequently they appear in the generation so far.
+other=unknown Other parameter values to be passed to the provider call.
+seed=unknown If specified, model will make a best effort to sample deterministically, but it is not guaranteed.
+response_format=unknown The format of the response. Only \`{"type": "json_object"}\` is currently supported for chat.
+tools=unknown The tool specification that the model can choose to call if Tool calling is supported.
+linked_tools=unknown The IDs of the Tools in your organization that the model can choose to call if Tool calling is supported. The default deployed version of that tool is called.
+attributes=unknown Additional fields to describe the Prompt. Helpful to separate Prompt versions from each other with details on how they were created or used. ? Details of your Prompt. A new Prompt version will be created if the provided details are new.
+- inputs=Map? The inputs passed to the prompt template.
+- source=string? Identifies where the model was called from.
+- metadata=Map? Any additional metadata to record.
+- start_time=datetime? When the logged event started.
+- end_time=datetime? When the logged event ended.
+- source_datapoint_id=string? Unique identifier for the Datapoint that this Log is derived from. This can be used by Humanloop to associate Logs to Evaluations.
+If provided, Humanloop will automatically associate this Log to Evaluations that require a Log for this Datapoint-Version pair.
+- trace_parent_id=string? The ID of the parent Log to nest this Log under in a Trace.
+- batches=List? Array of Batch Ids that this log is part of. Batches are used to group Logs together for offline Evaluations
+- user=string? End-user ID related to the Log.
+- environment=string? The name of the Environment the Log is associated to.
+- save=boolean? Whether the request/response payloads will be stored on Humanloop.
+- provider_api_keys=type_:ProviderApiKeys: openai=unknown
+ai21=unknown
+mock=unknown
+anthropic=unknown
+bedrock=unknown
+cohere=unknown
+openai_azure=unknown
+openai_azure_endpoint=unknown ? API keys required by each provider to make API calls. The API keys provided here are not stored by Humanloop. If not specified
+here, Humanloop will fall back to the key saved to your organization.
+- num_samples=integer? The number of generations.
+- stream=false If true, tokens will be sent as data-only server-sent events. If num_samples > 1, samples are streamed back independently.
+- return_inputs=boolean? Whether to return the inputs in the response. If false, the response will contain an empty dictionary under inputs. This is useful
+for reducing the size of the response. Defaults to true.
+- logprobs=integer? Include the log probabilities of the top n tokens in the provider_response
+- suffix=string? The suffix that comes after a completion of inserted text. Useful for completions that act like inserts.
+## Response
+
+### Body
+
+type_:PromptCallResponse: start_time=datetime? When the logged event started.
+end_time=datetime? When the logged event ended.
+messages=List? The messages passed to the provider chat endpoint.
+tool_choice=type_:PromptCallResponseToolChoice: unknown Controls how the model uses tools. The following options are supported:
+
+- \`'none'\` means the model will not call any tool and instead generates a message; this is the default when no tools are provided as part of the Prompt.
+- \`'auto'\` means the model can decide to call one or more of the provided tools; this is the default when tools are provided as part of the Prompt.
+- \`'required'\` means the model can decide to call one or more of the provided tools.
+- \`{'type': 'function', 'function': {'name': }}\` forces the model to use the named function.? Controls how the model uses tools. The following options are supported:
+
+- \`'none'\` means the model will not call any tool and instead generates a message; this is the default when no tools are provided as part of the Prompt.
+- \`'auto'\` means the model can decide to call one or more of the provided tools; this is the default when tools are provided as part of the Prompt.
+- \`'required'\` means the model can decide to call one or more of the provided tools.
+- \`{'type': 'function', 'function': {'name': }}\` forces the model to use the named function.
+prompt=type_:PromptResponse: path=unknown Path of the Prompt, including the name, which is used as a unique identifier.
+id=unknown Unique identifier for the Prompt.
+directory_id=unknown ID of the directory that the file is in on Humanloop.
+model=unknown The model instance used, e.g. \`gpt-4\`. See [supported models](https://humanloop.com/docs/supported-models)
+endpoint=unknown The provider model endpoint used.
+template=unknown For chat endpoint, provide a Chat template. For completion endpoint, provide a Prompt template. Input variables within the template should be specified with double curly bracket syntax: {{INPUT_NAME}}.
+provider=unknown The company providing the underlying model service.
+max_tokens=unknown The maximum number of tokens to generate. Provide max_tokens=-1 to dynamically calculate the maximum number of tokens to generate given the length of the prompt
+temperature=unknown What sampling temperature to use when making a generation. Higher values means the model will be more creative.
+top_p=unknown An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
+stop=unknown The string (or list of strings) after which the model will stop generating. The returned text will not contain the stop sequence.
+presence_penalty=unknown Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the generation so far.
+frequency_penalty=unknown Number between -2.0 and 2.0. Positive values penalize new tokens based on how frequently they appear in the generation so far.
+other=unknown Other parameter values to be passed to the provider call.
+seed=unknown If specified, model will make a best effort to sample deterministically, but it is not guaranteed.
+response_format=unknown The format of the response. Only \`{"type": "json_object"}\` is currently supported for chat.
+tools=unknown The tool specification that the model can choose to call if Tool calling is supported.
+linked_tools=unknown The tools linked to your prompt that the model can call.
+attributes=unknown Additional fields to describe the Prompt. Helpful to separate Prompt versions from each other with details on how they were created or used.
+commit_message=unknown Message describing the changes made.
+name=unknown Name of the Prompt.
+version_id=unknown Unique identifier for the specific Prompt Version. If no query params provided, the default deployed Prompt Version is returned.
+type=unknown
+environments=unknown The list of environments the Prompt Version is deployed to.
+created_at=unknown
+updated_at=unknown
+created_by=unknown The user who created the Prompt.
+status=unknown The status of the Prompt Version.
+last_used_at=unknown
+version_logs_count=unknown The number of logs that have been generated for this Prompt Version
+total_logs_count=unknown The number of logs that have been generated across all Prompt Versions
+inputs=unknown Inputs associated to the Prompt. Inputs correspond to any of the variables used within the Prompt template.
+evaluators=unknown Evaluators that have been attached to this Prompt that are used for monitoring logs.
+evaluator_aggregates=unknown Aggregation of Evaluator results for the Prompt Version. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions. Prompt used to generate the Log.
+inputs=Map? The inputs passed to the prompt template.
+source=string? Identifies where the model was called from.
+metadata=Map? Any additional metadata to record.
+source_datapoint_id=string? Unique identifier for the Datapoint that this Log is derived from. This can be used by Humanloop to associate Logs to Evaluations. If provided, Humanloop will automatically associate this Log to Evaluations that require a Log for this Datapoint-Version pair.
+trace_parent_id=string? The ID of the parent Log to nest this Log under in a Trace.
+batches=List? Array of Batch Ids that this log is part of. Batches are used to group Logs together for offline Evaluations
+user=string? End-user ID related to the Log.
+environment=string? The name of the Environment the Log is associated to.
+save=boolean? Whether the request/response payloads will be stored on Humanloop.
+id=string ID of the log.
+trace_id=string? ID of the Trace containing the Prompt Call Log.
+logs=List The logs generated by the Prompt call. Response model for a Prompt call with potentially multiple log samples.: ",
"endpointPath": [
{
"type": "literal",
@@ -52943,7 +69413,7 @@ in the case where you are storing or deriving your Prompt details in code.",
"method": "POST",
"slug": "docs/v5/api-reference/prompts/call",
"title": "Call",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -62150,16 +78620,32 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/prompts/log",
- "title": "Prompts",
- },
+ "Humanloop API",
+ "Prompts",
],
- "description": "Get a list of all Prompts.",
+ "content": "Get a list of all Prompts.
+## Path Parameters
+
+- /prompts
+## Query Parameters
+
+- page=integer? Page number for pagination.
+- size=integer? Page size for pagination. Number of Prompts to fetch.
+- name=string? Case-insensitive filter for Prompt name.
+- user_filter=string? Case-insensitive filter for users in the Prompt. This filter matches against both email address and name of users.
+- sort_by=type_:ProjectSortBy: created_at (),updated_at (),name () An enumeration.? Field to sort Prompts by
+- order=type_:SortOrder: asc (),desc () An enumeration.? Direction to sort by.
+## Response
+
+### Body
+
+type_:PaginatedDataPromptResponse: records=List
+page=integer
+size=integer
+total=integer : ",
"endpointPath": [
{
"type": "literal",
@@ -62171,7 +78657,7 @@ An enumeration.",
"method": "GET",
"slug": "docs/v5/api-reference/prompts/list",
"title": "List ",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -65522,22 +82008,90 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/prompts/log",
- "title": "Prompts",
- },
+ "Humanloop API",
+ "Prompts",
],
- "description": "Create a Prompt or update it with a new version if it already exists.
+ "content": "Create a Prompt or update it with a new version if it already exists.
Prompts are identified by the \`ID\` or their \`path\`. The parameters (i.e. the prompt template, temperature, model etc.) determine the versions of the Prompt.
If you provide a commit message, then the new version will be committed;
otherwise it will be uncommitted. If you try to commit an already committed version,
-an exception will be raised.",
+an exception will be raised.
+## Path Parameters
+
+- /prompts
+## Request
+
+### Body
+
+- path=string? Path of the Prompt, including the name. This locates the Prompt in the Humanloop filesystem and is used as a unique identifier.
+Example: folder/name or just name.
+- id=string? ID for an existing Prompt.
+- model=string The model instance used, e.g. gpt-4. See supported models [https://humanloop.com/docs/supported-models]
+- endpoint=type_:ModelEndpoints: complete (),chat (),edit () Supported model provider endpoints.? The provider model endpoint used.
+- template=type_prompts:PromptRequestTemplate: unknown | unknown For chat endpoint, provide a Chat template. For completion endpoint, provide a Prompt template. Input variables within the template should be specified with double curly bracket syntax: {{INPUT_NAME}}.? For chat endpoint, provide a Chat template. For completion endpoint, provide a Prompt template. Input variables within the
+template should be specified with double curly bracket syntax: {{INPUT_NAME}}.
+- provider=type_:ModelProviders: openai (),openai_azure (),mock (),anthropic (),bedrock (),cohere (),replicate (),google (),groq () Supported model providers.? The company providing the underlying model service.
+- max_tokens=integer? The maximum number of tokens to generate. Provide max_tokens=-1 to dynamically calculate the maximum number of tokens to generate
+given the length of the prompt
+- temperature=double? What sampling temperature to use when making a generation. Higher values means the model will be more creative.
+- top_p=double? An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with
+top_p probability mass.
+- stop=type_prompts:PromptRequestStop: unknown | unknown The string (or list of strings) after which the model will stop generating. The returned text will not contain the stop sequence.? The string (or list of strings) after which the model will stop generating. The returned text will not contain the stop sequence.
+- presence_penalty=double? Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the generation so far.
+- frequency_penalty=double? Number between -2.0 and 2.0. Positive values penalize new tokens based on how frequently they appear in the generation so far.
+- other=Map? Other parameter values to be passed to the provider call.
+- seed=integer? If specified, model will make a best effort to sample deterministically, but it is not guaranteed.
+- response_format=type_:ResponseFormat: type=unknown
+json_schema=unknown The JSON schema of the response format if type is json_schema. Response format of the model.? The format of the response. Only {"type": "json_object"} is currently supported for chat.
+- tools=List? The tool specification that the model can choose to call if Tool calling is supported.
+- linked_tools=List? The IDs of the Tools in your organization that the model can choose to call if Tool calling is supported. The default deployed
+version of that tool is called.
+- attributes=Map? Additional fields to describe the Prompt. Helpful to separate Prompt versions from each other with details on how they were
+created or used.
+- commit_message=string? Message describing the changes made.
+## Response
+
+### Body
+
+type_:PromptResponse: path=string Path of the Prompt, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Prompt.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+model=string The model instance used, e.g. \`gpt-4\`. See [supported models](https://humanloop.com/docs/supported-models)
+endpoint=type_:ModelEndpoints: unknown Supported model provider endpoints.? The provider model endpoint used.
+template=type_:PromptResponseTemplate: unknown For chat endpoint, provide a Chat template. For completion endpoint, provide a Prompt template. Input variables within the template should be specified with double curly bracket syntax: {{INPUT_NAME}}.? For chat endpoint, provide a Chat template. For completion endpoint, provide a Prompt template. Input variables within the template should be specified with double curly bracket syntax: {{INPUT_NAME}}.
+provider=type_:ModelProviders: unknown Supported model providers.? The company providing the underlying model service.
+max_tokens=integer? The maximum number of tokens to generate. Provide max_tokens=-1 to dynamically calculate the maximum number of tokens to generate given the length of the prompt
+temperature=double? What sampling temperature to use when making a generation. Higher values means the model will be more creative.
+top_p=double? An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
+stop=type_:PromptResponseStop: unknown The string (or list of strings) after which the model will stop generating. The returned text will not contain the stop sequence.? The string (or list of strings) after which the model will stop generating. The returned text will not contain the stop sequence.
+presence_penalty=double? Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the generation so far.
+frequency_penalty=double? Number between -2.0 and 2.0. Positive values penalize new tokens based on how frequently they appear in the generation so far.
+other=Map? Other parameter values to be passed to the provider call.
+seed=integer? If specified, model will make a best effort to sample deterministically, but it is not guaranteed.
+response_format=type_:ResponseFormat: unknown Response format of the model.? The format of the response. Only \`{"type": "json_object"}\` is currently supported for chat.
+tools=List? The tool specification that the model can choose to call if Tool calling is supported.
+linked_tools=List? The tools linked to your prompt that the model can call.
+attributes=Map? Additional fields to describe the Prompt. Helpful to separate Prompt versions from each other with details on how they were created or used.
+commit_message=string? Message describing the changes made.
+name=string Name of the Prompt.
+version_id=string Unique identifier for the specific Prompt Version. If no query params provided, the default deployed Prompt Version is returned.
+type=prompt?
+environments=List? The list of environments the Prompt Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Prompt.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Prompt Version.
+last_used_at=datetime
+version_logs_count=integer The number of logs that have been generated for this Prompt Version
+total_logs_count=integer The number of logs that have been generated across all Prompt Versions
+inputs=List Inputs associated to the Prompt. Inputs correspond to any of the variables used within the Prompt template.
+evaluators=List? Evaluators that have been attached to this Prompt that are used for monitoring logs.
+evaluator_aggregates=List? Aggregation of Evaluator results for the Prompt Version. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions.: ",
"endpointPath": [
{
"type": "literal",
@@ -65549,7 +82103,7 @@ an exception will be raised.",
"method": "POST",
"slug": "docs/v5/api-reference/prompts/upsert",
"title": "Upsert",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -71881,19 +88435,62 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/prompts/log",
- "title": "Prompts",
- },
+ "Humanloop API",
+ "Prompts",
],
- "description": "Retrieve the Prompt with the given ID.
+ "content": "Retrieve the Prompt with the given ID.
By default, the deployed version of the Prompt is returned. Use the query parameters
-\`version_id\` or \`environment\` to target a specific version of the Prompt.",
+\`version_id\` or \`environment\` to target a specific version of the Prompt.
+## Path Parameters
+
+- /prompts/
+- id
+## Query Parameters
+
+- version_id=string? A specific Version ID of the Prompt to retrieve.
+- environment=string? Name of the Environment to retrieve a deployed Version from.
+## Response
+
+### Body
+
+type_:PromptResponse: path=string Path of the Prompt, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Prompt.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+model=string The model instance used, e.g. \`gpt-4\`. See [supported models](https://humanloop.com/docs/supported-models)
+endpoint=type_:ModelEndpoints: unknown Supported model provider endpoints.? The provider model endpoint used.
+template=type_:PromptResponseTemplate: unknown For chat endpoint, provide a Chat template. For completion endpoint, provide a Prompt template. Input variables within the template should be specified with double curly bracket syntax: {{INPUT_NAME}}.? For chat endpoint, provide a Chat template. For completion endpoint, provide a Prompt template. Input variables within the template should be specified with double curly bracket syntax: {{INPUT_NAME}}.
+provider=type_:ModelProviders: unknown Supported model providers.? The company providing the underlying model service.
+max_tokens=integer? The maximum number of tokens to generate. Provide max_tokens=-1 to dynamically calculate the maximum number of tokens to generate given the length of the prompt
+temperature=double? What sampling temperature to use when making a generation. Higher values means the model will be more creative.
+top_p=double? An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
+stop=type_:PromptResponseStop: unknown The string (or list of strings) after which the model will stop generating. The returned text will not contain the stop sequence.? The string (or list of strings) after which the model will stop generating. The returned text will not contain the stop sequence.
+presence_penalty=double? Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the generation so far.
+frequency_penalty=double? Number between -2.0 and 2.0. Positive values penalize new tokens based on how frequently they appear in the generation so far.
+other=Map? Other parameter values to be passed to the provider call.
+seed=integer? If specified, model will make a best effort to sample deterministically, but it is not guaranteed.
+response_format=type_:ResponseFormat: unknown Response format of the model.? The format of the response. Only \`{"type": "json_object"}\` is currently supported for chat.
+tools=List? The tool specification that the model can choose to call if Tool calling is supported.
+linked_tools=List? The tools linked to your prompt that the model can call.
+attributes=Map? Additional fields to describe the Prompt. Helpful to separate Prompt versions from each other with details on how they were created or used.
+commit_message=string? Message describing the changes made.
+name=string Name of the Prompt.
+version_id=string Unique identifier for the specific Prompt Version. If no query params provided, the default deployed Prompt Version is returned.
+type=prompt?
+environments=List? The list of environments the Prompt Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Prompt.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Prompt Version.
+last_used_at=datetime
+version_logs_count=integer The number of logs that have been generated for this Prompt Version
+total_logs_count=integer The number of logs that have been generated across all Prompt Versions
+inputs=List Inputs associated to the Prompt. Inputs correspond to any of the variables used within the Prompt template.
+evaluators=List? Evaluators that have been attached to this Prompt that are used for monitoring logs.
+evaluator_aggregates=List? Aggregation of Evaluator results for the Prompt Version. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions.: ",
"endpointPath": [
{
"type": "literal",
@@ -71909,7 +88506,7 @@ By default, the deployed version of the Prompt is returned. Use the query parame
"method": "GET",
"slug": "docs/v5/api-reference/prompts/get",
"title": "Get",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -76844,16 +93441,14 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/prompts/log",
- "title": "Prompts",
- },
+ "Humanloop API",
+ "Prompts",
],
- "description": "Delete the Prompt with the given ID.",
+ "content": "Delete the Prompt with the given ID.
+## Path Parameters
+
+- /prompts/
+- id",
"endpointPath": [
{
"type": "literal",
@@ -76869,7 +93464,7 @@ An enumeration.",
"method": "DELETE",
"slug": "docs/v5/api-reference/prompts/delete",
"title": "Delete",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -77167,16 +93762,61 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/prompts/log",
- "title": "Prompts",
- },
+ "Humanloop API",
+ "Prompts",
],
- "description": "Move the Prompt to a different path or change the name.",
+ "content": "Move the Prompt to a different path or change the name.
+## Path Parameters
+
+- /prompts/
+- id
+## Request
+
+### Body
+
+- path=string? Path of the Prompt including the Prompt name, which is used as a unique identifier.
+- name=string? Name of the Prompt.
+## Response
+
+### Body
+
+type_:PromptResponse: path=string Path of the Prompt, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Prompt.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+model=string The model instance used, e.g. \`gpt-4\`. See [supported models](https://humanloop.com/docs/supported-models)
+endpoint=type_:ModelEndpoints: unknown Supported model provider endpoints.? The provider model endpoint used.
+template=type_:PromptResponseTemplate: unknown For chat endpoint, provide a Chat template. For completion endpoint, provide a Prompt template. Input variables within the template should be specified with double curly bracket syntax: {{INPUT_NAME}}.? For chat endpoint, provide a Chat template. For completion endpoint, provide a Prompt template. Input variables within the template should be specified with double curly bracket syntax: {{INPUT_NAME}}.
+provider=type_:ModelProviders: unknown Supported model providers.? The company providing the underlying model service.
+max_tokens=integer? The maximum number of tokens to generate. Provide max_tokens=-1 to dynamically calculate the maximum number of tokens to generate given the length of the prompt
+temperature=double? What sampling temperature to use when making a generation. Higher values means the model will be more creative.
+top_p=double? An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
+stop=type_:PromptResponseStop: unknown The string (or list of strings) after which the model will stop generating. The returned text will not contain the stop sequence.? The string (or list of strings) after which the model will stop generating. The returned text will not contain the stop sequence.
+presence_penalty=double? Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the generation so far.
+frequency_penalty=double? Number between -2.0 and 2.0. Positive values penalize new tokens based on how frequently they appear in the generation so far.
+other=Map? Other parameter values to be passed to the provider call.
+seed=integer? If specified, model will make a best effort to sample deterministically, but it is not guaranteed.
+response_format=type_:ResponseFormat: unknown Response format of the model.? The format of the response. Only \`{"type": "json_object"}\` is currently supported for chat.
+tools=List? The tool specification that the model can choose to call if Tool calling is supported.
+linked_tools=List? The tools linked to your prompt that the model can call.
+attributes=Map? Additional fields to describe the Prompt. Helpful to separate Prompt versions from each other with details on how they were created or used.
+commit_message=string? Message describing the changes made.
+name=string Name of the Prompt.
+version_id=string Unique identifier for the specific Prompt Version. If no query params provided, the default deployed Prompt Version is returned.
+type=prompt?
+environments=List? The list of environments the Prompt Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Prompt.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Prompt Version.
+last_used_at=datetime
+version_logs_count=integer The number of logs that have been generated for this Prompt Version
+total_logs_count=integer The number of logs that have been generated across all Prompt Versions
+inputs=List Inputs associated to the Prompt. Inputs correspond to any of the variables used within the Prompt template.
+evaluators=List? Evaluators that have been attached to this Prompt that are used for monitoring logs.
+evaluator_aggregates=List? Aggregation of Evaluator results for the Prompt Version. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions.: ",
"endpointPath": [
{
"type": "literal",
@@ -77192,7 +93832,7 @@ An enumeration.",
"method": "PATCH",
"slug": "docs/v5/api-reference/prompts/move",
"title": "Move",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -82064,16 +98704,27 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/prompts/log",
- "title": "Prompts",
- },
+ "Humanloop API",
+ "Prompts",
],
- "description": "Get a list of all the versions of a Prompt.",
+ "content": "Get a list of all the versions of a Prompt.
+## Path Parameters
+
+- /prompts/
+- id
+- /versions
+## Query Parameters
+
+- status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration.? Filter versions by status: 'uncommitted', 'committed'. If no status is provided, all versions are returned.
+- evaluator_aggregates=boolean? Whether to include Evaluator aggregate results for the versions in the response
+## Response
+
+### Body
+
+type_:ListPrompts: records=List The list of Prompts. : ",
"endpointPath": [
{
"type": "literal",
@@ -82093,7 +98744,7 @@ An enumeration.",
"method": "GET",
"slug": "docs/v5/api-reference/prompts/list-versions",
"title": "List Versions",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -85497,18 +102148,65 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/prompts/log",
- "title": "Prompts",
- },
+ "Humanloop API",
+ "Prompts",
],
- "description": "Commit a version of the Prompt with a commit message.
+ "content": "Commit a version of the Prompt with a commit message.
+
-If the version is already committed, an exception will be raised.",
+If the version is already committed, an exception will be raised.
+## Path Parameters
+
+- /prompts/
+- id
+- /versions/
+- version_id
+- /commit
+## Request
+
+### Body
+
+type_:CommitRequest: commit_message=string Message describing the changes made. :
+## Response
+
+### Body
+
+type_:PromptResponse: path=string Path of the Prompt, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Prompt.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+model=string The model instance used, e.g. \`gpt-4\`. See [supported models](https://humanloop.com/docs/supported-models)
+endpoint=type_:ModelEndpoints: unknown Supported model provider endpoints.? The provider model endpoint used.
+template=type_:PromptResponseTemplate: unknown For chat endpoint, provide a Chat template. For completion endpoint, provide a Prompt template. Input variables within the template should be specified with double curly bracket syntax: {{INPUT_NAME}}.? For chat endpoint, provide a Chat template. For completion endpoint, provide a Prompt template. Input variables within the template should be specified with double curly bracket syntax: {{INPUT_NAME}}.
+provider=type_:ModelProviders: unknown Supported model providers.? The company providing the underlying model service.
+max_tokens=integer? The maximum number of tokens to generate. Provide max_tokens=-1 to dynamically calculate the maximum number of tokens to generate given the length of the prompt
+temperature=double? What sampling temperature to use when making a generation. Higher values means the model will be more creative.
+top_p=double? An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
+stop=type_:PromptResponseStop: unknown The string (or list of strings) after which the model will stop generating. The returned text will not contain the stop sequence.? The string (or list of strings) after which the model will stop generating. The returned text will not contain the stop sequence.
+presence_penalty=double? Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the generation so far.
+frequency_penalty=double? Number between -2.0 and 2.0. Positive values penalize new tokens based on how frequently they appear in the generation so far.
+other=Map? Other parameter values to be passed to the provider call.
+seed=integer? If specified, model will make a best effort to sample deterministically, but it is not guaranteed.
+response_format=type_:ResponseFormat: unknown Response format of the model.? The format of the response. Only \`{"type": "json_object"}\` is currently supported for chat.
+tools=List? The tool specification that the model can choose to call if Tool calling is supported.
+linked_tools=List? The tools linked to your prompt that the model can call.
+attributes=Map? Additional fields to describe the Prompt. Helpful to separate Prompt versions from each other with details on how they were created or used.
+commit_message=string? Message describing the changes made.
+name=string Name of the Prompt.
+version_id=string Unique identifier for the specific Prompt Version. If no query params provided, the default deployed Prompt Version is returned.
+type=prompt?
+environments=List? The list of environments the Prompt Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Prompt.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Prompt Version.
+last_used_at=datetime
+version_logs_count=integer The number of logs that have been generated for this Prompt Version
+total_logs_count=integer The number of logs that have been generated across all Prompt Versions
+inputs=List Inputs associated to the Prompt. Inputs correspond to any of the variables used within the Prompt template.
+evaluators=List? Evaluators that have been attached to this Prompt that are used for monitoring logs.
+evaluator_aggregates=List? Aggregation of Evaluator results for the Prompt Version. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions.: ",
"endpointPath": [
{
"type": "literal",
@@ -85536,7 +102234,7 @@ If the version is already committed, an exception will be raised.",
"method": "POST",
"slug": "docs/v5/api-reference/prompts/commit",
"title": "Commit",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -91755,19 +108453,65 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/prompts/log",
- "title": "Prompts",
- },
+ "Humanloop API",
+ "Prompts",
],
- "description": "Activate and deactivate Evaluators for monitoring the Prompt.
+ "content": "Activate and deactivate Evaluators for monitoring the Prompt.
An activated Evaluator will automatically be run on all new Logs
-within the Prompt for monitoring purposes.",
+within the Prompt for monitoring purposes.
+## Path Parameters
+
+- /prompts/
+- id
+- /evaluators
+## Request
+
+### Body
+
+type_:EvaluatorActivationDeactivationRequest: activate=List? Evaluators to activate for Monitoring. These will be automatically run on new Logs.
+deactivate=List? Evaluators to deactivate. These will not be run on new Logs. :
+## Response
+
+### Body
+
+type_:PromptResponse: path=string Path of the Prompt, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Prompt.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+model=string The model instance used, e.g. \`gpt-4\`. See [supported models](https://humanloop.com/docs/supported-models)
+endpoint=type_:ModelEndpoints: unknown Supported model provider endpoints.? The provider model endpoint used.
+template=type_:PromptResponseTemplate: unknown For chat endpoint, provide a Chat template. For completion endpoint, provide a Prompt template. Input variables within the template should be specified with double curly bracket syntax: {{INPUT_NAME}}.? For chat endpoint, provide a Chat template. For completion endpoint, provide a Prompt template. Input variables within the template should be specified with double curly bracket syntax: {{INPUT_NAME}}.
+provider=type_:ModelProviders: unknown Supported model providers.? The company providing the underlying model service.
+max_tokens=integer? The maximum number of tokens to generate. Provide max_tokens=-1 to dynamically calculate the maximum number of tokens to generate given the length of the prompt
+temperature=double? What sampling temperature to use when making a generation. Higher values means the model will be more creative.
+top_p=double? An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
+stop=type_:PromptResponseStop: unknown The string (or list of strings) after which the model will stop generating. The returned text will not contain the stop sequence.? The string (or list of strings) after which the model will stop generating. The returned text will not contain the stop sequence.
+presence_penalty=double? Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the generation so far.
+frequency_penalty=double? Number between -2.0 and 2.0. Positive values penalize new tokens based on how frequently they appear in the generation so far.
+other=Map? Other parameter values to be passed to the provider call.
+seed=integer? If specified, model will make a best effort to sample deterministically, but it is not guaranteed.
+response_format=type_:ResponseFormat: unknown Response format of the model.? The format of the response. Only \`{"type": "json_object"}\` is currently supported for chat.
+tools=List? The tool specification that the model can choose to call if Tool calling is supported.
+linked_tools=List? The tools linked to your prompt that the model can call.
+attributes=Map? Additional fields to describe the Prompt. Helpful to separate Prompt versions from each other with details on how they were created or used.
+commit_message=string? Message describing the changes made.
+name=string Name of the Prompt.
+version_id=string Unique identifier for the specific Prompt Version. If no query params provided, the default deployed Prompt Version is returned.
+type=prompt?
+environments=List? The list of environments the Prompt Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Prompt.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Prompt Version.
+last_used_at=datetime
+version_logs_count=integer The number of logs that have been generated for this Prompt Version
+total_logs_count=integer The number of logs that have been generated across all Prompt Versions
+inputs=List Inputs associated to the Prompt. Inputs correspond to any of the variables used within the Prompt template.
+evaluators=List? Evaluators that have been attached to this Prompt that are used for monitoring logs.
+evaluator_aggregates=List? Aggregation of Evaluator results for the Prompt Version. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions.: ",
"endpointPath": [
{
"type": "literal",
@@ -91787,7 +108531,7 @@ within the Prompt for monitoring purposes.",
"method": "POST",
"slug": "docs/v5/api-reference/prompts/update-monitoring",
"title": "Update Monitoring",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -97385,19 +114129,63 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/prompts/log",
- "title": "Prompts",
- },
+ "Humanloop API",
+ "Prompts",
],
- "description": "Deploy Prompt to an Environment.
+ "content": "Deploy Prompt to an Environment.
Set the deployed version for the specified Environment. This Prompt
-will be used for calls made to the Prompt in this Environment.",
+will be used for calls made to the Prompt in this Environment.
+## Path Parameters
+
+- /prompts/
+- id
+- /environments/
+- environment_id
+## Query Parameters
+
+- version_id=string Unique identifier for the specific version of the Prompt.
+## Response
+
+### Body
+
+type_:PromptResponse: path=string Path of the Prompt, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Prompt.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+model=string The model instance used, e.g. \`gpt-4\`. See [supported models](https://humanloop.com/docs/supported-models)
+endpoint=type_:ModelEndpoints: unknown Supported model provider endpoints.? The provider model endpoint used.
+template=type_:PromptResponseTemplate: unknown For chat endpoint, provide a Chat template. For completion endpoint, provide a Prompt template. Input variables within the template should be specified with double curly bracket syntax: {{INPUT_NAME}}.? For chat endpoint, provide a Chat template. For completion endpoint, provide a Prompt template. Input variables within the template should be specified with double curly bracket syntax: {{INPUT_NAME}}.
+provider=type_:ModelProviders: unknown Supported model providers.? The company providing the underlying model service.
+max_tokens=integer? The maximum number of tokens to generate. Provide max_tokens=-1 to dynamically calculate the maximum number of tokens to generate given the length of the prompt
+temperature=double? What sampling temperature to use when making a generation. Higher values means the model will be more creative.
+top_p=double? An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
+stop=type_:PromptResponseStop: unknown The string (or list of strings) after which the model will stop generating. The returned text will not contain the stop sequence.? The string (or list of strings) after which the model will stop generating. The returned text will not contain the stop sequence.
+presence_penalty=double? Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the generation so far.
+frequency_penalty=double? Number between -2.0 and 2.0. Positive values penalize new tokens based on how frequently they appear in the generation so far.
+other=Map? Other parameter values to be passed to the provider call.
+seed=integer? If specified, model will make a best effort to sample deterministically, but it is not guaranteed.
+response_format=type_:ResponseFormat: unknown Response format of the model.? The format of the response. Only \`{"type": "json_object"}\` is currently supported for chat.
+tools=List? The tool specification that the model can choose to call if Tool calling is supported.
+linked_tools=List? The tools linked to your prompt that the model can call.
+attributes=Map? Additional fields to describe the Prompt. Helpful to separate Prompt versions from each other with details on how they were created or used.
+commit_message=string? Message describing the changes made.
+name=string Name of the Prompt.
+version_id=string Unique identifier for the specific Prompt Version. If no query params provided, the default deployed Prompt Version is returned.
+type=prompt?
+environments=List? The list of environments the Prompt Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Prompt.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Prompt Version.
+last_used_at=datetime
+version_logs_count=integer The number of logs that have been generated for this Prompt Version
+total_logs_count=integer The number of logs that have been generated across all Prompt Versions
+inputs=List Inputs associated to the Prompt. Inputs correspond to any of the variables used within the Prompt template.
+evaluators=List? Evaluators that have been attached to this Prompt that are used for monitoring logs.
+evaluator_aggregates=List? Aggregation of Evaluator results for the Prompt Version. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions.: ",
"endpointPath": [
{
"type": "literal",
@@ -97421,7 +114209,7 @@ will be used for calls made to the Prompt in this Environment.",
"method": "POST",
"slug": "docs/v5/api-reference/prompts/set-deployment",
"title": "Set Deployment",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -103220,19 +120008,19 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/prompts/log",
- "title": "Prompts",
- },
+ "Humanloop API",
+ "Prompts",
],
- "description": "Remove deployed Prompt from the Environment.
+ "content": "Remove deployed Prompt from the Environment.
Remove the deployed version for the specified Environment. This Prompt
-will no longer be used for calls made to the Prompt in this Environment.",
+will no longer be used for calls made to the Prompt in this Environment.
+## Path Parameters
+
+- /prompts/
+- id
+- /environments/
+- environment_id",
"endpointPath": [
{
"type": "literal",
@@ -103256,7 +120044,7 @@ will no longer be used for calls made to the Prompt in this Environment.",
"method": "DELETE",
"slug": "docs/v5/api-reference/prompts/remove-deployment",
"title": "Remove Deployment",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -103712,16 +120500,26 @@ will no longer be used for calls made to the Prompt in this Environment.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/prompts/log",
- "title": "Prompts",
- },
+ "Humanloop API",
+ "Prompts",
],
- "description": "List all Environments and their deployed versions for the Prompt.",
+ "content": "List all Environments and their deployed versions for the Prompt.
+## Path Parameters
+
+- /prompts/
+- id
+- /environments
+## Response
+
+### Body
+
+List: ",
"endpointPath": [
{
"type": "literal",
@@ -103741,7 +120539,7 @@ will no longer be used for calls made to the Prompt in this Environment.",
"method": "GET",
"slug": "docs/v5/api-reference/prompts/list-environments",
"title": "List Environments",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -104754,16 +121552,10 @@ in the inheriting classes with documentation and appropriate Field definitions."
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/tools/log",
- "title": "Tools",
- },
+ "Humanloop API",
+ "Tools",
],
- "description": "Log to a Tool.
+ "content": "Log to a Tool.
You can use query parameters \`version_id\`, or \`environment\`, to target
an existing version of the Tool. Otherwise the default deployed version will be chosen.
@@ -104771,7 +121563,53 @@ an existing version of the Tool. Otherwise the default deployed version will be
Instead of targeting an existing version explicitly, you can instead pass in
Tool details in the request body. In this case, we will check if the details correspond
to an existing version of the Tool, if not we will create a new version. This is helpful
-in the case where you are storing or deriving your Tool details in code.",
+in the case where you are storing or deriving your Tool details in code.
+## Path Parameters
+
+- /tools/log
+## Query Parameters
+
+- version_id=string? A specific Version ID of the Tool to log to.
+- environment=string? Name of the Environment identifying a deployed version to log to.
+## Request
+
+### Body
+
+- path=string? Path of the Tool, including the name. This locates the Tool in the Humanloop filesystem and is used as as a unique identifier.
+Example: folder/name or just name.
+- id=string? ID for an existing Tool.
+- start_time=datetime? When the logged event started.
+- end_time=datetime? When the logged event ended.
+- output=string? Generated output from your model for the provided inputs. Can be None if logging an error, or if creating a parent Log with the
+intention to populate it later.
+- created_at=datetime? User defined timestamp for when the log was created.
+- error=string? Error message if the log is an error.
+- provider_latency=double? Duration of the logged event in seconds.
+- stdout=string? Captured log and debug statements.
+- provider_request=Map? Raw request sent to provider.
+- provider_response=Map? Raw response received the provider.
+- inputs=Map? The inputs passed to the prompt template.
+- source=string? Identifies where the model was called from.
+- metadata=Map? Any additional metadata to record.
+- source_datapoint_id=string? Unique identifier for the Datapoint that this Log is derived from. This can be used by Humanloop to associate Logs to Evaluations.
+If provided, Humanloop will automatically associate this Log to Evaluations that require a Log for this Datapoint-Version pair.
+- trace_parent_id=string? The ID of the parent Log to nest this Log under in a Trace.
+- batches=List? Array of Batch Ids that this log is part of. Batches are used to group Logs together for offline Evaluations
+- user=string? End-user ID related to the Log.
+- environment=string? The name of the Environment the Log is associated to.
+- save=boolean? Whether the request/response payloads will be stored on Humanloop.
+- tool=type_:ToolKernelRequest: function=unknown Callable function specification of the Tool shown to the model for tool calling.
+source_code=unknown Code source of the Tool.
+setup_values=unknown Values needed to setup the Tool, defined in JSON Schema format: https://json-schema.org/
+attributes=unknown Additional fields to describe the Tool. Helpful to separate Tool versions from each other with details on how they were created or used. ? Details of your Tool. A new Tool version will be created if the provided details are new.
+## Response
+
+### Body
+
+type_:CreateToolLogResponse: id=string String ID of log.
+tool_id=string ID of the Tool the log belongs to.
+version_id=string ID of the specific version of the Tool.
+session_id=string? String ID of session the log belongs to. : ",
"endpointPath": [
{
"type": "literal",
@@ -104783,7 +121621,7 @@ in the case where you are storing or deriving your Tool details in code.",
"method": "POST",
"slug": "docs/v5/api-reference/tools/log",
"title": "Log",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -106829,18 +123667,149 @@ in the case where you are storing or deriving your Tool details in code.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/tools/log",
- "title": "Tools",
- },
+ "Humanloop API",
+ "Tools",
],
- "description": "Update a Log.
+ "content": "Update a Log.
+
+Update the details of a Log with the given ID.
+## Path Parameters
+
+- /tools/
+- id
+- /log/
+- log_id
+## Request
+
+### Body
+
+- output=string? Generated output from your model for the provided inputs. Can be None if logging an error, or if creating a parent Log with the
+intention to populate it later.
+- created_at=datetime? User defined timestamp for when the log was created.
+- error=string? Error message if the log is an error.
+- provider_latency=double? Duration of the logged event in seconds.
+- stdout=string? Captured log and debug statements.
+- provider_request=Map? Raw request sent to provider.
+- provider_response=Map? Raw response received the provider.
+- inputs=Map? The inputs passed to the prompt template.
+- source=string? Identifies where the model was called from.
+- metadata=Map? Any additional metadata to record.
+- start_time=datetime? When the logged event started.
+- end_time=datetime? When the logged event ended.
+## Response
+
+### Body
+
+type_:LogResponse: type_:PromptLogResponse: output_message=unknown The message returned by the provider.
+prompt_tokens=unknown Number of tokens in the prompt used to generate the output.
+output_tokens=unknown Number of tokens in the output generated by the model.
+prompt_cost=unknown Cost in dollars associated to the tokens in the prompt.
+output_cost=unknown Cost in dollars associated to the tokens in the output.
+finish_reason=unknown Reason the generation finished.
+messages=unknown The messages passed to the to provider chat endpoint.
+tool_choice=unknown Controls how the model uses tools. The following options are supported:
+
+- \`'none'\` means the model will not call any tool and instead generates a message; this is the default when no tools are provided as part of the Prompt.
+- \`'auto'\` means the model can decide to call one or more of the provided tools; this is the default when tools are provided as part of the Prompt.
+- \`'required'\` means the model can decide to call one or more of the provided tools.
+- \`{'type': 'function', 'function': {name': }}\` forces the model to use the named function.
+prompt=unknown Prompt used to generate the Log.
+start_time=unknown When the logged event started.
+end_time=unknown When the logged event ended.
+output=unknown Generated output from your model for the provided inputs. Can be \`None\` if logging an error, or if creating a parent Log with the intention to populate it later.
+created_at=unknown User defined timestamp for when the log was created.
+error=unknown Error message if the log is an error.
+provider_latency=unknown Duration of the logged event in seconds.
+stdout=unknown Captured log and debug statements.
+provider_request=unknown Raw request sent to provider.
+provider_response=unknown Raw response received the provider.
+inputs=unknown The inputs passed to the prompt template.
+source=unknown Identifies where the model was called from.
+metadata=unknown Any additional metadata to record.
+source_datapoint_id=unknown Unique identifier for the Datapoint that this Log is derived from. This can be used by Humanloop to associate Logs to Evaluations. If provided, Humanloop will automatically associate this Log to Evaluations that require a Log for this Datapoint-Version pair.
+trace_parent_id=unknown The ID of the parent Log to nest this Log under in a Trace.
+batches=unknown Array of Batch Ids that this log is part of. Batches are used to group Logs together for offline Evaluations
+user=unknown End-user ID related to the Log.
+environment=unknown The name of the Environment the Log is associated to.
+save=unknown Whether the request/response payloads will be stored on Humanloop.
+id=unknown Unique identifier for the Log.
+evaluator_logs=unknown List of Evaluator Logs associated with the Log. These contain Evaluator judgments on the Log.
+trace_flow_id=unknown Identifier for the Flow that the Trace belongs to.
+trace_id=unknown Identifier for the Trace that the Log belongs to.
+trace_children=unknown Logs nested under this Log in the Trace. General request for creating a Log Prompt Log Response | type_:ToolLogResponse: start_time=unknown When the logged event started.
+end_time=unknown When the logged event ended.
+output=unknown Generated output from your model for the provided inputs. Can be \`None\` if logging an error, or if creating a parent Log with the intention to populate it later.
+created_at=unknown User defined timestamp for when the log was created.
+error=unknown Error message if the log is an error.
+provider_latency=unknown Duration of the logged event in seconds.
+stdout=unknown Captured log and debug statements.
+provider_request=unknown Raw request sent to provider.
+provider_response=unknown Raw response received the provider.
+inputs=unknown The inputs passed to the prompt template.
+source=unknown Identifies where the model was called from.
+metadata=unknown Any additional metadata to record.
+source_datapoint_id=unknown Unique identifier for the Datapoint that this Log is derived from. This can be used by Humanloop to associate Logs to Evaluations. If provided, Humanloop will automatically associate this Log to Evaluations that require a Log for this Datapoint-Version pair.
+trace_parent_id=unknown The ID of the parent Log to nest this Log under in a Trace.
+batches=unknown Array of Batch Ids that this log is part of. Batches are used to group Logs together for offline Evaluations
+user=unknown End-user ID related to the Log.
+environment=unknown The name of the Environment the Log is associated to.
+save=unknown Whether the request/response payloads will be stored on Humanloop.
+id=unknown Unique identifier for the Log.
+evaluator_logs=unknown List of Evaluator Logs associated with the Log. These contain Evaluator judgments on the Log.
+trace_flow_id=unknown Identifier for the Flow that the Trace belongs to.
+trace_id=unknown Identifier for the Trace that the Log belongs to.
+trace_children=unknown Logs nested under this Log in the Trace.
+tool=unknown Tool used to generate the Log. General request for creating a Log Tool Log Response | type_:EvaluatorLogResponse: start_time=unknown When the logged event started.
+end_time=unknown When the logged event ended.
+output=unknown Generated output from your model for the provided inputs. Can be \`None\` if logging an error, or if creating a parent Log with the intention to populate it later.
+created_at=unknown User defined timestamp for when the log was created.
+error=unknown Error message if the log is an error.
+provider_latency=unknown Duration of the logged event in seconds.
+stdout=unknown Captured log and debug statements.
+provider_request=unknown Raw request sent to provider.
+provider_response=unknown Raw response received the provider.
+inputs=unknown The inputs passed to the prompt template.
+source=unknown Identifies where the model was called from.
+metadata=unknown Any additional metadata to record.
+parent_id=unknown Identifier of the evaluated Log. The newly created Log will have this one set as parent.
+source_datapoint_id=unknown Unique identifier for the Datapoint that this Log is derived from. This can be used by Humanloop to associate Logs to Evaluations. If provided, Humanloop will automatically associate this Log to Evaluations that require a Log for this Datapoint-Version pair.
+trace_parent_id=unknown The ID of the parent Log to nest this Log under in a Trace.
+batches=unknown Array of Batch Ids that this log is part of. Batches are used to group Logs together for offline Evaluations
+user=unknown End-user ID related to the Log.
+environment=unknown The name of the Environment the Log is associated to.
+save=unknown Whether the request/response payloads will be stored on Humanloop.
+judgment=unknown Evaluator assessment of the Log.
+id=unknown Unique identifier for the Log.
+evaluator_logs=unknown List of Evaluator Logs associated with the Log. These contain Evaluator judgments on the Log.
+trace_flow_id=unknown Identifier for the Flow that the Trace belongs to.
+trace_id=unknown Identifier for the Trace that the Log belongs to.
+trace_children=unknown Logs nested under this Log in the Trace.
+evaluator=unknown Evaluator used to generate the judgment.
+parent=unknown The Log that was evaluated. Only provided if the ?include_parent query parameter is set for the General request for creating a Log Evaluator Log Response | type_:FlowLogResponse: start_time=unknown When the logged event started.
+end_time=unknown When the logged event ended.
+output=unknown Generated output from your model for the provided inputs. Can be \`None\` if logging an error, or if creating a parent Log with the intention to populate it later.
+created_at=unknown User defined timestamp for when the log was created.
+error=unknown Error message if the log is an error.
+provider_latency=unknown Duration of the logged event in seconds.
+stdout=unknown Captured log and debug statements.
+provider_request=unknown Raw request sent to provider.
+provider_response=unknown Raw response received the provider.
+inputs=unknown The inputs passed to the Flow Log.
+source=unknown Identifies where the model was called from.
+metadata=unknown Any additional metadata to record.
+source_datapoint_id=unknown Unique identifier for the Datapoint that this Log is derived from. This can be used by Humanloop to associate Logs to Evaluations. If provided, Humanloop will automatically associate this Log to Evaluations that require a Log for this Datapoint-Version pair.
+trace_parent_id=unknown The ID of the parent Log to nest this Log under in a Trace.
+batches=unknown Array of Batch Ids that this log is part of. Batches are used to group Logs together for offline Evaluations
+user=unknown End-user ID related to the Log.
+environment=unknown The name of the Environment the Log is associated to.
+save=unknown Whether the request/response payloads will be stored on Humanloop.
+id=unknown Unique identifier for the Log.
+evaluator_logs=unknown List of Evaluator Logs associated with the Log. These contain Evaluator judgments on the Log.
+trace_flow_id=unknown Identifier for the Flow that the Trace belongs to.
+trace_id=unknown Identifier for the Trace that the Log belongs to.
+trace_children=unknown Logs nested under this Log in the Trace.
+flow=unknown Flow used to generate the Log.
+trace_status=unknown Status of the Trace. When a Trace is marked as \`complete\`, no more Logs can be added to it. Monitoring Evaluators will only run on completed Traces. General request for creating a Log Flow Log Response : ",
"endpointPath": [
{
"type": "literal",
@@ -106864,7 +123833,7 @@ Update the details of a Log with the given ID.",
"method": "PATCH",
"slug": "docs/v5/api-reference/tools/update",
"title": "Update Log",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -124301,16 +141270,32 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/tools/log",
- "title": "Tools",
- },
+ "Humanloop API",
+ "Tools",
],
- "description": "Get a list of all Tools.",
+ "content": "Get a list of all Tools.
+## Path Parameters
+
+- /tools
+## Query Parameters
+
+- page=integer? Page offset for pagination.
+- size=integer? Page size for pagination. Number of Tools to fetch.
+- name=string? Case-insensitive filter for Tool name.
+- user_filter=string? Case-insensitive filter for users in the Tool. This filter matches against both email address and name of users.
+- sort_by=type_:ProjectSortBy: created_at (),updated_at (),name () An enumeration.? Field to sort Tools by
+- order=type_:SortOrder: asc (),desc () An enumeration.? Direction to sort by.
+## Response
+
+### Body
+
+type_:PaginatedDataToolResponse: records=List
+page=integer
+size=integer
+total=integer : ",
"endpointPath": [
{
"type": "literal",
@@ -124322,7 +141307,7 @@ An enumeration.",
"method": "GET",
"slug": "docs/v5/api-reference/tools/list",
"title": "List ",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -126953,22 +143938,67 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/tools/log",
- "title": "Tools",
- },
+ "Humanloop API",
+ "Tools",
],
- "description": "Create a Tool or update it with a new version if it already exists.
+ "content": "Create a Tool or update it with a new version if it already exists.
Tools are identified by the \`ID\` or their \`path\`. The name, description and parameters determine the versions of the Tool.
If you provide a commit message, then the new version will be committed;
otherwise it will be uncommitted. If you try to commit an already committed version,
-an exception will be raised.",
+an exception will be raised.
+## Path Parameters
+
+- /tools
+## Request
+
+### Body
+
+- path=string? Path of the Tool, including the name. This locates the Tool in the Humanloop filesystem and is used as as a unique identifier.
+Example: folder/name or just name.
+- id=string? ID for an existing Tool.
+- function=type_:ToolFunction: name=unknown Name for the tool referenced by the model.
+description=unknown Description of the tool referenced by the model
+strict=unknown If true, forces the model to output json data in the structure of the parameters schema.
+parameters=unknown Parameters needed to run the Tool, defined in JSON Schema format: https://json-schema.org/ ? Callable function specification of the Tool shown to the model for tool calling.
+- source_code=string? Code source of the Tool.
+- setup_values=Map? Values needed to setup the Tool, defined in JSON Schema format: https://json-schema.org/ [https://json-schema.org/]
+- attributes=Map? Additional fields to describe the Tool. Helpful to separate Tool versions from each other with details on how they were created or
+used.
+- tool_type=type_:FilesToolType: pinecone_search (),google (),mock (),snippet (),json_schema (),get_api_call () Type of tool.? Type of Tool.
+- commit_message=string? Message describing the changes made.
+## Response
+
+### Body
+
+type_:ToolResponse: path=string Path of the Tool, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Tool.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+function=type_:ToolFunction: unknown ? Callable function specification of the Tool shown to the model for tool calling.
+source_code=string? Code source of the Tool.
+setup_values=Map? Values needed to setup the Tool, defined in JSON Schema format: https://json-schema.org/
+attributes=Map? Additional fields to describe the Tool. Helpful to separate Tool versions from each other with details on how they were created or used.
+tool_type=type_:FilesToolType: unknown Type of tool.? Type of Tool.
+commit_message=string? Message describing the changes made.
+name=string Name of the Tool, which is used as a unique identifier.
+version_id=string Unique identifier for the specific Tool Version. If no query params provided, the default deployed Tool Version is returned.
+type=tool?
+environments=List? The list of environments the Tool Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Tool.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Tool Version.
+last_used_at=datetime
+version_logs_count=integer The number of logs that have been generated for this Tool Version
+total_logs_count=integer The number of logs that have been generated across all Tool Versions
+inputs=List Inputs associated to the Prompt. Inputs correspond to any of the variables used within the Tool template.
+evaluators=List? Evaluators that have been attached to this Tool that are used for monitoring logs.
+signature=string? Signature of the Tool.
+evaluator_aggregates=List? Aggregation of Evaluator results for the Tool Version. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions.: ",
"endpointPath": [
{
"type": "literal",
@@ -126980,7 +144010,7 @@ an exception will be raised.",
"method": "POST",
"slug": "docs/v5/api-reference/tools/upsert",
"title": "Upsert",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -130719,19 +147749,52 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/tools/log",
- "title": "Tools",
- },
+ "Humanloop API",
+ "Tools",
],
- "description": "Retrieve the Tool with the given ID.
+ "content": "Retrieve the Tool with the given ID.
By default, the deployed version of the Tool is returned. Use the query parameters
-\`version_id\` or \`environment\` to target a specific version of the Tool.",
+\`version_id\` or \`environment\` to target a specific version of the Tool.
+## Path Parameters
+
+- /tools/
+- id
+## Query Parameters
+
+- version_id=string? A specific Version ID of the Tool to retrieve.
+- environment=string? Name of the Environment to retrieve a deployed Version from.
+## Response
+
+### Body
+
+type_:ToolResponse: path=string Path of the Tool, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Tool.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+function=type_:ToolFunction: unknown ? Callable function specification of the Tool shown to the model for tool calling.
+source_code=string? Code source of the Tool.
+setup_values=Map? Values needed to setup the Tool, defined in JSON Schema format: https://json-schema.org/
+attributes=Map? Additional fields to describe the Tool. Helpful to separate Tool versions from each other with details on how they were created or used.
+tool_type=type_:FilesToolType: unknown Type of tool.? Type of Tool.
+commit_message=string? Message describing the changes made.
+name=string Name of the Tool, which is used as a unique identifier.
+version_id=string Unique identifier for the specific Tool Version. If no query params provided, the default deployed Tool Version is returned.
+type=tool?
+environments=List? The list of environments the Tool Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Tool.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Tool Version.
+last_used_at=datetime
+version_logs_count=integer The number of logs that have been generated for this Tool Version
+total_logs_count=integer The number of logs that have been generated across all Tool Versions
+inputs=List Inputs associated to the Prompt. Inputs correspond to any of the variables used within the Tool template.
+evaluators=List? Evaluators that have been attached to this Tool that are used for monitoring logs.
+signature=string? Signature of the Tool.
+evaluator_aggregates=List? Aggregation of Evaluator results for the Tool Version. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions.: ",
"endpointPath": [
{
"type": "literal",
@@ -130747,7 +147810,7 @@ By default, the deployed version of the Tool is returned. Use the query paramete
"method": "GET",
"slug": "docs/v5/api-reference/tools/get",
"title": "Get",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -134140,16 +151203,14 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/tools/log",
- "title": "Tools",
- },
+ "Humanloop API",
+ "Tools",
],
- "description": "Delete the Tool with the given ID.",
+ "content": "Delete the Tool with the given ID.
+## Path Parameters
+
+- /tools/
+- id",
"endpointPath": [
{
"type": "literal",
@@ -134165,7 +151226,7 @@ An enumeration.",
"method": "DELETE",
"slug": "docs/v5/api-reference/tools/delete",
"title": "Delete",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -134463,16 +151524,51 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/tools/log",
- "title": "Tools",
- },
+ "Humanloop API",
+ "Tools",
],
- "description": "Move the Tool to a different path or change the name.",
+ "content": "Move the Tool to a different path or change the name.
+## Path Parameters
+
+- /tools/
+- id
+## Request
+
+### Body
+
+- path=string? Path of the Tool including the Tool name, which is used as a unique identifier.
+- name=string? Name of the Tool, which is used as a unique identifier.
+## Response
+
+### Body
+
+type_:ToolResponse: path=string Path of the Tool, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Tool.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+function=type_:ToolFunction: unknown ? Callable function specification of the Tool shown to the model for tool calling.
+source_code=string? Code source of the Tool.
+setup_values=Map? Values needed to setup the Tool, defined in JSON Schema format: https://json-schema.org/
+attributes=Map? Additional fields to describe the Tool. Helpful to separate Tool versions from each other with details on how they were created or used.
+tool_type=type_:FilesToolType: unknown Type of tool.? Type of Tool.
+commit_message=string? Message describing the changes made.
+name=string Name of the Tool, which is used as a unique identifier.
+version_id=string Unique identifier for the specific Tool Version. If no query params provided, the default deployed Tool Version is returned.
+type=tool?
+environments=List? The list of environments the Tool Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Tool.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Tool Version.
+last_used_at=datetime
+version_logs_count=integer The number of logs that have been generated for this Tool Version
+total_logs_count=integer The number of logs that have been generated across all Tool Versions
+inputs=List Inputs associated to the Prompt. Inputs correspond to any of the variables used within the Tool template.
+evaluators=List? Evaluators that have been attached to this Tool that are used for monitoring logs.
+signature=string? Signature of the Tool.
+evaluator_aggregates=List? Aggregation of Evaluator results for the Tool Version. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions.: ",
"endpointPath": [
{
"type": "literal",
@@ -134488,7 +151584,7 @@ An enumeration.",
"method": "PATCH",
"slug": "docs/v5/api-reference/tools/move",
"title": "Move",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -137818,16 +154914,27 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/tools/log",
- "title": "Tools",
- },
+ "Humanloop API",
+ "Tools",
],
- "description": "Get a list of all the versions of a Tool.",
+ "content": "Get a list of all the versions of a Tool.
+## Path Parameters
+
+- /tools/
+- id
+- /versions
+## Query Parameters
+
+- status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration.? Filter versions by status: 'uncommitted', 'committed'. If no status is provided, all versions are returned.
+- evaluator_aggregates=boolean? Whether to include Evaluator aggregate results for the versions in the response
+## Response
+
+### Body
+
+type_:ListTools: records=List The list of Tools. : ",
"endpointPath": [
{
"type": "literal",
@@ -137847,7 +154954,7 @@ An enumeration.",
"method": "GET",
"slug": "docs/v5/api-reference/tools/list-versions",
"title": "List Versions",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -140403,18 +157510,55 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/tools/log",
- "title": "Tools",
- },
+ "Humanloop API",
+ "Tools",
],
- "description": "Commit a version of the Tool with a commit message.
+ "content": "Commit a version of the Tool with a commit message.
+
+If the version is already committed, an exception will be raised.
+## Path Parameters
-If the version is already committed, an exception will be raised.",
+- /tools/
+- id
+- /versions/
+- version_id
+- /commit
+## Request
+
+### Body
+
+type_:CommitRequest: commit_message=string Message describing the changes made. :
+## Response
+
+### Body
+
+type_:ToolResponse: path=string Path of the Tool, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Tool.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+function=type_:ToolFunction: unknown ? Callable function specification of the Tool shown to the model for tool calling.
+source_code=string? Code source of the Tool.
+setup_values=Map? Values needed to setup the Tool, defined in JSON Schema format: https://json-schema.org/
+attributes=Map? Additional fields to describe the Tool. Helpful to separate Tool versions from each other with details on how they were created or used.
+tool_type=type_:FilesToolType: unknown Type of tool.? Type of Tool.
+commit_message=string? Message describing the changes made.
+name=string Name of the Tool, which is used as a unique identifier.
+version_id=string Unique identifier for the specific Tool Version. If no query params provided, the default deployed Tool Version is returned.
+type=tool?
+environments=List? The list of environments the Tool Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Tool.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Tool Version.
+last_used_at=datetime
+version_logs_count=integer The number of logs that have been generated for this Tool Version
+total_logs_count=integer The number of logs that have been generated across all Tool Versions
+inputs=List Inputs associated to the Prompt. Inputs correspond to any of the variables used within the Tool template.
+evaluators=List? Evaluators that have been attached to this Tool that are used for monitoring logs.
+signature=string? Signature of the Tool.
+evaluator_aggregates=List? Aggregation of Evaluator results for the Tool Version. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions.: ",
"endpointPath": [
{
"type": "literal",
@@ -140442,7 +157586,7 @@ If the version is already committed, an exception will be raised.",
"method": "POST",
"slug": "docs/v5/api-reference/tools/commit",
"title": "Commit",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -144711,19 +161855,55 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/tools/log",
- "title": "Tools",
- },
+ "Humanloop API",
+ "Tools",
],
- "description": "Activate and deactivate Evaluators for monitoring the Tool.
+ "content": "Activate and deactivate Evaluators for monitoring the Tool.
An activated Evaluator will automatically be run on all new Logs
-within the Tool for monitoring purposes.",
+within the Tool for monitoring purposes.
+## Path Parameters
+
+- /tools/
+- id
+- /evaluators
+## Request
+
+### Body
+
+type_:EvaluatorActivationDeactivationRequest: activate=List? Evaluators to activate for Monitoring. These will be automatically run on new Logs.
+deactivate=List? Evaluators to deactivate. These will not be run on new Logs. :
+## Response
+
+### Body
+
+type_:ToolResponse: path=string Path of the Tool, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Tool.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+function=type_:ToolFunction: unknown ? Callable function specification of the Tool shown to the model for tool calling.
+source_code=string? Code source of the Tool.
+setup_values=Map? Values needed to setup the Tool, defined in JSON Schema format: https://json-schema.org/
+attributes=Map? Additional fields to describe the Tool. Helpful to separate Tool versions from each other with details on how they were created or used.
+tool_type=type_:FilesToolType: unknown Type of tool.? Type of Tool.
+commit_message=string? Message describing the changes made.
+name=string Name of the Tool, which is used as a unique identifier.
+version_id=string Unique identifier for the specific Tool Version. If no query params provided, the default deployed Tool Version is returned.
+type=tool?
+environments=List? The list of environments the Tool Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Tool.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Tool Version.
+last_used_at=datetime
+version_logs_count=integer The number of logs that have been generated for this Tool Version
+total_logs_count=integer The number of logs that have been generated across all Tool Versions
+inputs=List Inputs associated to the Prompt. Inputs correspond to any of the variables used within the Tool template.
+evaluators=List? Evaluators that have been attached to this Tool that are used for monitoring logs.
+signature=string? Signature of the Tool.
+evaluator_aggregates=List? Aggregation of Evaluator results for the Tool Version. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions.: ",
"endpointPath": [
{
"type": "literal",
@@ -144743,7 +161923,7 @@ within the Tool for monitoring purposes.",
"method": "POST",
"slug": "docs/v5/api-reference/tools/update-monitoring",
"title": "Update Monitoring",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -148663,19 +165843,53 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/tools/log",
- "title": "Tools",
- },
+ "Humanloop API",
+ "Tools",
],
- "description": "Deploy Tool to an Environment.
+ "content": "Deploy Tool to an Environment.
Set the deployed version for the specified Environment. This Prompt
-will be used for calls made to the Tool in this Environment.",
+will be used for calls made to the Tool in this Environment.
+## Path Parameters
+
+- /tools/
+- id
+- /environments/
+- environment_id
+## Query Parameters
+
+- version_id=string Unique identifier for the specific version of the Tool.
+## Response
+
+### Body
+
+type_:ToolResponse: path=string Path of the Tool, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Tool.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+function=type_:ToolFunction: unknown ? Callable function specification of the Tool shown to the model for tool calling.
+source_code=string? Code source of the Tool.
+setup_values=Map? Values needed to setup the Tool, defined in JSON Schema format: https://json-schema.org/
+attributes=Map? Additional fields to describe the Tool. Helpful to separate Tool versions from each other with details on how they were created or used.
+tool_type=type_:FilesToolType: unknown Type of tool.? Type of Tool.
+commit_message=string? Message describing the changes made.
+name=string Name of the Tool, which is used as a unique identifier.
+version_id=string Unique identifier for the specific Tool Version. If no query params provided, the default deployed Tool Version is returned.
+type=tool?
+environments=List? The list of environments the Tool Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Tool.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Tool Version.
+last_used_at=datetime
+version_logs_count=integer The number of logs that have been generated for this Tool Version
+total_logs_count=integer The number of logs that have been generated across all Tool Versions
+inputs=List Inputs associated to the Prompt. Inputs correspond to any of the variables used within the Tool template.
+evaluators=List? Evaluators that have been attached to this Tool that are used for monitoring logs.
+signature=string? Signature of the Tool.
+evaluator_aggregates=List? Aggregation of Evaluator results for the Tool Version. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions.: ",
"endpointPath": [
{
"type": "literal",
@@ -148699,7 +165913,7 @@ will be used for calls made to the Tool in this Environment.",
"method": "POST",
"slug": "docs/v5/api-reference/tools/set-deployment",
"title": "Set Deployment",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -152684,19 +169898,19 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/tools/log",
- "title": "Tools",
- },
+ "Humanloop API",
+ "Tools",
],
- "description": "Remove deployed Tool from the Environment.
+ "content": "Remove deployed Tool from the Environment.
Remove the deployed version for the specified Environment. This Tool
-will no longer be used for calls made to the Tool in this Environment.",
+will no longer be used for calls made to the Tool in this Environment.
+## Path Parameters
+
+- /tools/
+- id
+- /environments/
+- environment_id",
"endpointPath": [
{
"type": "literal",
@@ -152720,7 +169934,7 @@ will no longer be used for calls made to the Tool in this Environment.",
"method": "DELETE",
"slug": "docs/v5/api-reference/tools/remove-deployment",
"title": "Remove Deployment",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -153176,16 +170390,26 @@ will no longer be used for calls made to the Tool in this Environment.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/tools/log",
- "title": "Tools",
- },
+ "Humanloop API",
+ "Tools",
],
- "description": "List all Environments and their deployed versions for the Tool.",
+ "content": "List all Environments and their deployed versions for the Tool.
+## Path Parameters
+
+- /tools/
+- id
+- /environments
+## Response
+
+### Body
+
+List: ",
"endpointPath": [
{
"type": "literal",
@@ -153205,7 +170429,7 @@ will no longer be used for calls made to the Tool in this Environment.",
"method": "GET",
"slug": "docs/v5/api-reference/tools/list-environments",
"title": "List Environments",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -154218,16 +171442,32 @@ in the inheriting classes with documentation and appropriate Field definitions."
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/datasets/list",
- "title": "Datasets",
- },
+ "Humanloop API",
+ "Datasets",
],
- "description": "List all Datasets.",
+ "content": "List all Datasets.
+## Path Parameters
+
+- /datasets
+## Query Parameters
+
+- page=integer? Page offset for pagination.
+- size=integer? Page size for pagination. Number of Datasets to fetch.
+- name=string? Case-insensitive filter for Dataset name.
+- user_filter=string? Case-insensitive filter for users in the Dataset. This filter matches against both email address and name of users.
+- sort_by=type_:ProjectSortBy: created_at (),updated_at (),name () An enumeration.? Field to sort Datasets by
+- order=type_:SortOrder: asc (),desc () An enumeration.? Direction to sort by.
+## Response
+
+### Body
+
+type_:PaginatedDatasetResponse: records=List
+page=integer
+size=integer
+total=integer : ",
"endpointPath": [
{
"type": "literal",
@@ -154239,7 +171479,7 @@ in the inheriting classes with documentation and appropriate Field definitions."
"method": "GET",
"slug": "docs/v5/api-reference/datasets/list",
"title": "List ",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -156242,16 +173482,10 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/datasets/list",
- "title": "Datasets",
- },
+ "Humanloop API",
+ "Datasets",
],
- "description": "Create a Dataset or update it with a new version if it already exists.
+ "content": "Create a Dataset or update it with a new version if it already exists.
Datasets are identified by the \`ID\` or their \`path\`. The datapoints determine the versions of the Dataset.
@@ -156267,7 +173501,60 @@ an exception will be raised.
Humanloop also deduplicates Datapoints. If you try to add a Datapoint that already
exists, it will be ignored. If you intentionally want to add a duplicate Datapoint,
-you can add a unique identifier to the Datapoint's inputs such as \`{_dedupe_id: }\`.",
+you can add a unique identifier to the Datapoint's inputs such as \`{_dedupe_id: }\`.
+## Path Parameters
+
+- /datasets
+## Query Parameters
+
+- version_id=string? ID of the specific Dataset version to base the created Version on. Only used when action is "add" or "remove".
+- environment=string? Name of the Environment identifying a deployed Version to base the created Version on. Only used when action is "add" or "remove".
+## Request
+
+### Body
+
+- path=string? Path of the Dataset, including the name. This locates the Dataset in the Humanloop filesystem and is used as as a unique
+identifier. Example: folder/name or just name.
+- id=string? ID for an existing Dataset.
+- datapoints=List The Datapoints to create this Dataset version with. Modify the action field to determine how these Datapoints are used.
+- action=type_:UpdateDatesetAction: set (),add (),remove () An enumeration.? The action to take with the provided Datapoints.
+
+ * If "set", the created version will only contain the Datapoints provided in this request.
+ * If "add", the created version will contain the Datapoints provided in this request in addition to the Datapoints in the target
+ version.
+ * If "remove", the created version will contain the Datapoints in the target version except for the Datapoints provided in this
+ request.
+
+If "add" or "remove", one of the version_id or environment query parameters may be provided.
+- attributes=Map? Additional fields to describe the Dataset. Helpful to separate Dataset versions from each other with details on how they were
+created or used.
+- commit_message=string? Message describing the changes made. If provided, a committed version of the Dataset is created. Otherwise, an uncommitted version
+is created.
+## Response
+
+### Body
+
+type_:DatasetResponse: path=string Path of the Dataset, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Dataset. Starts with \`ds_\`.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+name=string Name of the Dataset, which is used as a unique identifier.
+version_id=string Unique identifier for the specific Dataset Version. If no query params provided, the default deployed Dataset Version is returned. Starts with \`dsv_\`.
+type=dataset?
+environments=List? The list of environments the Dataset Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Dataset.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Dataset Version.
+last_used_at=datetime
+commit_message=string? Message describing the changes made. If provided, a committed version of the Dataset is created. Otherwise, an uncommitted version is created.
+datapoints_count=integer The number of Datapoints in this Dataset version.
+datapoints=List? The list of Datapoints in this Dataset version. Only provided if explicitly requested.
+attributes=Map? Additional fields to describe the Dataset. Helpful to separate Dataset versions from each other with details on how they were created or used. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions.: ",
"endpointPath": [
{
"type": "literal",
@@ -156279,7 +173566,7 @@ you can add a unique identifier to the Datapoint's inputs such as \`{_dedupe_id:
"method": "POST",
"slug": "docs/v5/api-reference/datasets/upsert",
"title": "Upsert",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -158600,16 +175887,10 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/datasets/list",
- "title": "Datasets",
- },
+ "Humanloop API",
+ "Datasets",
],
- "description": "Retrieve the Dataset with the given ID.
+ "content": "Retrieve the Dataset with the given ID.
Unless \`include_datapoints\` is set to \`true\`, the response will not include
the Datapoints.
@@ -158617,7 +175898,40 @@ Use the List Datapoints endpoint (\`GET /{id}/datapoints\`) to efficiently
retrieve Datapoints for a large Dataset.
By default, the deployed version of the Dataset is returned. Use the query parameters
-\`version_id\` or \`environment\` to target a specific version of the Dataset.",
+\`version_id\` or \`environment\` to target a specific version of the Dataset.
+## Path Parameters
+
+- /datasets/
+- id
+## Query Parameters
+
+- version_id=string? A specific Version ID of the Dataset to retrieve.
+- environment=string? Name of the Environment to retrieve a deployed Version from.
+- include_datapoints=boolean? If set to true, include all Datapoints in the response. Defaults to false. Consider using the paginated List Datapoints endpoint
+instead.
+## Response
+
+### Body
+
+type_:DatasetResponse: path=string Path of the Dataset, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Dataset. Starts with \`ds_\`.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+name=string Name of the Dataset, which is used as a unique identifier.
+version_id=string Unique identifier for the specific Dataset Version. If no query params provided, the default deployed Dataset Version is returned. Starts with \`dsv_\`.
+type=dataset?
+environments=List? The list of environments the Dataset Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Dataset.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Dataset Version.
+last_used_at=datetime
+commit_message=string? Message describing the changes made. If provided, a committed version of the Dataset is created. Otherwise, an uncommitted version is created.
+datapoints_count=integer The number of Datapoints in this Dataset version.
+datapoints=List? The list of Datapoints in this Dataset version. Only provided if explicitly requested.
+attributes=Map? Additional fields to describe the Dataset. Helpful to separate Dataset versions from each other with details on how they were created or used. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions.: ",
"endpointPath": [
{
"type": "literal",
@@ -158633,7 +175947,7 @@ By default, the deployed version of the Dataset is returned. Use the query param
"method": "GET",
"slug": "docs/v5/api-reference/datasets/get",
"title": "Get",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -160519,54 +177833,7 @@ An enumeration.",
"title": "Errors",
},
],
- "description": "Validation Error",
- "endpointPath": [
- {
- "type": "literal",
- "value": "/datasets/",
- },
- {
- "type": "pathParameter",
- "value": "id",
- },
- ],
- "extends": undefined,
- "indexSegmentId": "0",
- "isResponseStream": false,
- "method": "GET",
- "slug": "docs/v5/api-reference/datasets/get#response.error.UnprocessableEntity",
- "title": "Unprocessable Entity",
- "type": "endpoint-field-v1",
- "version": {
- "id": "v5.0",
- "slug": "docs/getting-started/overview",
- },
- },
- {
- "availability": undefined,
- "breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/datasets/list",
- "title": "Datasets",
- },
- {
- "slug": "docs/v5/api-reference/datasets/get",
- "title": "Get",
- },
- {
- "slug": "docs/v5/api-reference/datasets/get#response",
- "title": "Response",
- },
- {
- "slug": "docs/v5/api-reference/datasets/get#response.error",
- "title": "Errors",
- },
- ],
- "description": undefined,
+ "description": "Validation Error",
"endpointPath": [
{
"type": "literal",
@@ -160612,10 +177879,6 @@ An enumeration.",
"slug": "docs/v5/api-reference/datasets/get#response.error",
"title": "Errors",
},
- {
- "slug": "docs/v5/api-reference/datasets/get#response.error.UnprocessableEntity",
- "title": "Unprocessable Entity",
- },
],
"description": undefined,
"endpointPath": [
@@ -160632,8 +177895,8 @@ An enumeration.",
"indexSegmentId": "0",
"isResponseStream": false,
"method": "GET",
- "slug": "docs/v5/api-reference/datasets/get#response.error.UnprocessableEntity.detail",
- "title": "detail",
+ "slug": "docs/v5/api-reference/datasets/get#response.error.UnprocessableEntity",
+ "title": "Unprocessable Entity",
"type": "endpoint-field-v1",
"version": {
"id": "v5.0",
@@ -160692,6 +177955,7 @@ An enumeration.",
},
},
{
+ "availability": undefined,
"breadcrumbs": [
{
"slug": "docs/api-reference",
@@ -160701,8 +177965,56 @@ An enumeration.",
"slug": "docs/api-reference/datasets/list",
"title": "Datasets",
},
+ {
+ "slug": "docs/v5/api-reference/datasets/get",
+ "title": "Get",
+ },
+ {
+ "slug": "docs/v5/api-reference/datasets/get#response",
+ "title": "Response",
+ },
+ {
+ "slug": "docs/v5/api-reference/datasets/get#response.error",
+ "title": "Errors",
+ },
+ {
+ "slug": "docs/v5/api-reference/datasets/get#response.error.UnprocessableEntity",
+ "title": "Unprocessable Entity",
+ },
],
- "description": "Delete the Dataset with the given ID.",
+ "description": undefined,
+ "endpointPath": [
+ {
+ "type": "literal",
+ "value": "/datasets/",
+ },
+ {
+ "type": "pathParameter",
+ "value": "id",
+ },
+ ],
+ "extends": undefined,
+ "indexSegmentId": "0",
+ "isResponseStream": false,
+ "method": "GET",
+ "slug": "docs/v5/api-reference/datasets/get#response.error.UnprocessableEntity.detail",
+ "title": "detail",
+ "type": "endpoint-field-v1",
+ "version": {
+ "id": "v5.0",
+ "slug": "docs/getting-started/overview",
+ },
+ },
+ {
+ "breadcrumbs": [
+ "Humanloop API",
+ "Datasets",
+ ],
+ "content": "Delete the Dataset with the given ID.
+## Path Parameters
+
+- /datasets/
+- id",
"endpointPath": [
{
"type": "literal",
@@ -160718,7 +178030,7 @@ An enumeration.",
"method": "DELETE",
"slug": "docs/v5/api-reference/datasets/delete",
"title": "Delete",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -161016,16 +178328,43 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/datasets/list",
- "title": "Datasets",
- },
+ "Humanloop API",
+ "Datasets",
],
- "description": "Move the Dataset to a different path or change the name.",
+ "content": "Move the Dataset to a different path or change the name.
+## Path Parameters
+
+- /datasets/
+- id
+## Request
+
+### Body
+
+- path=string? Path of the Dataset including the Dataset name, which is used as a unique identifier.
+- name=string? Name of the Dataset, which is used as a unique identifier.
+## Response
+
+### Body
+
+type_:DatasetResponse: path=string Path of the Dataset, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Dataset. Starts with \`ds_\`.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+name=string Name of the Dataset, which is used as a unique identifier.
+version_id=string Unique identifier for the specific Dataset Version. If no query params provided, the default deployed Dataset Version is returned. Starts with \`dsv_\`.
+type=dataset?
+environments=List? The list of environments the Dataset Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Dataset.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Dataset Version.
+last_used_at=datetime
+commit_message=string? Message describing the changes made. If provided, a committed version of the Dataset is created. Otherwise, an uncommitted version is created.
+datapoints_count=integer The number of Datapoints in this Dataset version.
+datapoints=List? The list of Datapoints in this Dataset version. Only provided if explicitly requested.
+attributes=Map? Additional fields to describe the Dataset. Helpful to separate Dataset versions from each other with details on how they were created or used. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions.: ",
"endpointPath": [
{
"type": "literal",
@@ -161041,7 +178380,7 @@ An enumeration.",
"method": "PATCH",
"slug": "docs/v5/api-reference/datasets/move",
"title": "Move",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -162944,16 +180283,29 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/datasets/list",
- "title": "Datasets",
- },
+ "Humanloop API",
+ "Datasets",
],
- "description": "List all Datapoints for the Dataset with the given ID.",
+ "content": "List all Datapoints for the Dataset with the given ID.
+## Path Parameters
+
+- /datasets/
+- id
+- /datapoints
+## Query Parameters
+
+- version_id=string? A specific Version ID of the Dataset to retrieve.
+- environment=string? Name of the Environment to retrieve a deployed Version from.
+- page=integer? Page number for pagination.
+- size=integer? Page size for pagination. Number of Datapoints to fetch.
+## Response
+
+### Body
+
+type_:PaginatedDatapointResponse: records=List
+page=integer
+size=integer
+total=integer : ",
"endpointPath": [
{
"type": "literal",
@@ -162973,7 +180325,7 @@ An enumeration.",
"method": "GET",
"slug": "docs/v5/api-reference/datasets/list-datapoints",
"title": "List Datapoints",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -164185,16 +181537,26 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/datasets/list",
- "title": "Datasets",
- },
+ "Humanloop API",
+ "Datasets",
],
- "description": "Get a list of the versions for a Dataset.",
+ "content": "Get a list of the versions for a Dataset.
+## Path Parameters
+
+- /datasets/
+- id
+- /versions
+## Query Parameters
+
+- status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration.? Filter versions by status: 'uncommitted', 'committed'. If no status is provided, all versions are returned.
+## Response
+
+### Body
+
+type_:ListDatasets: records=List The list of Datasets. : ",
"endpointPath": [
{
"type": "literal",
@@ -164214,7 +181576,7 @@ An enumeration.",
"method": "GET",
"slug": "docs/v5/api-reference/datasets/list-versions",
"title": "List Versions",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -165928,18 +183290,47 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/datasets/list",
- "title": "Datasets",
- },
+ "Humanloop API",
+ "Datasets",
],
- "description": "Commit a version of the Dataset with a commit message.
+ "content": "Commit a version of the Dataset with a commit message.
+
+If the version is already committed, an exception will be raised.
+## Path Parameters
+
+- /datasets/
+- id
+- /versions/
+- version_id
+- /commit
+## Request
-If the version is already committed, an exception will be raised.",
+### Body
+
+type_:CommitRequest: commit_message=string Message describing the changes made. :
+## Response
+
+### Body
+
+type_:DatasetResponse: path=string Path of the Dataset, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Dataset. Starts with \`ds_\`.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+name=string Name of the Dataset, which is used as a unique identifier.
+version_id=string Unique identifier for the specific Dataset Version. If no query params provided, the default deployed Dataset Version is returned. Starts with \`dsv_\`.
+type=dataset?
+environments=List? The list of environments the Dataset Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Dataset.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Dataset Version.
+last_used_at=datetime
+commit_message=string? Message describing the changes made. If provided, a committed version of the Dataset is created. Otherwise, an uncommitted version is created.
+datapoints_count=integer The number of Datapoints in this Dataset version.
+datapoints=List? The list of Datapoints in this Dataset version. Only provided if explicitly requested.
+attributes=Map? Additional fields to describe the Dataset. Helpful to separate Dataset versions from each other with details on how they were created or used. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions.: ",
"endpointPath": [
{
"type": "literal",
@@ -165967,7 +183358,7 @@ If the version is already committed, an exception will be raised.",
"method": "POST",
"slug": "docs/v5/api-reference/datasets/commit",
"title": "Commit",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -168437,23 +185828,54 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/datasets/list",
- "title": "Datasets",
- },
+ "Humanloop API",
+ "Datasets",
],
- "description": "Add Datapoints from a CSV file to a Dataset.
+ "content": "Add Datapoints from a CSV file to a Dataset.
This will create a new committed version of the Dataset with the Datapoints from the CSV file.
If either \`version_id\` or \`environment\` is provided, the new version will be based on the specified version,
with the Datapoints from the CSV file added to the existing Datapoints in the version.
If neither \`version_id\` nor \`environment\` is provided, the new version will be based on the version
-of the Dataset that is deployed to the default Environment.",
+of the Dataset that is deployed to the default Environment.
+## Path Parameters
+
+- /datasets/
+- id
+- /datapoints/csv
+## Query Parameters
+
+- version_id=string? ID of the specific Dataset version to base the created Version on.
+- environment=string? Name of the Environment identifying a deployed Version to base the created Version on.
+## Request
+
+### Body
+
+- commit_message=string Commit message for the new Dataset version.
+## Response
+
+### Body
+
+type_:DatasetResponse: path=string Path of the Dataset, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Dataset. Starts with \`ds_\`.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+name=string Name of the Dataset, which is used as a unique identifier.
+version_id=string Unique identifier for the specific Dataset Version. If no query params provided, the default deployed Dataset Version is returned. Starts with \`dsv_\`.
+type=dataset?
+environments=List? The list of environments the Dataset Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Dataset.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Dataset Version.
+last_used_at=datetime
+commit_message=string? Message describing the changes made. If provided, a committed version of the Dataset is created. Otherwise, an uncommitted version is created.
+datapoints_count=integer The number of Datapoints in this Dataset version.
+datapoints=List? The list of Datapoints in this Dataset version. Only provided if explicitly requested.
+attributes=Map? Additional fields to describe the Dataset. Helpful to separate Dataset versions from each other with details on how they were created or used. Base type that all File Responses should inherit from.
+
+Attributes defined here are common to all File Responses and should be overridden
+in the inheriting classes with documentation and appropriate Field definitions.: ",
"endpointPath": [
{
"type": "literal",
@@ -168473,7 +185895,7 @@ of the Dataset that is deployed to the default Environment.",
"method": "POST",
"slug": "docs/v5/api-reference/datasets/upload-csv",
"title": "Upload Csv",
- "type": "endpoint-v4",
+ "type": "endpoint-v3",
"version": {
"id": "v5.0",
"slug": "docs/getting-started/overview",
@@ -170611,18 +188033,44 @@ An enumeration.",
},
{
"breadcrumbs": [
- {
- "slug": "docs/api-reference",
- "title": "Humanloop API",
- },
- {
- "slug": "docs/api-reference/datasets/list",
- "title": "Datasets",
- },
+ "Humanloop API",
+ "Datasets",
],
- "description": "Deploy Dataset to Environment.
+ "content": "Deploy Dataset to Environment.
+
+Set the deployed version for the specified Environment.
+## Path Parameters
-Set the deployed version for the specified Environment.",
+- /datasets/
+- id
+- /environments/
+- environment_id
+## Query Parameters
+
+- version_id=string Unique identifier for the specific version of the Dataset.
+## Response
+
+### Body
+
+type_:DatasetResponse: path=string Path of the Dataset, including the name, which is used as a unique identifier.
+id=string Unique identifier for the Dataset. Starts with \`ds_\`.
+directory_id=string? ID of the directory that the file is in on Humanloop.
+name=string Name of the Dataset, which is used as a unique identifier.
+version_id=string Unique identifier for the specific Dataset Version. If no query params provided, the default deployed Dataset Version is returned. Starts with \`dsv_\`.
+type=dataset?
+environments=List? The list of environments the Dataset Version is deployed to.
+created_at=datetime
+updated_at=datetime
+created_by=type_:UserResponse: unknown ? The user who created the Dataset.
+status=type_:VersionStatus: uncommitted (),committed (),deleted () An enumeration. The status of the Dataset Version.
+last_used_at=datetime
+commit_message=string? Message describing the changes made. If provided, a committed version of the Dataset is created. Otherwise, an uncommitted version is created.
+datapoints_count=integer The number of Datapoints in this Dataset version.
+datapoints=List? The list of Datapoints in this Dataset version. Only provided if explicitly requested.
+attributes=Map