Commit 347390b

Refactorings and cleanups

ddebowczyk committed Oct 9, 2024
1 parent 02e8885 commit 347390b

Showing 38 changed files with 626 additions and 335 deletions.
4 changes: 2 additions & 2 deletions docs/cookbook/examples/advanced/context_cache_llm.mdx

@@ -45,7 +45,7 @@ $inference = (new Inference)->withConnection('anthropic')->withCachedContext(
 $response = $inference->create(
     messages: [['role' => 'user', 'content' => 'CTO of lead gen software vendor']],
     options: ['max_tokens' => 256],
-)->asLLMResponse();
+)->response();

 print("----------------------------------------\n");
 print("\n# Summary for CTO of lead gen vendor\n");
@@ -60,7 +60,7 @@ assert(Str::contains($response->content, 'lead', false));
 $response2 = $inference->create(
     messages: [['role' => 'user', 'content' => 'CIO of insurance company']],
     options: ['max_tokens' => 256],
-)->asLLMResponse();
+)->response();

 print("----------------------------------------\n");
 print("\n# Summary for CIO of insurance company\n");
4 changes: 2 additions & 2 deletions evals/LLMModes/CompareModes.php

@@ -66,8 +66,8 @@ private function execute(string $connection, Mode $mode, bool $isStreamed) : Eva
     notes: $answer,
     isCorrect: $isCorrect,
     timeElapsed: $timeElapsed,
-    inputTokens: $llmResponse->inputTokens,
-    outputTokens: $llmResponse->outputTokens,
+    inputTokens: $llmResponse->usage()->inputTokens,
+    outputTokens: $llmResponse->usage()->outputTokens,
 );
 } catch(Exception $e) {
     $timeElapsed = microtime(true) - $time;
2 changes: 1 addition & 1 deletion evals/LLMModes/Modes.php

@@ -34,7 +34,7 @@ public function callInferenceFor(string|array $query, Mode $mode, string $connec
         Mode::MdJson => $this->forModeMdJson($query, $connection, $schema, $isStreamed),
         Mode::Text => $this->forModeText($query, $connection, $isStreamed),
     };
-    return $inferenceResponse->asLLMResponse();
+    return $inferenceResponse->response();
 }

 public function forModeTools(string|array $query, string $connection, array $schema, bool $isStreamed) : InferenceResponse {
4 changes: 2 additions & 2 deletions examples/A02_Advanced/ContextCacheLLM/run.php

@@ -45,7 +45,7 @@
 $response = $inference->create(
     messages: [['role' => 'user', 'content' => 'CTO of lead gen software vendor']],
     options: ['max_tokens' => 256],
-)->asLLMResponse();
+)->response();

 print("----------------------------------------\n");
 print("\n# Summary for CTO of lead gen vendor\n");
@@ -60,7 +60,7 @@
 $response2 = $inference->create(
     messages: [['role' => 'user', 'content' => 'CIO of insurance company']],
     options: ['max_tokens' => 256],
-)->asLLMResponse();
+)->response();

 print("----------------------------------------\n");
 print("\n# Summary for CIO of insurance company\n");
84 changes: 26 additions & 58 deletions examples/A03_Troubleshooting/TokenUsage/run.php

@@ -6,16 +6,11 @@
 ## Overview

 Some use cases require tracking the token usage of the API responses.
-Currently, this can be done by listening to the `LLMResponseReceived`
-and `PartialLLMResponseReceived` events and summing the token usage
-of the responses.
+This can be done by getting the `Usage` object from the Instructor LLM
+response object.

-Code below demonstrates how it can be implemented using Instructor
-event listeners.
-
-> Note: OpenAI API requires `stream_options` to be set to
-> `['include_usage' => true]` to include token usage in the streamed
-> responses.
+The code below demonstrates how to retrieve it for both sync and
+streamed requests.

 ## Example

@@ -24,75 +19,48 @@
 $loader = require 'vendor/autoload.php';
 $loader->add('Cognesy\\Instructor\\', __DIR__ . '../../src/');

-use Cognesy\Instructor\Events\Inference\LLMResponseReceived;
-use Cognesy\Instructor\Events\Inference\PartialLLMResponseReceived;
-use Cognesy\Instructor\Features\LLM\Data\LLMResponse;
-use Cognesy\Instructor\Features\LLM\Data\PartialLLMResponse;
+use Cognesy\Instructor\Features\LLM\Data\Usage;
 use Cognesy\Instructor\Instructor;

 class User {
     public int $age;
     public string $name;
 }

-class TokenCounter {
-    public int $input = 0;
-    public int $output = 0;
-    public int $cacheCreation = 0;
-    public int $cacheRead = 0;
-
-    public function add(LLMResponse|PartialLLMResponse $response) {
-        $this->input += $response->inputTokens;
-        $this->output += $response->outputTokens;
-        $this->cacheCreation += $response->cacheCreationTokens;
-        $this->cacheRead += $response->cacheReadTokens;
-    }
-
-    public function reset() {
-        $this->input = 0;
-        $this->output = 0;
-        $this->cacheCreation = 0;
-        $this->cacheRead = 0;
-    }
-
-    public function print() {
-        echo "Input tokens: $this->input\n";
-        echo "Output tokens: $this->output\n";
-        echo "Cache creation tokens: $this->cacheCreation\n";
-        echo "Cache read tokens: $this->cacheRead\n";
-    }
-}
+function printUsage(Usage $usage) : void {
+    echo "Input tokens: $usage->inputTokens\n";
+    echo "Output tokens: $usage->outputTokens\n";
+    echo "Cache creation tokens: $usage->cacheWriteTokens\n";
+    echo "Cache read tokens: $usage->cacheReadTokens\n";
+    echo "Reasoning tokens: $usage->reasoningTokens\n";
+}

-$counter = new TokenCounter();
-
 echo "COUNTING TOKENS FOR SYNC RESPONSE\n";
 $text = "Jason is 25 years old and works as an engineer.";
-$instructor = (new Instructor)
-    ->onEvent(LLMResponseReceived::class, fn(LLMResponseReceived $e) => $counter->add($e->llmResponse))
-    ->respond(
+$response = (new Instructor)
+    ->request(
         messages: $text,
         responseModel: User::class,
-    );
+    )->response();

 echo "\nTEXT: $text\n";
-assert($counter->input > 0);
-assert($counter->output > 0);
-$counter->print();
+assert($response->usage()->total() > 0);
+printUsage($response->usage());

-// Reset the counter
-$counter->reset();
-
 echo "\n\nCOUNTING TOKENS FOR STREAMED RESPONSE\n";
 $text = "Anna is 19 years old.";
-$instructor = (new Instructor)
-    ->onEvent(PartialLLMResponseReceived::class, fn(PartialLLMResponseReceived $e) => $counter->add($e->partialLLMResponse))
-    ->respond(
+$stream = (new Instructor)
+    ->request(
         messages: $text,
         responseModel: User::class,
-        options: ['stream' => true, 'stream_options' => ['include_usage' => true]],
-    );
+        options: ['stream' => true],
+    )
+    ->stream();

+$response = $stream->final();
 echo "\nTEXT: $text\n";
-assert($counter->input > 0);
-assert($counter->output > 0);
-$counter->print();
+assert($stream->usage()->total() > 0);
+printUsage($stream->usage());
 ?>
 ```
3 changes: 2 additions & 1 deletion examples/A05_Extras/LLM/run.php

@@ -56,7 +56,8 @@
     messages: [['role' => 'user', 'content' => 'Describe capital of Brasil']],
     options: ['max_tokens' => 128, 'stream' => true]
 )
-->stream();
+->stream()
+->responses();

 echo "USER: Describe capital of Brasil\n";
 echo "ASSISTANT: ";
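The hunk above stops where the viewer truncates the file, so the consumption loop is not shown. A minimal sketch of how the new `->stream()->responses()` chain might be consumed; the `contentDelta` property on the partial responses is an assumption, not confirmed by this diff:

```php
<?php
// Hedged sketch: iterate streamed partial responses and print the deltas.
// The contentDelta property is assumed, not shown in this hunk.
$responses = (new Inference)
    ->create(
        messages: [['role' => 'user', 'content' => 'Describe capital of Brasil']],
        options: ['max_tokens' => 128, 'stream' => true]
    )
    ->stream()
    ->responses();

foreach ($responses as $partial) {
    echo $partial->contentDelta ?? ''; // print each chunk as it arrives
}
```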
7 changes: 7 additions & 0 deletions src/Extras/Embeddings/Drivers/AzureOpenAIDriver.php

@@ -7,6 +7,7 @@
 use Cognesy\Instructor\Extras\Embeddings\EmbeddingsResponse;
 use Cognesy\Instructor\Features\Http\Contracts\CanHandleHttp;
 use Cognesy\Instructor\Features\Http\HttpClient;
+use Cognesy\Instructor\Features\LLM\Data\Usage;

 class AzureOpenAIDriver implements CanVectorize
 {
@@ -67,6 +68,12 @@ protected function toResponse(array $response) : EmbeddingsResponse {
         callback: fn($item) => new Vector(values: $item['embedding'], id: $item['index']),
         array: $response['data']
     ),
+    usage: $this->makeUsage($response),
 );
 }

+protected function makeUsage(array $response): Usage {
+    return new Usage(
+        inputTokens: $response['usage']['prompt_tokens'] ?? 0,
+        outputTokens: ($response['usage']['total_tokens'] ?? 0) - ($response['usage']['prompt_tokens'] ?? 0),
+    );
9 changes: 8 additions & 1 deletion src/Extras/Embeddings/Drivers/CohereDriver.php

@@ -8,6 +8,7 @@
 use Cognesy\Instructor\Extras\Embeddings\EmbeddingsResponse;
 use Cognesy\Instructor\Features\Http\Contracts\CanHandleHttp;
 use Cognesy\Instructor\Features\Http\HttpClient;
+use Cognesy\Instructor\Features\LLM\Data\Usage;

 class CohereDriver implements CanVectorize
 {
@@ -57,8 +58,14 @@ protected function toResponse(array $response) : EmbeddingsResponse {
 }
 return new EmbeddingsResponse(
     vectors: $vectors,
+    usage: $this->makeUsage($response),
 );
 }

+private function makeUsage(array $response) : Usage {
+    return new Usage(
         inputTokens: $response['meta']['billed_units']['input_tokens'] ?? 0,
-        outputTokens: 0,
+        outputTokens: $response['meta']['billed_units']['output_tokens'] ?? 0,
+    );
+}
 }
11 changes: 9 additions & 2 deletions src/Extras/Embeddings/Drivers/GeminiDriver.php

@@ -7,6 +7,7 @@
 use Cognesy\Instructor\Extras\Embeddings\EmbeddingsResponse;
 use Cognesy\Instructor\Features\Http\Contracts\CanHandleHttp;
 use Cognesy\Instructor\Features\Http\HttpClient;
+use Cognesy\Instructor\Features\LLM\Data\Usage;

 class GeminiDriver implements CanVectorize
 {
@@ -64,12 +65,18 @@ protected function toResponse(array $response) : EmbeddingsResponse {
 }
 return new EmbeddingsResponse(
     vectors: $vectors,
-    inputTokens: $this->inputCharacters,
-    outputTokens: 0,
+    usage: $this->makeUsage($response),
 );
 }

 private function countCharacters(array $input) : int {
     return array_sum(array_map(fn($item) => strlen($item), $input));
 }
+
+private function makeUsage(array $response) : Usage {
+    return new Usage(
+        inputTokens: $this->inputCharacters,
+        outputTokens: 0,
+    );
+}
 }
7 changes: 7 additions & 0 deletions src/Extras/Embeddings/Drivers/JinaDriver.php

@@ -8,6 +8,7 @@
 use Cognesy\Instructor\Extras\Embeddings\EmbeddingsResponse;
 use Cognesy\Instructor\Features\Http\Contracts\CanHandleHttp;
 use Cognesy\Instructor\Features\Http\HttpClient;
+use Cognesy\Instructor\Features\LLM\Data\Usage;

 class JinaDriver implements CanVectorize
 {
@@ -60,6 +61,12 @@ protected function toResponse(array $response) : EmbeddingsResponse {
         fn($item) => new Vector(values: $item['embedding'], id: $item['index']),
         $response['data']
     ),
+    usage: $this->makeUsage($response),
 );
 }

+private function makeUsage(array $response) : Usage {
+    return new Usage(
+        inputTokens: $response['usage']['prompt_tokens'] ?? 0,
+        outputTokens: ($response['usage']['total_tokens'] ?? 0) - ($response['usage']['prompt_tokens'] ?? 0),
+    );
7 changes: 7 additions & 0 deletions src/Extras/Embeddings/Drivers/OpenAIDriver.php

@@ -8,6 +8,7 @@
 use Cognesy\Instructor\Extras\Embeddings\EmbeddingsResponse;
 use Cognesy\Instructor\Features\Http\Contracts\CanHandleHttp;
 use Cognesy\Instructor\Features\Http\HttpClient;
+use Cognesy\Instructor\Features\LLM\Data\Usage;

 class OpenAIDriver implements CanVectorize
 {
@@ -54,6 +55,12 @@ protected function toResponse(array $response) : EmbeddingsResponse {
         callback: fn($item) => new Vector(values: $item['embedding'], id: $item['index']),
         array: $response['data']
     ),
+    usage: $this->toUsage($response),
 );
 }

+private function toUsage(array $response) : Usage {
+    return new Usage(
+        inputTokens: $response['usage']['prompt_tokens'] ?? 0,
+        outputTokens: ($response['usage']['total_tokens'] ?? 0) - ($response['usage']['prompt_tokens'] ?? 0),
+    );
26 changes: 17 additions & 9 deletions src/Extras/Embeddings/EmbeddingsResponse.php

@@ -3,14 +3,14 @@
 namespace Cognesy\Instructor\Extras\Embeddings;

 use Cognesy\Instructor\Extras\Embeddings\Data\Vector;
+use Cognesy\Instructor\Features\LLM\Data\Usage;

 class EmbeddingsResponse
 {
     public function __construct(
         /** @var Vector[] */
         public array $vectors,
-        public int $inputTokens,
-        public int $outputTokens,
+        public ?Usage $usage,
     ) {}

     public function first() : Vector {
@@ -25,21 +25,29 @@ public function all() : array {
         return $this->vectors;
     }

+    public function usage() : Usage {
+        return $this->usage;
+    }
+
     /**
      * @param int $index
      * @return EmbeddingsResponse[]
      */
     public function split(int $index) : array {
         return [
             new EmbeddingsResponse(
-                array_slice($this->vectors, 0, $index),
-                $this->inputTokens,
-                $this->outputTokens,
+                vectors: array_slice($this->vectors, 0, $index),
+                usage: new Usage(
+                    inputTokens: $this->usage()->inputTokens,
+                    outputTokens: $this->usage()->outputTokens,
+                ),
             ),
             new EmbeddingsResponse(
-                array_slice($this->vectors, $index),
-                0,
-                0,
+                vectors: array_slice($this->vectors, $index),
+                usage: new Usage(
+                    inputTokens: 0,
+                    outputTokens: 0,
+                ),
             ),
         ];
     }
@@ -52,6 +60,6 @@ public function toValuesArray() : array {
     }

     public function totalTokens() : int {
-        return $this->inputTokens + $this->outputTokens;
+        return $this->usage()->total();
     }
 }
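A note on the `split()` semantics introduced here: the recorded usage is attributed entirely to the first slice, and the second slice reports zero usage, so the two parts still sum to the original totals. A small illustrative sketch (vector values and token counts are made up):

```php
<?php
// Illustrative only: shows how split() attributes usage after this change.
use Cognesy\Instructor\Extras\Embeddings\Data\Vector;
use Cognesy\Instructor\Extras\Embeddings\EmbeddingsResponse;
use Cognesy\Instructor\Features\LLM\Data\Usage;

$response = new EmbeddingsResponse(
    vectors: [
        new Vector(values: [0.1, 0.2], id: 0),
        new Vector(values: [0.3, 0.4], id: 1),
        new Vector(values: [0.5, 0.6], id: 2),
    ],
    usage: new Usage(inputTokens: 12, outputTokens: 0),
);

[$head, $tail] = $response->split(2);
// $head carries vectors 0-1 and all 12 input tokens;
// $tail carries vector 2 and zero usage.
assert($head->totalTokens() + $tail->totalTokens() === $response->totalTokens());
```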
14 changes: 12 additions & 2 deletions src/Features/Core/InstructorResponse.php

@@ -5,6 +5,7 @@
 use Cognesy\Instructor\Events\EventDispatcher;
 use Cognesy\Instructor\Events\Instructor\InstructorDone;
 use Cognesy\Instructor\Features\Core\Data\Request;
+use Cognesy\Instructor\Features\LLM\Data\LLMResponse;
 use Exception;

 class InstructorResponse
@@ -35,15 +36,24 @@ public function get() : mixed {
         return $result->value();
     }

+    /**
+     * Executes the request and returns LLM response object
+     */
+    public function response() : LLMResponse {
+        $response = $this->requestHandler->responseFor($this->request);
+        $this->events->dispatch(new InstructorDone(['result' => $response->value()]));
+        return $response;
+    }
+
     /**
      * Executes the request and returns the response stream
      */
-    public function stream() : Stream {
+    public function stream() : InstructorStream {
         // TODO: do we need this? cannot we just turn streaming on?
         if (!$this->request->isStream()) {
             throw new Exception('Instructor::stream() method requires response streaming: set "stream" = true in the request options.');
         }
         $stream = $this->requestHandler->streamResponseFor($this->request);
-        return new Stream($stream, $this->events);
+        return new InstructorStream($stream, $this->events);
     }
 }
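With this refactoring, `InstructorResponse` exposes three terminal calls: `get()` for the deserialized value, `response()` for the full `LLMResponse` (including `usage()`), and `stream()` for an `InstructorStream`. A hedged sketch reusing the `User` model from the TokenUsage example above; each call chain is assumed to execute a fresh request:

```php
<?php
// Sketch of the reworked entry points, based on calls shown in this commit.
$user = (new Instructor)->request(
    messages: "Jason is 25 years old.",
    responseModel: User::class,
)->get(); // just the deserialized User object

$llmResponse = (new Instructor)->request(
    messages: "Jason is 25 years old.",
    responseModel: User::class,
)->response(); // full LLMResponse, e.g. $llmResponse->usage()->total()

// stream() requires 'stream' => true in the options, otherwise it throws
// (see the guard in the diff above).
$stream = (new Instructor)->request(
    messages: "Jason is 25 years old.",
    responseModel: User::class,
    options: ['stream' => true],
)->stream();
```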