Commit 347390b

Refactorings and cleanups

ddebowczyk committed Oct 9, 2024
1 parent 02e8885 commit 347390b

Showing 38 changed files with 626 additions and 335 deletions.
4 changes: 2 additions & 2 deletions docs/cookbook/examples/advanced/context_cache_llm.mdx

@@ -45,7 +45,7 @@ $inference = (new Inference)->withConnection('anthropic')->withCachedContext(
 $response = $inference->create(
     messages: [['role' => 'user', 'content' => 'CTO of lead gen software vendor']],
     options: ['max_tokens' => 256],
-)->asLLMResponse();
+)->response();

 print("----------------------------------------\n");
 print("\n# Summary for CTO of lead gen vendor\n");
@@ -60,7 +60,7 @@ assert(Str::contains($response->content, 'lead', false));
 $response2 = $inference->create(
     messages: [['role' => 'user', 'content' => 'CIO of insurance company']],
     options: ['max_tokens' => 256],
-)->asLLMResponse();
+)->response();

 print("----------------------------------------\n");
 print("\n# Summary for CIO of insurance company\n");
4 changes: 2 additions & 2 deletions evals/LLMModes/CompareModes.php

@@ -66,8 +66,8 @@ private function execute(string $connection, Mode $mode, bool $isStreamed) : Eva
     notes: $answer,
     isCorrect: $isCorrect,
     timeElapsed: $timeElapsed,
-    inputTokens: $llmResponse->inputTokens,
-    outputTokens: $llmResponse->outputTokens,
+    inputTokens: $llmResponse->usage()->inputTokens,
+    outputTokens: $llmResponse->usage()->outputTokens,
 );
 } catch(Exception $e) {
     $timeElapsed = microtime(true) - $time;
2 changes: 1 addition & 1 deletion evals/LLMModes/Modes.php

@@ -34,7 +34,7 @@ public function callInferenceFor(string|array $query, Mode $mode, string $connec
         Mode::MdJson => $this->forModeMdJson($query, $connection, $schema, $isStreamed),
         Mode::Text => $this->forModeText($query, $connection, $isStreamed),
     };
-    return $inferenceResponse->asLLMResponse();
+    return $inferenceResponse->response();
 }

 public function forModeTools(string|array $query, string $connection, array $schema, bool $isStreamed) : InferenceResponse {
4 changes: 2 additions & 2 deletions examples/A02_Advanced/ContextCacheLLM/run.php

@@ -45,7 +45,7 @@
 $response = $inference->create(
     messages: [['role' => 'user', 'content' => 'CTO of lead gen software vendor']],
     options: ['max_tokens' => 256],
-)->asLLMResponse();
+)->response();

 print("----------------------------------------\n");
 print("\n# Summary for CTO of lead gen vendor\n");
@@ -60,7 +60,7 @@
 $response2 = $inference->create(
     messages: [['role' => 'user', 'content' => 'CIO of insurance company']],
     options: ['max_tokens' => 256],
-)->asLLMResponse();
+)->response();

 print("----------------------------------------\n");
 print("\n# Summary for CIO of insurance company\n");
84 changes: 26 additions & 58 deletions examples/A03_Troubleshooting/TokenUsage/run.php

@@ -6,16 +6,11 @@
 ## Overview

 Some use cases require tracking the token usage of the API responses.
-Currently, this can be done by listening to the `LLMResponseReceived`
-and `PartialLLMResponseReceived` events and summing the token usage
-of the responses.
+This can be done by getting the `Usage` object from the Instructor LLM
+response object.

-Code below demonstrates how it can be implemented using Instructor
-event listeners.
-
-> Note: OpenAI API requires `stream_options` to be set to
-> `['include_usage' => true]` to include token usage in the streamed
-> responses.
+The code below demonstrates how to retrieve it for both sync and
+streamed requests.

 ## Example

@@ -24,75 +19,48 @@
 $loader = require 'vendor/autoload.php';
 $loader->add('Cognesy\\Instructor\\', __DIR__ . '../../src/');

-use Cognesy\Instructor\Events\Inference\LLMResponseReceived;
-use Cognesy\Instructor\Events\Inference\PartialLLMResponseReceived;
-use Cognesy\Instructor\Features\LLM\Data\LLMResponse;
-use Cognesy\Instructor\Features\LLM\Data\PartialLLMResponse;
+use Cognesy\Instructor\Features\LLM\Data\Usage;
 use Cognesy\Instructor\Instructor;

 class User {
     public int $age;
     public string $name;
 }

-class TokenCounter {
-    public int $input = 0;
-    public int $output = 0;
-    public int $cacheCreation = 0;
-    public int $cacheRead = 0;
-
-    public function add(LLMResponse|PartialLLMResponse $response) {
-        $this->input += $response->inputTokens;
-        $this->output += $response->outputTokens;
-        $this->cacheCreation += $response->cacheCreationTokens;
-        $this->cacheRead += $response->cacheReadTokens;
-    }
-
-    public function reset() {
-        $this->input = 0;
-        $this->output = 0;
-        $this->cacheCreation = 0;
-        $this->cacheRead = 0;
-    }
-
-    public function print() {
-        echo "Input tokens: $this->input\n";
-        echo "Output tokens: $this->output\n";
-        echo "Cache creation tokens: $this->cacheCreation\n";
-        echo "Cache read tokens: $this->cacheRead\n";
-    }
-}
+function printUsage(Usage $usage) : void {
+    echo "Input tokens: $usage->inputTokens\n";
+    echo "Output tokens: $usage->outputTokens\n";
+    echo "Cache creation tokens: $usage->cacheWriteTokens\n";
+    echo "Cache read tokens: $usage->cacheReadTokens\n";
+    echo "Reasoning tokens: $usage->reasoningTokens\n";
+}

-$counter = new TokenCounter();
-
 echo "COUNTING TOKENS FOR SYNC RESPONSE\n";
 $text = "Jason is 25 years old and works as an engineer.";
-$instructor = (new Instructor)
-    ->onEvent(LLMResponseReceived::class, fn(LLMResponseReceived $e) => $counter->add($e->llmResponse))
-    ->respond(
+$response = (new Instructor)
+    ->request(
         messages: $text,
         responseModel: User::class,
-    );
+    )->response();

 echo "\nTEXT: $text\n";
-assert($counter->input > 0);
-assert($counter->output > 0);
-$counter->print();
+assert($response->usage()->total() > 0);
+printUsage($response->usage());

-// Reset the counter
-$counter->reset();
-
 echo "\n\nCOUNTING TOKENS FOR STREAMED RESPONSE\n";
 $text = "Anna is 19 years old.";
-$instructor = (new Instructor)
-    ->onEvent(PartialLLMResponseReceived::class, fn(PartialLLMResponseReceived $e) => $counter->add($e->partialLLMResponse))
-    ->respond(
+$stream = (new Instructor)
+    ->request(
         messages: $text,
         responseModel: User::class,
-        options: ['stream' => true, 'stream_options' => ['include_usage' => true]],
-    );
+        options: ['stream' => true],
+    )
+    ->stream();

+$response = $stream->final();
 echo "\nTEXT: $text\n";
-assert($counter->input > 0);
-assert($counter->output > 0);
-$counter->print();
+assert($stream->usage()->total() > 0);
+printUsage($stream->usage());
 ?>
 ```
3 changes: 2 additions & 1 deletion examples/A05_Extras/LLM/run.php

@@ -56,7 +56,8 @@
     messages: [['role' => 'user', 'content' => 'Describe capital of Brasil']],
     options: ['max_tokens' => 128, 'stream' => true]
 )
-->stream();
+->stream()
+->responses();

 echo "USER: Describe capital of Brasil\n";
 echo "ASSISTANT: ";
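The hunk above stops where the viewer truncates the file, so the consumption loop is not shown. A minimal sketch of how the new `->stream()->responses()` chain might be consumed; the `contentDelta` property on the partial responses is an assumption, not confirmed by this diff:

```php
<?php
// Hedged sketch: iterate streamed partial responses and print the deltas.
// The contentDelta property is assumed, not shown in this hunk.
$responses = (new Inference)
    ->create(
        messages: [['role' => 'user', 'content' => 'Describe capital of Brasil']],
        options: ['max_tokens' => 128, 'stream' => true]
    )
    ->stream()
    ->responses();

foreach ($responses as $partial) {
    echo $partial->contentDelta ?? ''; // print each chunk as it arrives
}
```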
7 changes: 7 additions & 0 deletions src/Extras/Embeddings/Drivers/AzureOpenAIDriver.php

@@ -7,6 +7,7 @@
 use Cognesy\Instructor\Extras\Embeddings\EmbeddingsResponse;
 use Cognesy\Instructor\Features\Http\Contracts\CanHandleHttp;
 use Cognesy\Instructor\Features\Http\HttpClient;
+use Cognesy\Instructor\Features\LLM\Data\Usage;

 class AzureOpenAIDriver implements CanVectorize
 {
@@ -67,6 +68,12 @@ protected function toResponse(array $response) : EmbeddingsResponse {
         callback: fn($item) => new Vector(values: $item['embedding'], id: $item['index']),
         array: $response['data']
     ),
+    usage: $this->makeUsage($response),
 );
 }

+protected function makeUsage(array $response): Usage {
+    return new Usage(
+        inputTokens: $response['usage']['prompt_tokens'] ?? 0,
+        outputTokens: ($response['usage']['total_tokens'] ?? 0) - ($response['usage']['prompt_tokens'] ?? 0),
+    );
9 changes: 8 additions & 1 deletion src/Extras/Embeddings/Drivers/CohereDriver.php

@@ -8,6 +8,7 @@
 use Cognesy\Instructor\Extras\Embeddings\EmbeddingsResponse;
 use Cognesy\Instructor\Features\Http\Contracts\CanHandleHttp;
 use Cognesy\Instructor\Features\Http\HttpClient;
+use Cognesy\Instructor\Features\LLM\Data\Usage;

 class CohereDriver implements CanVectorize
 {
@@ -57,8 +58,14 @@ protected function toResponse(array $response) : EmbeddingsResponse {
 }
 return new EmbeddingsResponse(
     vectors: $vectors,
+    usage: $this->makeUsage($response),
 );
 }

+private function makeUsage(array $response) : Usage {
+    return new Usage(
         inputTokens: $response['meta']['billed_units']['input_tokens'] ?? 0,
-        outputTokens: 0,
+        outputTokens: $response['meta']['billed_units']['output_tokens'] ?? 0,
+    );
+}
 }
11 changes: 9 additions & 2 deletions src/Extras/Embeddings/Drivers/GeminiDriver.php

@@ -7,6 +7,7 @@
 use Cognesy\Instructor\Extras\Embeddings\EmbeddingsResponse;
 use Cognesy\Instructor\Features\Http\Contracts\CanHandleHttp;
 use Cognesy\Instructor\Features\Http\HttpClient;
+use Cognesy\Instructor\Features\LLM\Data\Usage;

 class GeminiDriver implements CanVectorize
 {
@@ -64,12 +65,18 @@ protected function toResponse(array $response) : EmbeddingsResponse {
 }
 return new EmbeddingsResponse(
     vectors: $vectors,
-    inputTokens: $this->inputCharacters,
-    outputTokens: 0,
+    usage: $this->makeUsage($response),
 );
 }

 private function countCharacters(array $input) : int {
     return array_sum(array_map(fn($item) => strlen($item), $input));
 }
+
+private function makeUsage(array $response) : Usage {
+    return new Usage(
+        inputTokens: $this->inputCharacters,
+        outputTokens: 0,
+    );
+}
 }
7 changes: 7 additions & 0 deletions src/Extras/Embeddings/Drivers/JinaDriver.php

@@ -8,6 +8,7 @@
 use Cognesy\Instructor\Extras\Embeddings\EmbeddingsResponse;
 use Cognesy\Instructor\Features\Http\Contracts\CanHandleHttp;
 use Cognesy\Instructor\Features\Http\HttpClient;
+use Cognesy\Instructor\Features\LLM\Data\Usage;

 class JinaDriver implements CanVectorize
 {
@@ -60,6 +61,12 @@ protected function toResponse(array $response) : EmbeddingsResponse {
         fn($item) => new Vector(values: $item['embedding'], id: $item['index']),
         $response['data']
     ),
+    usage: $this->makeUsage($response),
 );
 }

+private function makeUsage(array $response) : Usage {
+    return new Usage(
+        inputTokens: $response['usage']['prompt_tokens'] ?? 0,
+        outputTokens: ($response['usage']['total_tokens'] ?? 0) - ($response['usage']['prompt_tokens'] ?? 0),
+    );
7 changes: 7 additions & 0 deletions src/Extras/Embeddings/Drivers/OpenAIDriver.php

@@ -8,6 +8,7 @@
 use Cognesy\Instructor\Extras\Embeddings\EmbeddingsResponse;
 use Cognesy\Instructor\Features\Http\Contracts\CanHandleHttp;
 use Cognesy\Instructor\Features\Http\HttpClient;
+use Cognesy\Instructor\Features\LLM\Data\Usage;

 class OpenAIDriver implements CanVectorize
 {
@@ -54,6 +55,12 @@ protected function toResponse(array $response) : EmbeddingsResponse {
         callback: fn($item) => new Vector(values: $item['embedding'], id: $item['index']),
         array: $response['data']
     ),
+    usage: $this->toUsage($response),
 );
 }

+private function toUsage(array $response) : Usage {
+    return new Usage(
+        inputTokens: $response['usage']['prompt_tokens'] ?? 0,
+        outputTokens: ($response['usage']['total_tokens'] ?? 0) - ($response['usage']['prompt_tokens'] ?? 0),
+    );
26 changes: 17 additions & 9 deletions src/Extras/Embeddings/EmbeddingsResponse.php

@@ -3,14 +3,14 @@
 namespace Cognesy\Instructor\Extras\Embeddings;

 use Cognesy\Instructor\Extras\Embeddings\Data\Vector;
+use Cognesy\Instructor\Features\LLM\Data\Usage;

 class EmbeddingsResponse
 {
     public function __construct(
         /** @var Vector[] */
         public array $vectors,
-        public int $inputTokens,
-        public int $outputTokens,
+        public ?Usage $usage,
     ) {}

     public function first() : Vector {
@@ -25,21 +25,29 @@ public function all() : array {
         return $this->vectors;
     }

+    public function usage() : Usage {
+        return $this->usage;
+    }
+
     /**
      * @param int $index
      * @return EmbeddingsResponse[]
      */
     public function split(int $index) : array {
         return [
             new EmbeddingsResponse(
-                array_slice($this->vectors, 0, $index),
-                $this->inputTokens,
-                $this->outputTokens,
+                vectors: array_slice($this->vectors, 0, $index),
+                usage: new Usage(
+                    inputTokens: $this->usage()->inputTokens,
+                    outputTokens: $this->usage()->outputTokens,
+                ),
             ),
             new EmbeddingsResponse(
-                array_slice($this->vectors, $index),
-                0,
-                0,
+                vectors: array_slice($this->vectors, $index),
+                usage: new Usage(
+                    inputTokens: 0,
+                    outputTokens: 0,
+                ),
             ),
         ];
     }
@@ -52,6 +60,6 @@ public function toValuesArray() : array {
     }

     public function totalTokens() : int {
-        return $this->inputTokens + $this->outputTokens;
+        return $this->usage()->total();
     }
 }
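A note on the `split()` semantics introduced here: the recorded usage is attributed entirely to the first slice, and the second slice reports zero usage, so the two parts still sum to the original totals. A small illustrative sketch (vector values and token counts are made up):

```php
<?php
// Illustrative only: shows how split() attributes usage after this change.
use Cognesy\Instructor\Extras\Embeddings\Data\Vector;
use Cognesy\Instructor\Extras\Embeddings\EmbeddingsResponse;
use Cognesy\Instructor\Features\LLM\Data\Usage;

$response = new EmbeddingsResponse(
    vectors: [
        new Vector(values: [0.1, 0.2], id: 0),
        new Vector(values: [0.3, 0.4], id: 1),
        new Vector(values: [0.5, 0.6], id: 2),
    ],
    usage: new Usage(inputTokens: 12, outputTokens: 0),
);

[$head, $tail] = $response->split(2);
// $head carries vectors 0-1 and all 12 input tokens;
// $tail carries vector 2 and zero usage.
assert($head->totalTokens() + $tail->totalTokens() === $response->totalTokens());
```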
14 changes: 12 additions & 2 deletions src/Features/Core/InstructorResponse.php

@@ -5,6 +5,7 @@
 use Cognesy\Instructor\Events\EventDispatcher;
 use Cognesy\Instructor\Events\Instructor\InstructorDone;
 use Cognesy\Instructor\Features\Core\Data\Request;
+use Cognesy\Instructor\Features\LLM\Data\LLMResponse;
 use Exception;

 class InstructorResponse
@@ -35,15 +36,24 @@ public function get() : mixed {
         return $result->value();
     }

+    /**
+     * Executes the request and returns LLM response object
+     */
+    public function response() : LLMResponse {
+        $response = $this->requestHandler->responseFor($this->request);
+        $this->events->dispatch(new InstructorDone(['result' => $response->value()]));
+        return $response;
+    }
+
     /**
      * Executes the request and returns the response stream
      */
-    public function stream() : Stream {
+    public function stream() : InstructorStream {
         // TODO: do we need this? cannot we just turn streaming on?
         if (!$this->request->isStream()) {
             throw new Exception('Instructor::stream() method requires response streaming: set "stream" = true in the request options.');
         }
         $stream = $this->requestHandler->streamResponseFor($this->request);
-        return new Stream($stream, $this->events);
+        return new InstructorStream($stream, $this->events);
     }
 }
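With this refactoring, `InstructorResponse` exposes three terminal calls: `get()` for the deserialized value, `response()` for the full `LLMResponse` (including `usage()`), and `stream()` for an `InstructorStream`. A hedged sketch reusing the `User` model from the TokenUsage example above; each call chain is assumed to execute a fresh request:

```php
<?php
// Sketch of the reworked entry points, based on calls shown in this commit.
$user = (new Instructor)->request(
    messages: "Jason is 25 years old.",
    responseModel: User::class,
)->get(); // just the deserialized User object

$llmResponse = (new Instructor)->request(
    messages: "Jason is 25 years old.",
    responseModel: User::class,
)->response(); // full LLMResponse, e.g. $llmResponse->usage()->total()

// stream() requires 'stream' => true in the options, otherwise it throws
// (see the guard in the diff above).
$stream = (new Instructor)->request(
    messages: "Jason is 25 years old.",
    responseModel: User::class,
    options: ['stream' => true],
)->stream();
```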