Skip to content

Commit

Permalink
Fixed HTTPClient::make() missing event dispatcher refs
Browse files Browse the repository at this point in the history
  • Loading branch information
ddebowczyk committed Nov 4, 2024
1 parent 55cd64f commit 3e5c3ae
Show file tree
Hide file tree
Showing 17 changed files with 1,852 additions and 1,829 deletions.
6 changes: 1 addition & 5 deletions evals/LLMModes/run.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,7 @@
);

$experiment = new Experiment(
cases: InferenceCases::except(
connections: ['ollama'],
modes: [],
stream: [],
),
cases: InferenceCases::all(),
executor: new RunInference($data),
processors: [
new CompanyEval(
Expand Down
122 changes: 65 additions & 57 deletions examples/A05_Extras/Embeddings/run.php
Original file line number Diff line number Diff line change
@@ -1,57 +1,65 @@
---
title: 'Embeddings'
docname: 'embeddings'
---

## Overview

The `Embeddings` class offers access to embeddings APIs and convenient methods
for finding the top K vectors or documents most similar to a provided query.

The `Embeddings` class supports the following embeddings providers:
- Azure
- Cohere
- Gemini
- Jina
- Mistral
- OpenAI

Access details for the embeddings providers can be found and modified in
`/config/embed.php`.


## Example

```php
<?php
// Bootstrap Composer's autoloader and register the library namespace.
$loader = require 'vendor/autoload.php';
$loader->add('Cognesy\\Instructor\\', __DIR__ . '../../src/');

use Cognesy\Instructor\Extras\Embeddings\Embeddings;

// Corpus that is ranked against the query below.
$documents = [
    'Computer vision models are used to analyze images and videos.',
    'The bakers at the Nashville Bakery baked 200 loaves of bread on Monday morning.',
    'The new movie starring Tom Hanks is now playing in theaters.',
    'Famous soccer player Lionel Messi has arrived in town.',
    'News about the latest iPhone model has been leaked.',
    'New car model by Tesla is now available for pre-order.',
    'Philip K. Dick is an author of many sci-fi novels.',
];

$query = "technology news";

// Connection names to try, one after another.
$connections = [
    'azure',
    'cohere1',
    'gemini',
    'jina',
    'mistral',
    'ollama',
    'openai',
];

foreach ($connections as $connection) {
    // A fresh Embeddings instance is bound to each connection in turn.
    $embeddings = (new Embeddings)->withConnection($connection);

    // Ask for the three documents most similar to the query.
    $topMatches = $embeddings->findSimilar(
        query: $query,
        documents: $documents,
        topK: 3
    );

    echo "\n[$connection]\n";
    dump($topMatches);
}
?>
```
---
title: 'Embeddings'
docname: 'embeddings'
---

## Overview

The `Embeddings` class offers access to embeddings APIs and convenient methods
for finding the top K vectors or documents most similar to a provided query.

The `Embeddings` class supports the following embeddings providers:
- Azure
- Cohere
- Gemini
- Jina
- Mistral
- OpenAI

Access details for the embeddings providers can be found and modified in
`/config/embed.php`.


## Example

```php
<?php
// Bootstrap Composer's autoloader and register the library namespace.
$loader = require 'vendor/autoload.php';
$loader->add('Cognesy\\Instructor\\', __DIR__ . '../../src/');

use Cognesy\Instructor\Extras\Embeddings\Embeddings;

// Corpus that is ranked against the query below.
$documents = [
    'Computer vision models are used to analyze images and videos.',
    'The bakers at the Nashville Bakery baked 200 loaves of bread on Monday morning.',
    'The new movie starring Tom Hanks is now playing in theaters.',
    'Famous soccer player Lionel Messi has arrived in town.',
    'News about the latest iPhone model has been leaked.',
    'New car model by Tesla is now available for pre-order.',
    'Philip K. Dick is an author of many sci-fi novels.',
];

$query = "technology news";

// Connection names to try, one after another.
$connections = [
    'azure',
    'cohere1',
    'gemini',
    'jina',
    'mistral',
    // 'ollama' is left out of this run; re-add the entry to include it.
    'openai',
];

foreach ($connections as $connection) {
    // A fresh Embeddings instance is bound to each connection in turn.
    $embeddings = (new Embeddings)->withConnection($connection);

    // Ask for the three documents most similar to the query.
    $topMatches = $embeddings->findSimilar(
        query: $query,
        documents: $documents,
        topK: 3
    );

    echo "\n[$connection]\n";
    dump($topMatches);
}
?>
```
163 changes: 83 additions & 80 deletions src/Extras/Embeddings/Drivers/AzureOpenAIDriver.php
Original file line number Diff line number Diff line change
@@ -1,81 +1,84 @@
<?php
namespace Cognesy\Instructor\Extras\Embeddings\Drivers;

use Cognesy\Instructor\Extras\Embeddings\Contracts\CanVectorize;
use Cognesy\Instructor\Extras\Embeddings\Data\EmbeddingsConfig;
use Cognesy\Instructor\Extras\Embeddings\Data\Vector;
use Cognesy\Instructor\Extras\Embeddings\EmbeddingsResponse;
use Cognesy\Instructor\Features\Http\Contracts\CanHandleHttp;
use Cognesy\Instructor\Features\Http\HttpClient;
use Cognesy\Instructor\Features\LLM\Data\Usage;

class AzureOpenAIDriver implements CanVectorize
{
/**
 * @param EmbeddingsConfig $config Connection settings read by this driver.
 * @param CanHandleHttp|null $httpClient HTTP client to use; when null, one is created via HttpClient::make().
 */
public function __construct(
    protected EmbeddingsConfig $config,
    protected ?CanHandleHttp $httpClient = null,
) {
    // The promoted property already holds the injected client (or null) here,
    // so only the null case needs a default.
    $this->httpClient ??= HttpClient::make();
}

/**
 * Sends the inputs to the configured endpoint and converts the JSON reply
 * into an EmbeddingsResponse.
 *
 * @param array $input Values placed under the 'input' key of the request body.
 * @param array $options Extra request body entries; they override the defaults on key collision.
 */
public function vectorize(array $input, array $options = []): EmbeddingsResponse {
    $url = $this->getEndpointUrl();
    $headers = $this->getRequestHeaders();
    $body = $this->getRequestBody($input, $options);

    $response = $this->httpClient->handle($url, $headers, $body);

    // Decode to associative arrays, which is the shape toResponse() reads.
    $decoded = json_decode($response->getContents(), true);
    return $this->toResponse($decoded);
}

// INTERNAL /////////////////////////////////////////////////

/**
 * Builds the request URL: apiUrl + endpoint with every `{key}` placeholder
 * replaced by the matching config metadata value, followed by the query
 * string from getUrlParams().
 */
protected function getEndpointUrl(): string {
    $metadata = $this->config->metadata;
    $placeholders = array_map(fn($key) => "{".$key."}", array_keys($metadata));
    $resolved = str_replace(
        $placeholders,
        array_values($metadata),
        "{$this->config->apiUrl}{$this->config->endpoint}",
    );
    return $resolved . $this->getUrlParams();
}

/**
 * Returns the URL query string ("?api-version=...") built from the
 * 'apiVersion' metadata entry, or an empty string when that entry is
 * missing or empty.
 */
protected function getUrlParams(): string {
    // array_filter() drops the entry when the version is empty.
    $query = array_filter([
        'api-version' => $this->config->metadata['apiVersion'] ?? '',
    ]);
    return empty($query) ? '' : ('?' . http_build_query($query));
}

/**
 * Returns the request headers: the configured API key under 'Api-Key'
 * plus a JSON content type.
 */
protected function getRequestHeaders(): array {
    $headers = [];
    $headers['Api-Key'] = $this->config->apiKey;
    $headers['Content-Type'] = 'application/json';
    return $headers;
}

/**
 * Builds the request body from driver defaults merged with caller options.
 *
 * Options override defaults on key collision; entries whose value is empty
 * (falsy) are removed from the merged result by array_filter().
 */
protected function getRequestBody(array $input, array $options) : array {
    $defaults = [
        'input' => $input,
        'model' => $this->config->model,
        'encoding_format' => 'float',
    ];
    $merged = array_merge($defaults, $options);
    return array_filter($merged);
}

/**
 * Converts the decoded API reply into an EmbeddingsResponse.
 *
 * Each entry of $response['data'] becomes a Vector built from its
 * 'embedding' (values) and 'index' (id) fields.
 */
protected function toResponse(array $response) : EmbeddingsResponse {
    $vectors = array_map(
        static fn($item) => new Vector(values: $item['embedding'], id: $item['index']),
        $response['data'],
    );
    $usage = $this->makeUsage($response);

    return new EmbeddingsResponse(vectors: $vectors, usage: $usage);
}

/**
 * Builds token usage from the reply's 'usage' section.
 *
 * Input tokens come from 'prompt_tokens'; output tokens are
 * 'total_tokens' minus 'prompt_tokens'. Missing values count as 0.
 */
protected function makeUsage(array $response): Usage {
    $usage = $response['usage'] ?? [];
    $promptTokens = $usage['prompt_tokens'] ?? 0;
    $totalTokens = $usage['total_tokens'] ?? 0;

    return new Usage(
        inputTokens: $promptTokens,
        outputTokens: $totalTokens - $promptTokens,
    );
}
<?php
namespace Cognesy\Instructor\Extras\Embeddings\Drivers;

use Cognesy\Instructor\Events\EventDispatcher;
use Cognesy\Instructor\Extras\Embeddings\Contracts\CanVectorize;
use Cognesy\Instructor\Extras\Embeddings\Data\EmbeddingsConfig;
use Cognesy\Instructor\Extras\Embeddings\Data\Vector;
use Cognesy\Instructor\Extras\Embeddings\EmbeddingsResponse;
use Cognesy\Instructor\Features\Http\Contracts\CanHandleHttp;
use Cognesy\Instructor\Features\Http\HttpClient;
use Cognesy\Instructor\Features\LLM\Data\Usage;

/**
 * Embeddings driver that sends vectorization requests to the endpoint
 * described by the injected EmbeddingsConfig, authenticating with an
 * 'Api-Key' header and an optional 'api-version' query parameter.
 */
class AzureOpenAIDriver implements CanVectorize
{
    /**
     * @param EmbeddingsConfig $config Connection settings read by this driver.
     * @param CanHandleHttp|null $httpClient HTTP client to use; when null, one is created via HttpClient::make().
     * @param EventDispatcher|null $events Event dispatcher; when null, a new EventDispatcher is created.
     */
    public function __construct(
        protected EmbeddingsConfig $config,
        protected ?CanHandleHttp $httpClient = null,
        protected ?EventDispatcher $events = null,
    ) {
        // Promoted properties already hold the injected values (or null) here,
        // so only the null cases need defaults. The dispatcher is resolved first
        // because the default HTTP client is created with it.
        $this->events ??= new EventDispatcher();
        $this->httpClient ??= HttpClient::make(events: $this->events);
    }

    /**
     * Sends the inputs to the configured endpoint and converts the JSON reply
     * into an EmbeddingsResponse.
     *
     * @param array $input Values placed under the 'input' key of the request body.
     * @param array $options Extra request body entries; they override the defaults on key collision.
     */
    public function vectorize(array $input, array $options = []): EmbeddingsResponse {
        $url = $this->getEndpointUrl();
        $headers = $this->getRequestHeaders();
        $body = $this->getRequestBody($input, $options);

        $response = $this->httpClient->handle($url, $headers, $body);

        // Decode to associative arrays, which is the shape toResponse() reads.
        $decoded = json_decode($response->getContents(), true);
        return $this->toResponse($decoded);
    }

    // INTERNAL /////////////////////////////////////////////////

    /**
     * Builds the request URL: apiUrl + endpoint with every `{key}` placeholder
     * replaced by the matching config metadata value, followed by the query
     * string from getUrlParams().
     */
    protected function getEndpointUrl(): string {
        $metadata = $this->config->metadata;
        $placeholders = array_map(fn($key) => "{".$key."}", array_keys($metadata));
        $resolved = str_replace(
            $placeholders,
            array_values($metadata),
            "{$this->config->apiUrl}{$this->config->endpoint}",
        );
        return $resolved . $this->getUrlParams();
    }

    /**
     * Returns the URL query string ("?api-version=...") built from the
     * 'apiVersion' metadata entry, or an empty string when that entry is
     * missing or empty.
     */
    protected function getUrlParams(): string {
        // array_filter() drops the entry when the version is empty.
        $query = array_filter([
            'api-version' => $this->config->metadata['apiVersion'] ?? '',
        ]);
        return empty($query) ? '' : ('?' . http_build_query($query));
    }

    /**
     * Returns the request headers: the configured API key under 'Api-Key'
     * plus a JSON content type.
     */
    protected function getRequestHeaders(): array {
        $headers = [];
        $headers['Api-Key'] = $this->config->apiKey;
        $headers['Content-Type'] = 'application/json';
        return $headers;
    }

    /**
     * Builds the request body from driver defaults merged with caller options.
     *
     * Options override defaults on key collision; entries whose value is empty
     * (falsy) are removed from the merged result by array_filter().
     */
    protected function getRequestBody(array $input, array $options) : array {
        $defaults = [
            'input' => $input,
            'model' => $this->config->model,
            'encoding_format' => 'float',
        ];
        $merged = array_merge($defaults, $options);
        return array_filter($merged);
    }

    /**
     * Converts the decoded API reply into an EmbeddingsResponse.
     *
     * Each entry of $response['data'] becomes a Vector built from its
     * 'embedding' (values) and 'index' (id) fields.
     */
    protected function toResponse(array $response) : EmbeddingsResponse {
        $vectors = array_map(
            static fn($item) => new Vector(values: $item['embedding'], id: $item['index']),
            $response['data'],
        );
        $usage = $this->makeUsage($response);

        return new EmbeddingsResponse(vectors: $vectors, usage: $usage);
    }

    /**
     * Builds token usage from the reply's 'usage' section.
     *
     * Input tokens come from 'prompt_tokens'; output tokens are
     * 'total_tokens' minus 'prompt_tokens'. Missing values count as 0.
     */
    protected function makeUsage(array $response): Usage {
        $usage = $response['usage'] ?? [];
        $promptTokens = $usage['prompt_tokens'] ?? 0;
        $totalTokens = $usage['total_tokens'] ?? 0;

        return new Usage(
            inputTokens: $promptTokens,
            outputTokens: $totalTokens - $promptTokens,
        );
    }
}
Loading

0 comments on commit 3e5c3ae

Please sign in to comment.