-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixed HTTPClient::make() missing event dispatcher refs
- Loading branch information
1 parent
55cd64f
commit 3e5c3ae
Showing
17 changed files
with
1,852 additions
and
1,829 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,57 +1,65 @@ | ||
--- | ||
title: 'Embeddings' | ||
docname: 'embeddings' | ||
--- | ||
|
||
## Overview | ||
|
||
The `Embeddings` class offers access to embeddings APIs and convenient methods | ||
to find the top K vectors or documents most similar to the provided query. | ||
|
||
The `Embeddings` class supports the following embeddings providers: | ||
- Azure | ||
- Cohere | ||
- Gemini | ||
- Jina | ||
- Mistral | ||
- OpenAI | ||
|
||
Access details for the embeddings providers can be found and modified in | ||
`/config/embed.php`. | ||
|
||
|
||
## Example | ||
|
||
```php | ||
<?php
$loader = require 'vendor/autoload.php';
// __DIR__ never ends with a directory separator, so the leading slash must be
// part of the appended relative path.
$loader->add('Cognesy\\Instructor\\', __DIR__ . '/../../src/');

use Cognesy\Instructor\Extras\Embeddings\Embeddings;

// Documents that will be ranked by similarity to the query.
$documents = [
    'Computer vision models are used to analyze images and videos.',
    'The bakers at the Nashville Bakery baked 200 loaves of bread on Monday morning.',
    'The new movie starring Tom Hanks is now playing in theaters.',
    'Famous soccer player Lionel Messi has arrived in town.',
    'News about the latest iPhone model has been leaked.',
    'New car model by Tesla is now available for pre-order.',
    'Philip K. Dick is an author of many sci-fi novels.',
];

$query = "technology news";

// Connection names passed to withConnection(), one run per connection.
$connections = ['azure', 'cohere1', 'gemini', 'jina', 'mistral', 'ollama', 'openai'];

foreach($connections as $connection) {
    // Ask the selected connection for the 3 documents most similar to the query.
    $bestMatches = (new Embeddings)->withConnection($connection)->findSimilar(
        query: $query,
        documents: $documents,
        topK: 3
    );

    echo "\n[$connection]\n";
    dump($bestMatches);
}
?>
``` | ||
--- | ||
title: 'Embeddings' | ||
docname: 'embeddings' | ||
--- | ||
|
||
## Overview | ||
|
||
The `Embeddings` class offers access to embeddings APIs and convenient methods | ||
to find the top K vectors or documents most similar to the provided query. | ||
|
||
The `Embeddings` class supports the following embeddings providers: | ||
- Azure | ||
- Cohere | ||
- Gemini | ||
- Jina | ||
- Mistral | ||
- OpenAI | ||
|
||
Access details for the embeddings providers can be found and modified in | ||
`/config/embed.php`. | ||
|
||
|
||
## Example | ||
|
||
```php | ||
<?php
$loader = require 'vendor/autoload.php';
// __DIR__ never ends with a directory separator, so the leading slash must be
// part of the appended relative path.
$loader->add('Cognesy\\Instructor\\', __DIR__ . '/../../src/');

use Cognesy\Instructor\Extras\Embeddings\Embeddings;

// Documents that will be ranked by similarity to the query.
$documents = [
    'Computer vision models are used to analyze images and videos.',
    'The bakers at the Nashville Bakery baked 200 loaves of bread on Monday morning.',
    'The new movie starring Tom Hanks is now playing in theaters.',
    'Famous soccer player Lionel Messi has arrived in town.',
    'News about the latest iPhone model has been leaked.',
    'New car model by Tesla is now available for pre-order.',
    'Philip K. Dick is an author of many sci-fi novels.',
];

$query = "technology news";

// Connection names passed to withConnection(), one run per connection.
$connections = [
    'azure',
    'cohere1',
    'gemini',
    'jina',
    'mistral',
    //'ollama',
    'openai'
];

foreach($connections as $connection) {
    // Ask the selected connection for the 3 documents most similar to the query.
    $bestMatches = (new Embeddings)->withConnection($connection)->findSimilar(
        query: $query,
        documents: $documents,
        topK: 3
    );

    echo "\n[$connection]\n";
    dump($bestMatches);
}
?>
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,81 +1,84 @@ | ||
<?php | ||
namespace Cognesy\Instructor\Extras\Embeddings\Drivers; | ||
|
||
use Cognesy\Instructor\Extras\Embeddings\Contracts\CanVectorize; | ||
use Cognesy\Instructor\Extras\Embeddings\Data\EmbeddingsConfig; | ||
use Cognesy\Instructor\Extras\Embeddings\Data\Vector; | ||
use Cognesy\Instructor\Extras\Embeddings\EmbeddingsResponse; | ||
use Cognesy\Instructor\Features\Http\Contracts\CanHandleHttp; | ||
use Cognesy\Instructor\Features\Http\HttpClient; | ||
use Cognesy\Instructor\Features\LLM\Data\Usage; | ||
|
||
class AzureOpenAIDriver implements CanVectorize | ||
{ | ||
/**
 * @param EmbeddingsConfig   $config     Settings read when building requests
 *                                       (apiUrl, endpoint, apiKey, model, metadata).
 * @param CanHandleHttp|null $httpClient HTTP client used to send requests; when
 *                                       omitted, HttpClient::make() supplies one.
 */
public function __construct(
    protected EmbeddingsConfig $config,
    protected ?CanHandleHttp $httpClient = null,
) {
    // The promoted property already holds the argument; fill it only if it was null.
    $this->httpClient ??= HttpClient::make();
}
|
||
/**
 * Sends the inputs to the configured embeddings endpoint and parses the reply.
 *
 * @param array $input   Items to embed; sent as the `input` field of the request body.
 * @param array $options Extra request fields, merged over the default body fields.
 * @return EmbeddingsResponse Vectors and token usage extracted from the response.
 * @throws \JsonException When the response body is not valid JSON.
 */
public function vectorize(array $input, array $options = []): EmbeddingsResponse {
    $response = $this->httpClient->handle(
        $this->getEndpointUrl(),
        $this->getRequestHeaders(),
        $this->getRequestBody($input, $options),
    );
    // Without JSON_THROW_ON_ERROR a malformed body decodes to null, which would
    // reach toResponse(array) and fail with an unrelated TypeError.
    $decoded = json_decode($response->getContents(), true, flags: JSON_THROW_ON_ERROR);
    return $this->toResponse($decoded);
}
|
||
// INTERNAL ///////////////////////////////////////////////// | ||
|
||
/**
 * Builds the full request URL.
 *
 * Every `{key}` placeholder found in apiUrl + endpoint is replaced with the
 * metadata value stored under `key`; the query string from getUrlParams()
 * is appended afterwards.
 */
protected function getEndpointUrl(): string {
    $metadata = $this->config->metadata;
    $placeholders = array_map(fn($key) => "{".$key."}", array_keys($metadata));
    $template = "{$this->config->apiUrl}{$this->config->endpoint}";
    $url = str_replace($placeholders, array_values($metadata), $template);
    return $url . $this->getUrlParams();
}
|
||
/**
 * Returns the URL query string (including the leading `?`), or an empty
 * string when the metadata holds no non-empty `apiVersion`.
 */
protected function getUrlParams(): string {
    $apiVersion = $this->config->metadata['apiVersion'] ?? '';
    // array_filter() without a callback drops the entry when the version is empty.
    $query = array_filter(['api-version' => $apiVersion]);
    return empty($query) ? '' : '?' . http_build_query($query);
}
|
||
/**
 * Returns the HTTP headers sent with every request: the configured API key
 * and a JSON content type.
 */
protected function getRequestHeaders(): array {
    $headers = [];
    $headers['Api-Key'] = $this->config->apiKey;
    $headers['Content-Type'] = 'application/json';
    return $headers;
}
|
||
/**
 * Builds the request payload.
 *
 * @param array $input   Items to embed.
 * @param array $options Caller-supplied fields; they override the defaults on key collision.
 * @return array Merged payload with empty (falsy) entries removed.
 */
protected function getRequestBody(array $input, array $options) : array {
    $defaults = [
        'input' => $input,
        'model' => $this->config->model,
        'encoding_format' => 'float',
    ];
    $body = array_merge($defaults, $options);
    // No callback: array_filter() removes every entry whose value is empty.
    return array_filter($body);
}
|
||
/**
 * Converts the decoded API response into an EmbeddingsResponse.
 *
 * @param array $response Decoded response; each entry of `data` is read for
 *                        its `embedding` and `index` fields.
 */
protected function toResponse(array $response) : EmbeddingsResponse {
    $toVector = fn($item) => new Vector(values: $item['embedding'], id: $item['index']);
    $vectors = array_map($toVector, $response['data']);
    $usage = $this->makeUsage($response);
    return new EmbeddingsResponse(vectors: $vectors, usage: $usage);
}
|
||
/**
 * Extracts token usage from the decoded API response.
 *
 * Input tokens come from `usage.prompt_tokens`; output tokens are computed as
 * `usage.total_tokens` minus `usage.prompt_tokens`. Missing fields count as 0.
 */
protected function makeUsage(array $response): Usage {
    $promptTokens = $response['usage']['prompt_tokens'] ?? 0;
    $totalTokens = $response['usage']['total_tokens'] ?? 0;
    return new Usage(
        inputTokens: $promptTokens,
        outputTokens: $totalTokens - $promptTokens,
    );
}
<?php | ||
namespace Cognesy\Instructor\Extras\Embeddings\Drivers; | ||
|
||
use Cognesy\Instructor\Events\EventDispatcher; | ||
use Cognesy\Instructor\Extras\Embeddings\Contracts\CanVectorize; | ||
use Cognesy\Instructor\Extras\Embeddings\Data\EmbeddingsConfig; | ||
use Cognesy\Instructor\Extras\Embeddings\Data\Vector; | ||
use Cognesy\Instructor\Extras\Embeddings\EmbeddingsResponse; | ||
use Cognesy\Instructor\Features\Http\Contracts\CanHandleHttp; | ||
use Cognesy\Instructor\Features\Http\HttpClient; | ||
use Cognesy\Instructor\Features\LLM\Data\Usage; | ||
|
||
/**
 * Embeddings driver that sends vectorization requests to the endpoint described
 * by an EmbeddingsConfig, authenticating with an `Api-Key` header.
 */
class AzureOpenAIDriver implements CanVectorize
{
    /**
     * @param EmbeddingsConfig     $config     Settings read when building requests
     *                                         (apiUrl, endpoint, apiKey, model, metadata).
     * @param CanHandleHttp|null   $httpClient HTTP client used to send requests; when
     *                                         omitted, one is created via HttpClient::make().
     * @param EventDispatcher|null $events     Event dispatcher; a new one is created when
     *                                         omitted and handed to the default HTTP client.
     */
    public function __construct(
        protected EmbeddingsConfig $config,
        protected ?CanHandleHttp $httpClient = null,
        protected ?EventDispatcher $events = null,
    ) {
        // Resolve the dispatcher first: the default HTTP client is built with it.
        $this->events = $events ?? new EventDispatcher();
        $this->httpClient = $httpClient ?? HttpClient::make(events: $this->events);
    }

    /**
     * Sends the inputs to the configured embeddings endpoint and parses the reply.
     *
     * @param array $input   Items to embed; sent as the `input` field of the request body.
     * @param array $options Extra request fields, merged over the default body fields.
     * @return EmbeddingsResponse Vectors and token usage extracted from the response.
     * @throws \JsonException When the response body is not valid JSON.
     */
    public function vectorize(array $input, array $options = []): EmbeddingsResponse {
        $response = $this->httpClient->handle(
            $this->getEndpointUrl(),
            $this->getRequestHeaders(),
            $this->getRequestBody($input, $options),
        );
        // Without JSON_THROW_ON_ERROR a malformed body decodes to null, which would
        // reach toResponse(array) and fail with an unrelated TypeError.
        $decoded = json_decode($response->getContents(), true, flags: JSON_THROW_ON_ERROR);
        return $this->toResponse($decoded);
    }

    // INTERNAL /////////////////////////////////////////////////

    /**
     * Builds the full request URL.
     *
     * Every `{key}` placeholder found in apiUrl + endpoint is replaced with the
     * metadata value stored under `key`; the query string from getUrlParams()
     * is appended afterwards.
     */
    protected function getEndpointUrl(): string {
        return str_replace(
            search: array_map(fn($key) => "{".$key."}", array_keys($this->config->metadata)),
            replace: array_values($this->config->metadata),
            subject: "{$this->config->apiUrl}{$this->config->endpoint}"
        ) . $this->getUrlParams();
    }

    /**
     * Returns the URL query string (including the leading `?`), or an empty
     * string when the metadata holds no non-empty `apiVersion`.
     */
    protected function getUrlParams(): string {
        // array_filter() without a callback drops the entry when the version is empty.
        $params = array_filter([
            'api-version' => $this->config->metadata['apiVersion'] ?? '',
        ]);
        if (!empty($params)) {
            return '?' . http_build_query($params);
        }
        return '';
    }

    /**
     * Returns the HTTP headers sent with every request: the configured API key
     * and a JSON content type.
     */
    protected function getRequestHeaders(): array {
        return [
            'Api-Key' => $this->config->apiKey,
            'Content-Type' => 'application/json',
        ];
    }

    /**
     * Builds the request payload.
     *
     * @param array $input   Items to embed.
     * @param array $options Caller-supplied fields; they override the defaults on key collision.
     * @return array Merged payload with empty (falsy) entries removed.
     */
    protected function getRequestBody(array $input, array $options) : array {
        return array_filter(array_merge([
            'input' => $input,
            'model' => $this->config->model,
            'encoding_format' => 'float',
        ], $options));
    }

    /**
     * Converts the decoded API response into an EmbeddingsResponse.
     *
     * @param array $response Decoded response; each entry of `data` is read for
     *                        its `embedding` and `index` fields.
     */
    protected function toResponse(array $response) : EmbeddingsResponse {
        return new EmbeddingsResponse(
            vectors: array_map(
                callback: fn($item) => new Vector(values: $item['embedding'], id: $item['index']),
                array: $response['data']
            ),
            usage: $this->makeUsage($response),
        );
    }

    /**
     * Extracts token usage from the decoded API response.
     *
     * Input tokens come from `usage.prompt_tokens`; output tokens are computed as
     * `usage.total_tokens` minus `usage.prompt_tokens`. Missing fields count as 0.
     */
    protected function makeUsage(array $response): Usage {
        return new Usage(
            inputTokens: $response['usage']['prompt_tokens'] ?? 0,
            outputTokens: ($response['usage']['total_tokens'] ?? 0) - ($response['usage']['prompt_tokens'] ?? 0),
        );
    }
}
Oops, something went wrong.