Skip to content

Commit

Permalink
Fixes in JSON extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
ddebowczyk committed Sep 29, 2024
1 parent 7cd9f9e commit d4d6d69
Show file tree
Hide file tree
Showing 13 changed files with 273 additions and 36 deletions.
18 changes: 15 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -904,9 +904,21 @@ To provide an essential functionality we needed here Instructor for PHP leverage

Instructor for PHP is compatible with PHP 8.2 or later and, due to minimal dependencies, should work with any framework of your choice.

- [SaloonPHP](https://docs.saloon.dev/) - for handling communication with LLM API providers
- [Symfony components](https://symfony.com/) - for validation, serialization and other utilities

- [Guzzle](https://docs.guzzlephp.org/)
- [Symfony components](https://symfony.com/)
* symfony/property-access
* symfony/property-info
* symfony/serializer
* symfony/type-info
* symfony/validator
- adbario/php-dot-notation
- phpdocumentor/reflection-docblock
- phpstan/phpdoc-parser
- vlucas/phpdotenv

Additional dependencies are required for some extras:
- spatie/array-to-xml
- gioni06/gpt3-tokenizer

## TODOs

Expand Down
2 changes: 0 additions & 2 deletions composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@
"ext-fileinfo": "*",
"gioni06/gpt3-tokenizer": "^1.2",
"guzzlehttp/guzzle": "^7.8",
"league/flysystem": "^3.0",
"nyholm/psr7": "^1.8",
"phpdocumentor/reflection-docblock": "^5.4",
"phpstan/phpdoc-parser": "^1.29",
"psr/log": "^3.0",
Expand Down
23 changes: 14 additions & 9 deletions examples/A02_Advanced/ContextCaching/run.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,11 @@
use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Instructor;
use Cognesy\Instructor\Schema\Attributes\Description;
use Cognesy\Instructor\Utils\Env;
use Cognesy\Instructor\Utils\Str;

class Project {
public string $name;
public string $targetAudience;
/** @var string[] */
#[Description('Technology platform and libraries used in the project')]
public array $technologies;
Expand All @@ -43,9 +44,9 @@ class Project {
/** @var string[] */
#[Description('Applications and potential use cases of the project')]
public array $applications;
#[Description('Explain the purpose of the project and the problems it solves')]
#[Description('Explain the purpose of the project and the domain specific problems it solves')]
public string $description;
#[Description('Example code in Markdown demonstrating the application of the library')]
#[Description('Example code in Markdown demonstrating domain specific application of the library')]
public string $code;
}
?>
Expand All @@ -59,9 +60,9 @@ class Project {
$content = file_get_contents(__DIR__ . '/../../../README.md');

$cached = (new Instructor)->withConnection('anthropic')->cacheContext(
system: 'Your goal is to respond questions about the project described in the README.md file',
system: 'Your goal is to respond questions about the project described in the README.md file'
. "\n\n# README.md\n\n" . $content,
prompt: 'Respond to the user with a description of the project with JSON using schema:\n<|json_schema|>',
input: "# README.md\n\n" . $content,
);
?>
```
Expand All @@ -73,12 +74,14 @@ class Project {
```php
<?php
$project = $cached->respond(
messages: 'Describe the project - my audience is P&C insurance CIOs',
mode: Mode::Json,
messages: 'Describe the project in a way compelling to my audience: P&C insurance CIOs.',
responseModel: Project::class,
options: ['max_tokens' => 4096],
mode: Mode::Json,
);
dump($project);
assert($project instanceof Project);
assert(Str::contains($project->name, 'Instructor'));
?>
```
Now we can use the same context to ask the user to describe the project for a different
Expand All @@ -90,11 +93,13 @@ class Project {
```php
<?php
$project = $cached->respond(
messages: 'Describe the project - my audience is boutique CMS consulting company owner',
mode: Mode::Json,
messages: "Describe the project in a way compelling to my audience: boutique CMS consulting company owner.",
responseModel: Project::class,
options: ['max_tokens' => 4096],
mode: Mode::Json,
);
dump($project);
assert($project instanceof Project);
assert(Str::contains($project->name, 'Instructor'));
?>
```
6 changes: 6 additions & 0 deletions src/Core/RequestHandler.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
use Cognesy\Instructor\Contracts\CanHandleSyncRequest;
use Cognesy\Instructor\Data\Request;
use Cognesy\Instructor\Data\ResponseModel;
use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Events\EventDispatcher;
use Cognesy\Instructor\Events\Instructor\ResponseGenerated;
use Cognesy\Instructor\Events\Request\NewValidationRecoveryAttempt;
Expand All @@ -17,6 +18,7 @@
use Cognesy\Instructor\Extras\LLM\Data\ApiResponse;
use Cognesy\Instructor\Extras\LLM\Inference;
use Cognesy\Instructor\Extras\LLM\InferenceResponse;
use Cognesy\Instructor\Utils\Json\Json;
use Cognesy\Instructor\Utils\Result\Result;
use Exception;
use Generator;
Expand Down Expand Up @@ -92,6 +94,10 @@ protected function getApiResponse(Request $request) : ApiResponse {
try {
$this->events->dispatch(new RequestSentToLLM($request));
$apiResponse = $this->makeInference($request)->toApiResponse();
$apiResponse->content = match($request->mode()) {
Mode::Text => $apiResponse->content,
default => Json::find($apiResponse->content),
};
} catch (Exception $e) {
$this->events->dispatch(new RequestToLLMFailed($request, $e->getMessage()));
throw $e;
Expand Down
2 changes: 1 addition & 1 deletion src/Core/StreamResponse/PartialsGenerator.php
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ protected function tryGetPartialObject(
string $partialJsonData,
ResponseModel $responseModel,
) : Result {
return Chain::from(fn() => Json::fix($partialJsonData))
return Chain::from(fn() => Json::fix(Json::find($partialJsonData)))
->through(fn($jsonData) => $this->responseDeserializer->deserialize($jsonData, $responseModel, $this?->toolCalls->last()->name))
->through(fn($object) => $this->responseTransformer->transform($object))
->result();
Expand Down
1 change: 1 addition & 0 deletions src/Extras/Debug/Debug.php
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ private static function printHeaders(array $headers) : void {
}

/**
 * Dumps a raw body string after decoding it as JSON.
 *
 * The decoded value is passed to dump(); when the body is not valid JSON,
 * json_decode() yields null and that is what gets dumped.
 *
 * @param string $body Raw body text, expected to contain JSON.
 */
private static function printBody(string $body) : void {
    $decodedBody = json_decode($body);
    /** @noinspection ForgottenDebugOutputInspection */
    dump($decodedBody);
}
}
6 changes: 2 additions & 4 deletions src/Extras/Http/Drivers/GuzzleHttpClient.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

namespace Cognesy\Instructor\Extras\Http\Drivers;

use Cognesy\Instructor\Events\EventDispatcher;
use Cognesy\Instructor\Extras\Debug\Debug;
use Cognesy\Instructor\Extras\Http\Contracts\CanHandleHttp;
use Cognesy\Instructor\Extras\Http\Data\HttpClientConfig;
Expand All @@ -22,14 +21,13 @@ class GuzzleHttpClient implements CanHandleHttp

public function __construct(
protected HttpClientConfig $config,
protected ?EventDispatcher $events = null,
protected ?Client $httpClient = null,
) {
$this->events = $events ?? new EventDispatcher();
if (isset($this->httpClient) && Debug::isEnabled()) {
throw new InvalidArgumentException("Guzzle does not allow to inject debugging stack into existing client. Turn off debug or use default client.");
}
$this->client = match(Debug::isEnabled()) {
false => new Client(),
false => $httpClient ?? new Client(),
true => new Client(['handler' => $this->addDebugStack(HandlerStack::create())]),
};
}
Expand Down
8 changes: 5 additions & 3 deletions src/Extras/LLM/InferenceResponse.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
use Cognesy\Instructor\Events\ApiClient\ApiResponseReceived;
use Cognesy\Instructor\Events\ApiClient\PartialApiResponseReceived;
use Cognesy\Instructor\Events\EventDispatcher;
use Cognesy\Instructor\Events\Inference\InferenceResponseGenerated;
use Cognesy\Instructor\Events\Inference\PartialInferenceResponseGenerated;
use Cognesy\Instructor\Extras\Http\StreamReader;
use Cognesy\Instructor\Extras\LLM\Contracts\CanHandleInference;
use Cognesy\Instructor\Extras\LLM\Data\ApiResponse;
Expand All @@ -21,6 +19,7 @@ class InferenceResponse
{
protected EventDispatcher $events;
protected StreamReader $streamReader;
protected string $responseContent = '';

public function __construct(
protected ResponseInterface $response,
Expand Down Expand Up @@ -101,7 +100,10 @@ public function psrStream() : StreamInterface {
// INTERNAL /////////////////////////////////////////////////

protected function responseData() : array {
return Json::parse($this->response->getBody()->getContents()) ?? [];
if (empty($this->responseContent)) {
$this->responseContent = $this->response->getBody()->getContents();
}
return Json::parse($this->responseContent);
}

/**
Expand Down
31 changes: 22 additions & 9 deletions src/Utils/Json/Json.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@ public static function find(string $text) : string {
if (empty($text)) {
return '';
}
$candidates = (new Json)->extractJSONStrings($text);
return empty($candidates) ? '' : $candidates[0];
return (new Json)->tryExtractJson($text);
}

public static function findPartial(string $text) : string {
Expand All @@ -25,7 +24,7 @@ public static function findPartial(string $text) : string {
}

public static function fix(string $text) : string {
return (new JsonParser)->fix($text);
return (new PartialJsonParser)->fix($text);
}

public static function parse(string $text, mixed $default = null) : mixed {
Expand All @@ -34,7 +33,7 @@ public static function parse(string $text, mixed $default = null) : mixed {
}

public static function parsePartial(string $text, bool $associative = true) : mixed {
return (new JsonParser)->parse($text, $associative);
return (new PartialJsonParser)->parse($text, $associative);
}

public static function encode(mixed $json, int $options = 0) : string {
Expand All @@ -43,16 +42,31 @@ public static function encode(mixed $json, int $options = 0) : string {

// INTERNAL ////////////////////////////////////////////////////////////////

/**
 * Tries to pull a JSON document out of arbitrary text.
 *
 * Two strategies are attempted in order:
 *  1. take the first candidate returned by extractJSONStrings(),
 *  2. cut the span returned by naiveExtract(), parse it with
 *     ResilientJsonParser and re-encode the parsed value.
 *
 * @param string $text Text that may contain a JSON document.
 * @return string The JSON string, or '' when nothing usable was found.
 */
private function tryExtractJson(string $text) : string {
    // approach 1: first candidate (if any) from the candidate extractor
    $candidates = $this->extractJSONStrings($text);
    $json = $candidates[0] ?? '';
    if (!empty($json)) {
        return $json;
    }
    // approach 2: naive cut + resilient parsing, then re-encode
    $maybeJson = $this->naiveExtract($text);
    $parsed = (new ResilientJsonParser($maybeJson))->parse();
    if (!empty($parsed)) {
        $encoded = json_encode($parsed);
        // json_encode() returns false on failure; keep the declared string
        // return type by reporting "no JSON found" instead.
        return ($encoded === false) ? '' : $encoded;
    }
    // failed to find JSON
    return '';
}

private function naiveExtract(string $text) : string {
if (empty($text)) {
return '';
}
$firstOpenBracket = strpos($text, '{');
if ($firstOpenBracket === false) {
if (($firstOpenBracket = strpos($text, '{')) === false) {
return '';
}
$lastCloseBracket = strrpos($text, '}');
if ($lastCloseBracket === false) {
if (($lastCloseBracket = strrpos($text, '}')) === false) {
return '';
}
return substr($text, $firstOpenBracket, $lastCloseBracket - $firstOpenBracket + 1);
Expand Down Expand Up @@ -95,7 +109,6 @@ private function extractJSONStrings(string $text): array {
$currentCandidate .= $char;
}
}

return $this->validateJSONStrings($candidates);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
* Original source: https://github.com/greghunt/partial-json/
* License: MIT
*/
class JsonParser
class PartialJsonParser
{
private $parsers = [];
private string $lastParseReminding = '';
Expand Down
Loading

0 comments on commit d4d6d69

Please sign in to comment.