From 33677a64e3b895524d03563b51cc236e68e14185 Mon Sep 17 00:00:00 2001 From: ddebowczyk Date: Mon, 4 Mar 2024 01:08:11 +0100 Subject: [PATCH] More flexible processing architecture; New Schema analyzer; Support for Scalar results --- NOTES.md | 66 +++ README.md | 419 ++++++++++-------- docs/data_model.md | 78 +++- experimental/BooleanLLMFunction.php | 33 -- experimental/IntegerLLMFunction.php | 34 -- experimental/SimpleFunctionCallSchema.php | 39 -- experimental/StringLLMFunction.php | 33 -- src/Contracts/CanDeserialize.php | 2 +- src/Contracts/CanDeserializeJson.php | 8 + src/Contracts/CanProvideSchema.php | 8 + src/Contracts/CanSelfValidate.php | 9 + src/Contracts/CanTransformResponse.php | 8 + src/Contracts/CanValidateObject.php | 9 - src/Contracts/CanValidateResponse.php | 9 + .../Symfony/ArrayOfNormalizer.php | 26 -- src/Extras/ScalarAdapter/Scalar.php | 171 +++++++ src/Extras/ScalarAdapter/ValueType.php | 20 + src/Instructor.php | 81 +++- src/Reflection/Factories/ArrayItemFactory.php | 54 --- src/Reflection/TypeDefs/SimpleTypeDef.php | 13 - src/Reflection/TypeDefs/TypeDef.php | 9 - src/Reflection/TypeDefs/UndefinedTypeDef.php | 10 - src/ResponseModel.php | 131 ++++++ .../PropertyInfoBased/Data/Reference.php | 12 + .../Data/Schema/ArraySchema.php | 28 ++ .../Data/Schema/EnumSchema.php | 15 + .../Data/Schema/ObjectRefSchema.php | 27 ++ .../Data/Schema/ObjectSchema.php | 39 ++ .../Data/Schema/ScalarSchema.php | 14 + .../PropertyInfoBased/Data/Schema/Schema.php | 30 ++ .../PropertyInfoBased/Data/TypeDetails.php | 37 ++ .../Factories/FunctionCallFactory.php | 112 +++++ .../Factories/InstanceSchemaFactory.php | 13 + .../Factories/SchemaFactory.php | 141 ++++++ .../Factories/TypeDetailsFactory.php | 178 ++++++++ .../PropertyInfoBased/Utils/ClassInfo.php | 105 +++++ .../Utils/ReferenceQueue.php | 38 ++ .../{ => ReflectionBased/Data}/FCArray.php | 2 +- .../{ => ReflectionBased/Data}/FCAtom.php | 2 +- .../{ => ReflectionBased/Data}/FCEnum.php | 2 +- .../{ => 
ReflectionBased/Data}/FCFunction.php | 2 +- .../{ => ReflectionBased/Data}/FCObject.php | 2 +- .../Factories/FunctionCallFactory.php} | 16 +- .../Attribute/AttributeCollection.php | 2 +- .../Reflection/Attribute/AttributeUtils.php | 2 +- .../ReflectionBased}/Reflection/ClassData.php | 10 +- .../Reflection/Enums/JsonType.php | 2 +- .../Reflection/Enums/PhpType.php | 2 +- .../Reflection/Factories/ArrayItemFactory.php | 54 +++ .../Factories/ParameterDataFactory.php | 22 +- .../Factories/PropertyDataFactory.php | 22 +- .../Reflection/Factories/TypeDefFactory.php | 20 +- .../Reflection/FunctionData.php | 10 +- .../Reflection/MethodData.php | 14 +- .../ParameterData/ArrayParameterData.php | 8 +- .../ParameterData/BooleanParameterData.php | 10 +- .../ParameterData/EnumParameterData.php | 12 +- .../ParameterData/FloatParameterData.php | 10 +- .../ParameterData/IntegerParameterData.php | 10 +- .../ParameterData/ObjectParameterData.php | 10 +- .../ParameterData/ParameterData.php | 10 +- .../ParameterData/StringParameterData.php | 10 +- .../ParameterData/UndefinedParameterData.php | 10 +- .../Reflection/PhpDoc/DocstringUtils.php | 2 +- .../PropertyData/ArrayPropertyData.php | 10 +- .../PropertyData/BooleanPropertyData.php | 10 +- .../PropertyData/EnumPropertyData.php | 12 +- .../PropertyData/FloatPropertyData.php | 10 +- .../PropertyData/IntegerPropertyData.php | 10 +- .../PropertyData/ObjectPropertyData.php | 10 +- .../Reflection/PropertyData/PropertyData.php | 10 +- .../PropertyData/StringPropertyData.php | 10 +- .../PropertyData/UndefinedPropertyData.php | 10 +- .../Reflection/Tag/TagCollection.php | 4 +- .../Reflection/Tag/TypeDefTag.php | 2 +- .../Reflection/TypeDefs/ArrayTypeDef.php | 4 +- .../Reflection/TypeDefs/EnumTypeDef.php | 4 +- .../Reflection/TypeDefs/ObjectTypeDef.php | 4 +- .../Reflection/TypeDefs/SimpleTypeDef.php | 13 + .../Reflection/TypeDefs/TypeDef.php | 9 + .../Reflection/TypeDefs/TypeDefContext.php | 10 +- .../Reflection/TypeDefs/UndefinedTypeDef.php | 
10 + .../Reflection/Utils/DescriptionUtils.php | 6 +- .../Reflection/Utils/ReflectionUtils.php | 4 +- src/Validators/Symfony/Validator.php | 11 +- .../it_creates_function_call___function.snap | 0 .../it_creates_function_call___method.snap | 0 .../it_creates_function_call___object.snap | 0 .../it_creates_function_call___object.snap | 93 ++++ tests/Examples/PersonWithValidationMixin.php | 25 ++ tests/Examples/Schema/ComplexClass.php | 35 ++ tests/Examples/Schema/IntEnum.php | 8 + tests/Examples/Schema/NestedClass.php | 13 + .../Examples/Schema/SelfReferencingClass.php | 9 + tests/Examples/Schema/SimpleClass.php | 17 + tests/Examples/Schema/StringEnum.php | 8 + tests/Examples/extractEvents.php | 22 +- tests/Feature/ExperimentalTest.php | 1 + tests/Feature/ExtractionTest.php | 3 +- tests/Feature/FeaturesTest.php | 19 + ...chemaTest.php => ReflectionSchemaTest.php} | 28 +- tests/Feature/ScalarsTest.php | 97 ++++ tests/Feature/SymfonySchemaTest.php | 33 ++ tests/Feature/ValidationTest.php | 16 +- 104 files changed, 2264 insertions(+), 673 deletions(-) create mode 100644 NOTES.md delete mode 100644 experimental/BooleanLLMFunction.php delete mode 100644 experimental/IntegerLLMFunction.php delete mode 100644 experimental/SimpleFunctionCallSchema.php delete mode 100644 experimental/StringLLMFunction.php create mode 100644 src/Contracts/CanDeserializeJson.php create mode 100644 src/Contracts/CanProvideSchema.php create mode 100644 src/Contracts/CanSelfValidate.php create mode 100644 src/Contracts/CanTransformResponse.php delete mode 100644 src/Contracts/CanValidateObject.php create mode 100644 src/Contracts/CanValidateResponse.php delete mode 100644 src/Deserializers/Symfony/ArrayOfNormalizer.php create mode 100644 src/Extras/ScalarAdapter/Scalar.php create mode 100644 src/Extras/ScalarAdapter/ValueType.php delete mode 100644 src/Reflection/Factories/ArrayItemFactory.php delete mode 100644 src/Reflection/TypeDefs/SimpleTypeDef.php delete mode 100644 
src/Reflection/TypeDefs/TypeDef.php delete mode 100644 src/Reflection/TypeDefs/UndefinedTypeDef.php create mode 100644 src/ResponseModel.php create mode 100644 src/Schema/PropertyInfoBased/Data/Reference.php create mode 100644 src/Schema/PropertyInfoBased/Data/Schema/ArraySchema.php create mode 100644 src/Schema/PropertyInfoBased/Data/Schema/EnumSchema.php create mode 100644 src/Schema/PropertyInfoBased/Data/Schema/ObjectRefSchema.php create mode 100644 src/Schema/PropertyInfoBased/Data/Schema/ObjectSchema.php create mode 100644 src/Schema/PropertyInfoBased/Data/Schema/ScalarSchema.php create mode 100644 src/Schema/PropertyInfoBased/Data/Schema/Schema.php create mode 100644 src/Schema/PropertyInfoBased/Data/TypeDetails.php create mode 100644 src/Schema/PropertyInfoBased/Factories/FunctionCallFactory.php create mode 100644 src/Schema/PropertyInfoBased/Factories/InstanceSchemaFactory.php create mode 100644 src/Schema/PropertyInfoBased/Factories/SchemaFactory.php create mode 100644 src/Schema/PropertyInfoBased/Factories/TypeDetailsFactory.php create mode 100644 src/Schema/PropertyInfoBased/Utils/ClassInfo.php create mode 100644 src/Schema/PropertyInfoBased/Utils/ReferenceQueue.php rename src/Schema/{ => ReflectionBased/Data}/FCArray.php (85%) rename src/Schema/{ => ReflectionBased/Data}/FCAtom.php (82%) rename src/Schema/{ => ReflectionBased/Data}/FCEnum.php (84%) rename src/Schema/{ => ReflectionBased/Data}/FCFunction.php (90%) rename src/Schema/{ => ReflectionBased/Data}/FCObject.php (88%) rename src/Schema/{FunctionCallSchema.php => ReflectionBased/Factories/FunctionCallFactory.php} (80%) rename src/{ => Schema/ReflectionBased}/Reflection/Attribute/AttributeCollection.php (89%) rename src/{ => Schema/ReflectionBased}/Reflection/Attribute/AttributeUtils.php (87%) rename src/{ => Schema/ReflectionBased}/Reflection/ClassData.php (75%) rename src/{ => Schema/ReflectionBased}/Reflection/Enums/JsonType.php (90%) rename src/{ => 
Schema/ReflectionBased}/Reflection/Enums/PhpType.php (92%) create mode 100644 src/Schema/ReflectionBased/Reflection/Factories/ArrayItemFactory.php rename src/{ => Schema/ReflectionBased}/Reflection/Factories/ParameterDataFactory.php (55%) rename src/{ => Schema/ReflectionBased}/Reflection/Factories/PropertyDataFactory.php (55%) rename src/{ => Schema/ReflectionBased}/Reflection/Factories/TypeDefFactory.php (84%) rename src/{ => Schema/ReflectionBased}/Reflection/FunctionData.php (73%) rename src/{ => Schema/ReflectionBased}/Reflection/MethodData.php (65%) rename src/{ => Schema/ReflectionBased}/Reflection/ParameterData/ArrayParameterData.php (70%) rename src/{ => Schema/ReflectionBased}/Reflection/ParameterData/BooleanParameterData.php (58%) rename src/{ => Schema/ReflectionBased}/Reflection/ParameterData/EnumParameterData.php (71%) rename src/{ => Schema/ReflectionBased}/Reflection/ParameterData/FloatParameterData.php (58%) rename src/{ => Schema/ReflectionBased}/Reflection/ParameterData/IntegerParameterData.php (58%) rename src/{ => Schema/ReflectionBased}/Reflection/ParameterData/ObjectParameterData.php (73%) rename src/{ => Schema/ReflectionBased}/Reflection/ParameterData/ParameterData.php (73%) rename src/{ => Schema/ReflectionBased}/Reflection/ParameterData/StringParameterData.php (58%) rename src/{ => Schema/ReflectionBased}/Reflection/ParameterData/UndefinedParameterData.php (59%) rename src/{ => Schema/ReflectionBased}/Reflection/PhpDoc/DocstringUtils.php (94%) rename src/{ => Schema/ReflectionBased}/Reflection/PropertyData/ArrayPropertyData.php (67%) rename src/{ => Schema/ReflectionBased}/Reflection/PropertyData/BooleanPropertyData.php (60%) rename src/{ => Schema/ReflectionBased}/Reflection/PropertyData/EnumPropertyData.php (71%) rename src/{ => Schema/ReflectionBased}/Reflection/PropertyData/FloatPropertyData.php (60%) rename src/{ => Schema/ReflectionBased}/Reflection/PropertyData/IntegerPropertyData.php (60%) rename src/{ => 
Schema/ReflectionBased}/Reflection/PropertyData/ObjectPropertyData.php (73%) rename src/{ => Schema/ReflectionBased}/Reflection/PropertyData/PropertyData.php (73%) rename src/{ => Schema/ReflectionBased}/Reflection/PropertyData/StringPropertyData.php (60%) rename src/{ => Schema/ReflectionBased}/Reflection/PropertyData/UndefinedPropertyData.php (61%) rename src/{ => Schema/ReflectionBased}/Reflection/Tag/TagCollection.php (91%) rename src/{ => Schema/ReflectionBased}/Reflection/Tag/TypeDefTag.php (70%) rename src/{ => Schema/ReflectionBased}/Reflection/TypeDefs/ArrayTypeDef.php (65%) rename src/{ => Schema/ReflectionBased}/Reflection/TypeDefs/EnumTypeDef.php (62%) rename src/{ => Schema/ReflectionBased}/Reflection/TypeDefs/ObjectTypeDef.php (57%) create mode 100644 src/Schema/ReflectionBased/Reflection/TypeDefs/SimpleTypeDef.php create mode 100644 src/Schema/ReflectionBased/Reflection/TypeDefs/TypeDef.php rename src/{ => Schema/ReflectionBased}/Reflection/TypeDefs/TypeDefContext.php (80%) create mode 100644 src/Schema/ReflectionBased/Reflection/TypeDefs/UndefinedTypeDef.php rename src/{ => Schema/ReflectionBased}/Reflection/Utils/DescriptionUtils.php (85%) rename src/{ => Schema/ReflectionBased}/Reflection/Utils/ReflectionUtils.php (88%) rename tests/.pest/snapshots/Feature/{SchemaTest => ReflectionSchemaTest}/it_creates_function_call___function.snap (100%) rename tests/.pest/snapshots/Feature/{SchemaTest => ReflectionSchemaTest}/it_creates_function_call___method.snap (100%) rename tests/.pest/snapshots/Feature/{SchemaTest => ReflectionSchemaTest}/it_creates_function_call___object.snap (100%) create mode 100644 tests/.pest/snapshots/Feature/SymfonySchemaTest/it_creates_function_call___object.snap create mode 100644 tests/Examples/PersonWithValidationMixin.php create mode 100644 tests/Examples/Schema/ComplexClass.php create mode 100644 tests/Examples/Schema/IntEnum.php create mode 100644 tests/Examples/Schema/NestedClass.php create mode 100644 
tests/Examples/Schema/SelfReferencingClass.php create mode 100644 tests/Examples/Schema/SimpleClass.php create mode 100644 tests/Examples/Schema/StringEnum.php rename tests/Feature/{SchemaTest.php => ReflectionSchemaTest.php} (72%) create mode 100644 tests/Feature/ScalarsTest.php create mode 100644 tests/Feature/SymfonySchemaTest.php diff --git a/NOTES.md b/NOTES.md new file mode 100644 index 00000000..d027ed9d --- /dev/null +++ b/NOTES.md @@ -0,0 +1,66 @@ +# NOTES + +## Support scalar types as response_model + +Solution 1: +Have universal scalar value adapter with HasSchemaProvider interface +HasSchemaProvider = schema() : Schema, which, if present, will be used to generate schema +Instead of the default schema generation mechanism +This will allow for custom schema generation + +## Custom schema generation - not based on class reflection & PHPDoc + +Model classes could implement HasSchemaProvider interface, which would allow for custom schema generation - rendering logic would skip reflection and use the provided schema instead. + +SchemaProvider could be a trait, which would allow for easy implementation. + +Example SchemaProvider: +class SchemaProvider { + public function schema(): Schema { + return new Schema([ + 'type' => 'object', + 'properties' => [ + 'id' => ['type' => 'integer', 'description' => 'Description'], + 'name' => ['type' => 'string', 'description' => 'Description'], + ], + 'required' => ['id', 'name'], + ]); + } +} + +## Validation + +What about validation in such case? we can already have ```validate()``` method in the schema, +Is it enough? + +## Deserialization + +We also need custom deserializer or easier way of customizing existing one. +Specific need is #[Description] attribute, which should be used to generate description. + +## Streaming arrays / iterables + +Callback approach - provide callback to Instructor, which will be called for each +token received (?). 
It does not make sense for structured outputs, only if the result +is iterable / array. + +## Partial updates + +If callback is on, we should be able to provide partial updates to the object + send +notifications about the changes. + +## Observability + +Need and solution to be analyzed + +## Other LLMs + +Either via custom BASE_URIs - via existing OpenAI client or custom LLM classes. +LLM class is the one that needs to handle all model / API specific stuff (e.g. streaming, +modes, etc.). + +## Caching schema + +It may not be worth it purely for performance reasons, but it may be useful for debugging or schema optimization (DSPy like). + +Schema could be saved in version controlled, versioned JSON files and loaded from there. In development mode it would be read from JSON file, unless class file is newer than schema file. \ No newline at end of file diff --git a/README.md b/README.md index e8dfda29..cbef3925 100644 --- a/README.md +++ b/README.md @@ -65,37 +65,37 @@ This is a simple example demonstrating how Instructor retrieves structured infor Response model class is a plain PHP class with typehints specifying the types of fields of the object. 
```php - use Cognesy/Instructor; - use OpenAI; - - // Step 1: Define target data structure(s) - class Person { - public string $name; - public int $age; - } - - // Step 2: Provide content to process - $text = "His name is Jason and he is 28 years old."; - - // Step 3: Use Instructor to run LLM inference - $person = (new Instructor)->respond( - messages: [['role' => 'user', 'content' => $text]], - responseModel: Person::class, - ); // default OpenAI client is used, needs .env file with OPENAI_API_KEY - - // Step 4: Work with structured response data - assert($person instanceof Person); // true - assert($person->name === 'Jason'); // true - assert($person->age === 28); // true - - echo $person->name; // Jason - echo $person->age; // 28 - - var_dump($person); - // Person { - // name: "Jason", - // age: 28 - // } +use Cognesy/Instructor; +use OpenAI; + +// Step 1: Define target data structure(s) +class Person { + public string $name; + public int $age; +} + +// Step 2: Provide content to process +$text = "His name is Jason and he is 28 years old."; + +// Step 3: Use Instructor to run LLM inference +$person = (new Instructor)->respond( + messages: [['role' => 'user', 'content' => $text]], + responseModel: Person::class, +); // default OpenAI client is used, needs .env file with OPENAI_API_KEY + +// Step 4: Work with structured response data +assert($person instanceof Person); // true +assert($person->name === 'Jason'); // true +assert($person->age === 28); // true + +echo $person->name; // Jason +echo $person->age; // 28 + +var_dump($person); +// Person { +// name: "Jason", +// age: 28 +// } ``` > **NOTE:** Currently, Instructor only supports classes / objects as response models. In case you want to extract simple types or arrays, you need to wrap them in a class. 
@@ -107,21 +107,21 @@ Instructor validates results of LLM response against validation rules specified > For further details on available validation rules, check [Symfony Validation constraints](https://symfony.com/doc/current/validation.html#constraints). ```php - use Symfony\Component\Validator\Constraints as Assert; - - class Person { - public string $name; - #[Assert\PositiveOrZero] - public int $age; - } +use Symfony\Component\Validator\Constraints as Assert; - $text = "His name is Jason, he is -28 years old."; - $person = (new Instructor(llm: $mockLLM))->respond( - messages: [['role' => 'user', 'content' => $text]], - responseModel: Person::class, - ); - - // if the resulting object does not validate, Instructor throws an exception +class Person { + public string $name; + #[Assert\PositiveOrZero] + public int $age; +} + +$text = "His name is Jason, he is -28 years old."; +$person = (new Instructor(llm: $mockLLM))->respond( + messages: [['role' => 'user', 'content' => $text]], + responseModel: Person::class, +); + +// if the resulting object does not validate, Instructor throws an exception ``` @@ -132,25 +132,80 @@ In case maxRetries parameter is provided and LLM response does not meet validati Instructor uses validation errors to inform LLM on the problems identified in the response, so that LLM can try self-correcting in the next attempt. 
```php - use Symfony\Component\Validator\Constraints as Assert; - - class Person { - #[Assert\Length(min: 3)] - public string $name; - #[Assert\PositiveOrZero] - public int $age; - } +use Symfony\Component\Validator\Constraints as Assert; + +class Person { + #[Assert\Length(min: 3)] + public string $name; + #[Assert\PositiveOrZero] + public int $age; +} + +$text = "His name is JX, aka Jason, he is -28 years old."; +$person = (new Instructor)->respond( + messages: [['role' => 'user', 'content' => $text]], + responseModel: Person::class, + maxRetries: 3, +); + +// if all LLM's attempts to self-correct the results fail, Instructor throws an exception +``` - $text = "His name is JX, aka Jason, he is -28 years old."; - $person = (new Instructor)->respond( - messages: [['role' => 'user', 'content' => $text]], - responseModel: Person::class, - maxRetries: 3, - ); - - // if all LLM's attempts to self-correct the results fail, Instructor throws an exception + +## Shortcuts + +### String as Input + +You can provide a string instead of an array of messages. This is useful when you want to extract data from a single block of text and want to keep your code simple. + +```php +use Cognesy\Instructor\Instructor; + +$value = (new Instructor)->respond( + messages: "His name is Jason, he is 28 years old.", + responseModel: Person::class, +); +``` + + +### Extracting Scalar Values + +Sometimes we just want to get quick results without defining a class for the response model, especially if we're trying to get a straight, simple answer in the form of a string, integer, boolean or float. Instructor provides a simplified API for such cases. + +```php +use Cognesy\Instructor\Instructor; + +$value = (new Instructor)->respond( + messages: "His name is Jason, he is 28 years old.", + responseModel: Scalar::integer('age'), +); + +var_dump($value); +// int(28) ``` +In this example, we're extracting a single integer value from the text.
You can also use `Scalar::string()`, `Scalar::boolean()` and `Scalar::float()` to extract other types of values. + +Additionally, you can use the Scalar adapter to extract one of the provided options. + +```php +use Cognesy\Instructor\Instructor; + +$value = (new Instructor)->respond( + messages: "His name is Jason, he currently plays Doom Eternal.", + responseModel: Scalar::select( + name: 'activityType', + options: ['work', 'entertainment', 'sport', 'other'] + ), +); + +var_dump($value); +// string(13) "entertainment" +``` + +NOTE: Currently Scalar::select() always returns strings and its ```options``` parameter only accepts string values. + + ## Specifying Data Model @@ -173,17 +228,17 @@ Use PHP type hints to specify the type of extracted data. You can also use PHP DocBlock style comments to specify the type of extracted data. This is useful when you want to specify property types for LLM, but can't or don't want to enforce type at the code level. ```php - class Person { - /** @var string */ - public $name; - /** @var int */ - public $age; - /** @var Address $address person's address */ - public $address; - } +class Person { + /** @var string */ + public $name; + /** @var int */ + public $age; + /** @var Address $address person's address */ + public $address; +} ``` -See PHPDoc documentation for more details on DocBlock: https://docs.phpdoc.org/3.0/guide/getting-started/what-is-a-docblock.html#what-is-a-docblock +See PHPDoc documentation for more details on [DocBlock website](https://docs.phpdoc.org/3.0/guide/getting-started/what-is-a-docblock.html#what-is-a-docblock). ### Typed Collections / Arrays @@ -193,16 +248,16 @@ PHP currently [does not support generics](https://wiki.php.net/rfc/generics) or Use PHP DocBlock style comments to specify the type of array elements. ```php - class Person { - // ... - } +class Person { + // ... +} - class Event { - // ... - /** @var Person[] list of extracted event participants */ - public array $participants; - // ...
- } +class Event { + // ... + /** @var Person[] list of extracted event participants */ + public array $participants; + // ... +} ``` @@ -211,67 +266,67 @@ Use PHP DocBlock style comments to specify the type of array elements. Instructor can retrieve complex data structures from text. Your response model can contain nested objects, arrays, and enums. ```php - use Cognesy/Instructor; - use OpenAI; - - // define a data structures to extract data into - class Person { - public string $name; - public int $age; - public string $profession; - /** @var Skill[] */ - public array $skills; - } - - class Skill { - public string $name; - public SkillType $type; - } - - enum SkillType { - case Technical = 'technical'; - case Other = 'other'; - } - - $text = "Alex is 25 years old software engineer, who knows PHP, Python and can play the guitar."; - - $person = (new Instructor)->respond( - messages: [['role' => 'user', 'content' => $text]], - responseModel: Person::class, - client: OpenAI::client($yourApiKey), - ); // client is passed explicitly, can specify eg. different base URL - - // data is extracted into an object of given class - assert($person instanceof Person); // true - - // you can access object's extracted property values - echo $person->name; // Alex - echo $person->age; // 25 - echo $person->profession; // software engineer - echo $person->skills[0]->name; // PHP - echo $person->skills[0]->type; // SkillType::Technical - // ... 
- - var_dump($person); - // Person { - // name: "Alex", - // age: 25, - // profession: "software engineer", - // skills: [ - // Skill { - // name: "PHP", - // type: SkillType::Technical, - // }, - // Skill { - // name: "Python", - // type: SkillType::Technical, - // }, - // Skill { - // name: "guitar", - // type: SkillType::Other - // }, - // ] - // } +use Cognesy/Instructor; +use OpenAI; + +// define a data structures to extract data into +class Person { + public string $name; + public int $age; + public string $profession; + /** @var Skill[] */ + public array $skills; +} + +class Skill { + public string $name; + public SkillType $type; +} + +enum SkillType { + case Technical = 'technical'; + case Other = 'other'; +} + +$text = "Alex is 25 years old software engineer, who knows PHP, Python and can play the guitar."; + +$person = (new Instructor)->respond( + messages: [['role' => 'user', 'content' => $text]], + responseModel: Person::class, + client: OpenAI::client($yourApiKey), +); // client is passed explicitly, can specify eg. different base URL + +// data is extracted into an object of given class +assert($person instanceof Person); // true + +// you can access object's extracted property values +echo $person->name; // Alex +echo $person->age; // 25 +echo $person->profession; // software engineer +echo $person->skills[0]->name; // PHP +echo $person->skills[0]->type; // SkillType::Technical +// ... 
+ +var_dump($person); +// Person { +// name: "Alex", +// age: 25, +// profession: "software engineer", +// skills: [ +// Skill { +// name: "PHP", +// type: SkillType::Technical, +// }, +// Skill { +// name: "Python", +// type: SkillType::Technical, +// }, +// Skill { +// name: "guitar", +// type: SkillType::Other +// }, +// ] +// } ``` ## Changing LLM model and options @@ -281,14 +336,18 @@ You can specify model and other options that will be passed to OpenAI / LLM endp For more details on options available - see [OpenAI PHP client](https://github.com/openai-php/client). ```php - $person = (new Instructor)->respond( - messages: [['role' => 'user', 'content' => $text]], - responseModel: Person::class, - model: 'gpt-3.5-turbo', - options: ['temperature' => 0.0], - client: OpenAI::client($yourApiKey), - ); // client is passed explicitly, can specify eg. different base URL +$person = (new Instructor)->respond( + messages: [['role' => 'user', 'content' => $text]], + responseModel: Person::class, + model: 'gpt-3.5-turbo', + options: ['temperature' => 0.0], + client: OpenAI::client($yourApiKey), +); +// client is passed explicitly +// you can specify e.g. different base URL ``` +> Some open source LLMs support OpenAI API, so you can use them with Instructor by specifying appropriate ```model``` and ```base URI``` via ```options``` parameter. + ## Using DocBlocks as Additional Instructions for LLM @@ -299,16 +358,16 @@ Instructor extracts PHP DocBlocks comments from class and property defined and i Using PHP DocBlocks instructions is not required, but sometimes you may want to clarify your intentions to improve LLM's inference results. ```php - /** - * Represents a skill of a person and context in which it was mentioned. 
- */ - class Skill { - public string $name; - /** @var SkillType $type type of the skill, derived from the description and context */ - public SkillType $type; - /** Directly quoted, full sentence mentioning person's skill */ - public string $context; - } +/** + * Represents a skill of a person and context in which it was mentioned. + */ +class Skill { + public string $name; + /** @var SkillType $type type of the skill, derived from the description and context */ + public SkillType $type; + /** Directly quoted, full sentence mentioning person's skill */ + public string $context; +} ``` ## Custom Validators @@ -316,33 +375,33 @@ Using PHP DocBlocks instructions is not required, but sometimes you may want to Instructor uses Symfony validation component to validate extracted data. You can use #[Assert/Callback] annotation to build fully customized validation logic. ```php - use Cognesy\Instructor\Instructor; - use Symfony\Component\Validator\Constraints as Assert; - use Symfony\Component\Validator\Context\ExecutionContextInterface; +use Cognesy\Instructor\Instructor; +use Symfony\Component\Validator\Constraints as Assert; +use Symfony\Component\Validator\Context\ExecutionContextInterface; + +class UserDetails +{ + public string $name; + public int $age; - class UserDetails - { - public string $name; - public int $age; - - #[Assert\Callback] - public function validateName(ExecutionContextInterface $context, mixed $payload) { - if ($this->name !== strtoupper($this->name)) { - $context->buildViolation("Name must be in uppercase.") - ->atPath('name') - ->setInvalidValue($this->name) - ->addViolation(); - } + #[Assert\Callback] + public function validateName(ExecutionContextInterface $context, mixed $payload) { + if ($this->name !== strtoupper($this->name)) { + $context->buildViolation("Name must be in uppercase.") + ->atPath('name') + ->setInvalidValue($this->name) + ->addViolation(); } } - - $user = (new Instructor)->respond( - messages: [['role' => 'user', 'content' => 
'jason is 25 years old']], - responseModel: UserDetails::class, - maxRetries: 2 - ); + } - assert($user->name === "JASON"); + $user = (new Instructor)->respond( + messages: [['role' => 'user', 'content' => 'jason is 25 years old']], + responseModel: UserDetails::class, + maxRetries: 2 +); + +assert($user->name === "JASON"); ``` See [Symfony docs](https://symfony.com/doc/current/reference/constraints/Callback.html) for more details on how to use Callback constraint. diff --git a/docs/data_model.md b/docs/data_model.md index 98eb7103..7498fd5e 100644 --- a/docs/data_model.md +++ b/docs/data_model.md @@ -1,5 +1,78 @@ ## Specifying Data Model +### Scalar Values + +Instructor can extract scalar values from text and assign them to your response model's properties. + +### Example: String result + +```php +respond( + messages: "His name is Jason, he is 28 years old.", + responseModel: Scalar::string(name: 'firstName'), +); +// expect($value)->toBeString(); +// expect($value)->toBe("Jason"); +``` + +### Example: Integer result + +```php +respond( + messages: "His name is Jason, he is 28 years old.", + responseModel: Scalar::integer('age'), +); +// expect($value)->toBeInt(); +// expect($value)->toBe(28); +``` + +### Example: Boolean result + +```php +respond( + messages: "His name is Jason, he is 28 years old.", + responseModel: Scalar::boolean(name: 'isAdult'), +); +// expect($age)->toBeBool(); +// expect($age)->toBe(true); +``` + +### Example: Float result + +```php +respond( + messages: "His name is Jason, he is 28 years old and his 100m sprint record is 11.6 seconds.", + responseModel: Scalar::float(name: 'recordTime'), +); +// expect($value)->toBeFloat(); +// expect($value)->toBe(11.6); +``` + +### Example: Select one of the options + +```php +respond( + messages: "His name is Dietmar, he is 28 years old and he lives in Germany.", + responseModel: Scalar::select( + options: ['US citizen', 'Canada citizen', 'other'], + name: 'citizenshipGroup' + ), +); +// 
expect($age)->toBeString(); +// expect($age)->toBe('other'); +``` + + ### Type Hints Use PHP type hints to specify the type of extracted data. @@ -16,6 +89,9 @@ class Person { } ``` +Instructor will only fill in the fields that are public. Private and protected fields are ignored and their values are not going to be extracted (they will be left empty, with default values set as defined in your class). + + ### DocBlock type hints You can also use PHP DocBlock style comments to specify the type of extracted data. This is useful when you want to specify property types for LLM, but can't or don't want to enforce type at the code level. @@ -93,7 +169,7 @@ $person = (new Instructor)->respond( messages: [['role' => 'user', 'content' => $text]], responseModel: Person::class, client: OpenAI::client($yourApiKey), -); // client is passed explicitly, can specify eg. different base URL +); // client is passed explicitly, can specify e.g. different base URL // data is extracted into an object of given class assert($person instanceof Person); // true diff --git a/experimental/BooleanLLMFunction.php b/experimental/BooleanLLMFunction.php deleted file mode 100644 index 8a7b7be1..00000000 --- a/experimental/BooleanLLMFunction.php +++ /dev/null @@ -1,33 +0,0 @@ -llm = $llm ?? new LLM(); - } - - public function make( - string $name, - string $description, - array $messages, - string $model = 'gpt-4-0125-preview', - array $options = [] - ) : ?bool { - $schema = (new SimpleFunctionCallSchema)->make( - $name, - $description, - 'value', - 'Derive correct value based on context', - PhpType::BOOLEAN - ); - $json = $this->llm->callFunction($messages, $name, $schema, $model, $options); - $deserialized = json_decode($json, true); - return $deserialized['value'] ?? 
null; - } - } \ No newline at end of file diff --git a/experimental/IntegerLLMFunction.php b/experimental/IntegerLLMFunction.php deleted file mode 100644 index c912dd15..00000000 --- a/experimental/IntegerLLMFunction.php +++ /dev/null @@ -1,34 +0,0 @@ -llm = $llm ?? new LLM(); - } - - public function make( - string $name, - string $description, - array $messages, - string $model = 'gpt-4-0125-preview', - array $options = [] - ) : ?int { - $schema = (new SimpleFunctionCallSchema)->make( - $name, - $description, - 'value', - 'Derive correct value based on context', - PhpType::INTEGER - ); - $json = $this->llm->callFunction($messages, $name, $schema, $model, $options); - dump($json); - $deserialized = json_decode($json, true); - return $deserialized['value'] ?? null; - } -} diff --git a/experimental/SimpleFunctionCallSchema.php b/experimental/SimpleFunctionCallSchema.php deleted file mode 100644 index ae146431..00000000 --- a/experimental/SimpleFunctionCallSchema.php +++ /dev/null @@ -1,39 +0,0 @@ -name = $name; - $functionData->description = $description; - $functionData->parameters = [ - 'type' => 'object', - 'properties' => [ - $argName => [ - 'description' => $argDescription, - 'type' => JsonType::fromPhpType($argType)->value, - ], - ], - ]; - if ($argType === 'enum') { - $values = array_values((new ReflectionEnum($argType))->getConstants()); - $functionData->parameters['properties'][$argName]['enum'] = $values; - } - $functionData->required[] = $argName; - return [ - 'type' => 'function', - 'function' => $functionData, - ]; - } -} \ No newline at end of file diff --git a/experimental/StringLLMFunction.php b/experimental/StringLLMFunction.php deleted file mode 100644 index 2efc433a..00000000 --- a/experimental/StringLLMFunction.php +++ /dev/null @@ -1,33 +0,0 @@ -llm = $llm ?? 
new LLM(); - } - - public function make( - string $name, - string $description, - array $messages, - string $model = 'gpt-4-0125-preview', - array $options = [] - ) : ?string { - $schema = (new SimpleFunctionCallSchema)->make( - $name, - $description, - 'value', - 'Derive correct value based on context', - PhpType::STRING - ); - $json = $this->llm->callFunction($messages, $name, $schema, $model, $options); - $deserialized = json_decode($json, true); - return $deserialized['value'] ?? null; - } -} \ No newline at end of file diff --git a/src/Contracts/CanDeserialize.php b/src/Contracts/CanDeserialize.php index bafca960..fbcf98a0 100644 --- a/src/Contracts/CanDeserialize.php +++ b/src/Contracts/CanDeserialize.php @@ -4,5 +4,5 @@ interface CanDeserialize { - public function deserialize(string $data, string $model) : object; + public function deserialize(string $data, string $dataModelClass) : object; } \ No newline at end of file diff --git a/src/Contracts/CanDeserializeJson.php b/src/Contracts/CanDeserializeJson.php new file mode 100644 index 00000000..b869fbef --- /dev/null +++ b/src/Contracts/CanDeserializeJson.php @@ -0,0 +1,8 @@ +getAttributes(ArrayOf::class)[0] ?? 
null; - - if ($arrayOfAttribute) { - $attributeType = $arrayOfAttribute->newInstance()->type; - if (is_array($value)) { - $value = array_map(function ($item) use ($attributeType) { - return $this->serializer->denormalize($item, $attributeType); - }, $value); - } - } - - parent::setAttributeValue($object, $attribute, $value, $format, $context); - } -} diff --git a/src/Extras/ScalarAdapter/Scalar.php b/src/Extras/ScalarAdapter/Scalar.php new file mode 100644 index 00000000..44d195a6 --- /dev/null +++ b/src/Extras/ScalarAdapter/Scalar.php @@ -0,0 +1,171 @@ +name = $name; + $this->description = $description; + $this->type = $type; + $this->required = $required; + $this->defaultValue = $defaultValue; + $this->options = $options; + } + + /** + * Custom JSON schema for scalar value - we ignore all fields in this class and pass only what we want + * by manually creating the array representing JSON Schema of our desired structure. + */ + public function toJsonSchema() : array { + $array = [ + '$comment' => Scalar::class, + 'type' => 'object', + 'properties' => [ + $this->name => [ + 'description' => $this->description, + 'type' => $this->type->toJsonType(), + ], + ], + ]; + if ($this->required) { + $array['required'] = [$this->name]; + } + if (!empty($this->options)) { + $array['properties'][$this->name]['enum'] = $this->options; + } + return $array; + } + + /** + * Deserialize JSON into scalar value + */ + public function fromJson(string $json) : self { + $array = json_decode($json, true); + $value = $array[$this->name] ?? $this->defaultValue; + if (($value === null) && $this->required) { + throw new \Exception("Value is required"); + } + try { + $this->value = match ($this->type) { + ValueType::STRING => (string) $value, + ValueType::INTEGER => (int) $value, + ValueType::FLOAT => (float) $value, + ValueType::BOOLEAN => (bool) $value, + }; + } catch (\Throwable $e) { + throw new \Exception("Failed to deserialize value: " . 
$e->getMessage()); + } + if (!empty($this->options) && !in_array($this->value, $this->options)) { + throw new \Exception("Value is not in the list of allowed options"); + } + return $this; + } + + public function transform() : mixed { + return $this->value; + } + + static public function integer( + string $name = 'value', + string $description = 'Response value', + bool $required = true, + mixed $defaultValue = null, + array $options = [] + ) : self { + return new self( + name: $name, + description: $description, + type: ValueType::INTEGER, + required: $required, + defaultValue: $defaultValue, + options: $options, + ); + } + + static public function float( + string $name = 'value', + string $description = 'Response value', + bool $required = true, + mixed $defaultValue = null, + array $options = [] + ) : self { + return new self( + name: $name, + description: $description, + type: ValueType::FLOAT, + required: $required, + defaultValue: $defaultValue, + options: $options, + ); + } + + static public function string( + string $name = 'value', + string $description = 'Response value', + bool $required = true, + mixed $defaultValue = null, + array $options = [] + ) : self { + return new self( + name: $name, + description: $description, + type: ValueType::STRING, + required: $required, + defaultValue: $defaultValue, + options: $options, + ); + } + + static public function boolean( + string $name = 'value', + string $description = 'Response value', + bool $required = true, + mixed $defaultValue = null, + ) : self { + return new self( + name: $name, + description: $description, + type: ValueType::BOOLEAN, + required: $required, + defaultValue: $defaultValue, + ); + } + + static public function select( + array $options, + string $name = 'option', + string $description = 'Select option', + bool $required = true, + mixed $defaultValue = null, + ) : self { + return new self( + name: $name, + description: $description, + type: ValueType::STRING, + required: $required, + 
defaultValue: $defaultValue, + options: $options, + ); + } +} diff --git a/src/Extras/ScalarAdapter/ValueType.php b/src/Extras/ScalarAdapter/ValueType.php new file mode 100644 index 00000000..1c418e77 --- /dev/null +++ b/src/Extras/ScalarAdapter/ValueType.php @@ -0,0 +1,20 @@ + 'string', + ValueType::INTEGER => 'integer', + ValueType::FLOAT => 'number', + ValueType::BOOLEAN => 'boolean', + }; + } +} \ No newline at end of file diff --git a/src/Instructor.php b/src/Instructor.php index 7fbbc84e..ef27eb3e 100644 --- a/src/Instructor.php +++ b/src/Instructor.php @@ -3,61 +3,96 @@ use Cognesy\Instructor\Contracts\CanCallFunction; use Cognesy\Instructor\Contracts\CanDeserialize; -use Cognesy\Instructor\Contracts\CanValidateObject; +use Cognesy\Instructor\Contracts\CanProvideSchema; +use Cognesy\Instructor\Contracts\CanTransformResponse; +use Cognesy\Instructor\Contracts\CanValidateResponse; use Cognesy\Instructor\Deserializers\Symfony\Deserializer; use Cognesy\Instructor\LLMs\OpenAI\LLM; -use Cognesy\Instructor\Schema\FunctionCallSchema; +use Cognesy\Instructor\Schema\PropertyInfoBased\Data\Schema\Schema; +use Cognesy\Instructor\Schema\PropertyInfoBased\Factories\FunctionCallFactory; use Cognesy\Instructor\Validators\Symfony\Validator; use Exception; +/** + * Main access point to Instructor. + * Use respond() method to generate structured responses from LLM calls. + */ class Instructor { - private Deserializer $deserializer; - private Validator $validator; private LLM $llm; - public string $functionName = 'extract_data'; - public string $functionDescription = 'Extract data from provided content'; public $retryPrompt = "Recall function correctly, fix following errors:"; public function __construct( CanCallFunction $llm = null, - CanDeserialize $deserializer = null, - CanValidateObject $validator = null ) { $this->llm = $llm ?? new LLM(); - $this->deserializer = $deserializer ?? new Deserializer(); - $this->validator = $validator ?? 
new Validator(); } + /** + * Generates a response model via LLM based on provided string or OpenAI style message array + */ public function respond( - array $messages, - string|object $responseModel, + string|array $messages, + string|object|array $responseModel, string $model = 'gpt-4-0125-preview', int $maxRetries = 0, - array $options = [] - ) : ?object { - $schema = (new FunctionCallSchema)->withClass( - $responseModel, - $this->functionName, - $this->functionDescription + array $options = [], + ) : mixed { + if (is_string($messages)) { + $messages = [['role' => 'user', 'content' => $messages]]; + } + $responseModelObject = new ResponseModel($responseModel); + return $this->tryRespond( + $messages, + $model, + $responseModelObject, + $maxRetries, + $options ); + } + + /** + * Executes LLM call loop with validation until success or max retries reached + */ + protected function tryRespond( + array $messages, + string $model, + ResponseModel $responseModel, + int $maxRetries, + array $options + ) : mixed { $retries = 0; while ($retries <= $maxRetries) { - $json = $this->llm->callFunction($messages, $this->functionName, $schema, $model, $options); - $object = $this->deserializer->deserialize($json, $responseModel); - if ($this->validator->validate($object)) { + $json = $this->llm->callFunction( + $messages, + $responseModel->functionName, + $responseModel->functionCall, + $model, + $options + ); + [$object, $errors] = $responseModel->toResponse($json); + if (empty($errors)) { + if ($object instanceof CanTransformResponse) { + return $object->transform(); + } return $object; } $messages[] = ['role' => 'assistant', 'content' => $json]; - $messages[] = ['role' => 'user', 'content' => $this->retryPrompt . '\n' . $this->validator->errors()]; + $messages[] = ['role' => 'user', 'content' => $this->retryPrompt . '\n' . $errors]; $retries++; } - throw new Exception("Failed to extract data due to validation constraints: " . 
$this->validator->errors()); + throw new Exception("Failed to extract data due to validation constraints: " . $errors); } + /** + * Raw JSON string returned by LLM + */ public function json() : string { return $this->llm->data(); } + /** + * Response data, see: API client documentation (e.g. OpenAI) + */ public function response() : array { return $this->llm->response(); } diff --git a/src/Reflection/Factories/ArrayItemFactory.php b/src/Reflection/Factories/ArrayItemFactory.php deleted file mode 100644 index 4b193701..00000000 --- a/src/Reflection/Factories/ArrayItemFactory.php +++ /dev/null @@ -1,54 +0,0 @@ -type) { - PhpType::STRING => StringPropertyData::asArrayItem($typeDef), - PhpType::INTEGER => IntegerPropertyData::asArrayItem($typeDef), - PhpType::FLOAT => FloatPropertyData::asArrayItem($typeDef), - PhpType::BOOLEAN => BooleanPropertyData::asArrayItem($typeDef), - PhpType::OBJECT => ObjectPropertyData::asArrayItem($typeDef), - PhpType::ENUM => EnumPropertyData::asArrayItem($typeDef), - PhpType::ARRAY => ArrayPropertyData::asArrayItem($typeDef), - default => UndefinedPropertyData::asArrayItem($typeDef), - }; - } - - static public function makeParameterData(TypeDef $typeDef): ParameterData - { - return match ($typeDef->type) { - PhpType::STRING => StringParameterData::asArrayItem($typeDef), - PhpType::INTEGER => IntegerParameterData::asArrayItem($typeDef), - PhpType::FLOAT => FloatParameterData::asArrayItem($typeDef), - PhpType::BOOLEAN => BooleanParameterData::asArrayItem($typeDef), - PhpType::OBJECT => ObjectParameterData::asArrayItem($typeDef), - PhpType::ENUM => EnumParameterData::asArrayItem($typeDef), - PhpType::ARRAY => ArrayParameterData::asArrayItem($typeDef), - default => UndefinedParameterData::asArrayItem($typeDef), - }; - } -} \ No newline at end of file diff --git a/src/Reflection/TypeDefs/SimpleTypeDef.php b/src/Reflection/TypeDefs/SimpleTypeDef.php deleted file mode 100644 index cacfe416..00000000 --- 
a/src/Reflection/TypeDefs/SimpleTypeDef.php +++ /dev/null @@ -1,13 +0,0 @@ -type = $type; - } -} \ No newline at end of file diff --git a/src/Reflection/TypeDefs/TypeDef.php b/src/Reflection/TypeDefs/TypeDef.php deleted file mode 100644 index fb9edc2f..00000000 --- a/src/Reflection/TypeDefs/TypeDef.php +++ /dev/null @@ -1,9 +0,0 @@ -value = $value; + $this->deserializer = $deserializer ?? new Deserializer(); + $this->validator = $validator ?? new Validator(); + $this->functionCall = $this->makeFunctionCall($value); + } + + /** + * Get validation errors + */ + public function errors() : string { + return $this->validator->errors(); + } + + /** + * Deserialize JSON and validate response object + */ + public function toResponse(string $json) { + $object = $this->deserialize($json); + if ($this->validate($object)) { + return [$object, null]; + } + return [null, $this->errors()]; + } + + /** + * Deserialize response JSON + */ + protected function deserialize(string $json) : mixed { + if ($this->instance instanceof CanDeserializeJson) { + return $this->instance->fromJson($json); + } + // else - use standard deserializer + return $this->deserializer->deserialize($json, $this->class); + } + + /** + * Validate deserialized response object + */ + protected function validate(object $response) : bool { + if ($response instanceof CanSelfValidate) { + return $response->validate(); + } + // else - use standard validator + return $this->validator->validate($response); + } + + /** + * Generate function call data (depending on the response model type) + */ + protected function makeFunctionCall(string|object|array $requestedModel) { + if (is_string($requestedModel)) { + $this->class = $requestedModel; + $this->instance = null; + return (new FunctionCallFactory)->fromClass( + $requestedModel, + $this->functionName, + $this->functionDescription + ); + } + + if (is_array($requestedModel)) { + $this->class = $requestedModel['$comment'] ?? 
null; + if (empty($this->class)) { + throw new Exception('Provided JSON schema must contain $comment field with fully qualified class name'); + } + $this->instance = null; + return (new FunctionCallFactory)->fromArray( + $requestedModel, + $this->functionName, + $this->functionDescription + ); + } + + if (is_subclass_of($requestedModel, CanProvideSchema::class)) { + $this->class = get_class($requestedModel); + $this->instance = $requestedModel; + return (new FunctionCallFactory)->fromArray( + $requestedModel->toJsonSchema(), + $this->functionName, + $this->functionDescription + ); + } + + if ($requestedModel instanceof Schema) { + $this->class = $requestedModel->type->class; + $this->instance = $requestedModel; + return (new FunctionCallFactory)->fromSchema( + $requestedModel, + $this->functionName, + $this->functionDescription + ); + } + + $this->class = get_class($requestedModel); + $this->instance = null; + return (new FunctionCallFactory)->fromClass( + get_class($requestedModel), + $this->functionName, + $this->functionDescription + ); + } +} \ No newline at end of file diff --git a/src/Schema/PropertyInfoBased/Data/Reference.php b/src/Schema/PropertyInfoBased/Data/Reference.php new file mode 100644 index 00000000..0e5cb5b5 --- /dev/null +++ b/src/Schema/PropertyInfoBased/Data/Reference.php @@ -0,0 +1,12 @@ +nestedItemSchema = $nestedItemSchema; + } + + public function toArray(callable $refCallback = null) : array + { + return array_filter([ + 'type' => 'array', + 'items' => $this->nestedItemSchema->toArray($refCallback), + 'description' => $this->description, + ]); + } +} diff --git a/src/Schema/PropertyInfoBased/Data/Schema/EnumSchema.php b/src/Schema/PropertyInfoBased/Data/Schema/EnumSchema.php new file mode 100644 index 00000000..5e33d839 --- /dev/null +++ b/src/Schema/PropertyInfoBased/Data/Schema/EnumSchema.php @@ -0,0 +1,15 @@ + $this->type->enumType ?? 'string', + 'enum' => $this->type->enumValues ?? [], + 'description' => $this->description ?? 
'', + ]); + } +} diff --git a/src/Schema/PropertyInfoBased/Data/Schema/ObjectRefSchema.php b/src/Schema/PropertyInfoBased/Data/Schema/ObjectRefSchema.php new file mode 100644 index 00000000..0ac96b53 --- /dev/null +++ b/src/Schema/PropertyInfoBased/Data/Schema/ObjectRefSchema.php @@ -0,0 +1,27 @@ +className($this->type->class); + $id = "#/{$this->defsLabel}/{$class}"; + if ($refCallback) { + $refCallback(new Reference($id, $this->type->class)); + } + return array_filter([ + '$ref' => $id, + 'description' => $this->description, + ]); + } + + private function className(string $fqcn) : string + { + $classSegments = explode('\\', $fqcn); + return array_pop($classSegments); + } +} \ No newline at end of file diff --git a/src/Schema/PropertyInfoBased/Data/Schema/ObjectSchema.php b/src/Schema/PropertyInfoBased/Data/Schema/ObjectSchema.php new file mode 100644 index 00000000..a571721f --- /dev/null +++ b/src/Schema/PropertyInfoBased/Data/Schema/ObjectSchema.php @@ -0,0 +1,39 @@ +properties = $properties; + $this->required = $required; + } + + public function toArray(callable $refCallback = null) : array + { + $propertyDefs = []; + foreach ($this->properties as $property) { + $propertyDefs[$property->name] = $property->toArray($refCallback); + } + return array_filter([ + 'type' => 'object', + 'properties' => $propertyDefs, + 'required' => $this->required, + 'description' => $this->description, + ]); + } +} diff --git a/src/Schema/PropertyInfoBased/Data/Schema/ScalarSchema.php b/src/Schema/PropertyInfoBased/Data/Schema/ScalarSchema.php new file mode 100644 index 00000000..d7a53f86 --- /dev/null +++ b/src/Schema/PropertyInfoBased/Data/Schema/ScalarSchema.php @@ -0,0 +1,14 @@ + $this->type->jsonType(), + 'description' => $this->description, + ]); + } +} diff --git a/src/Schema/PropertyInfoBased/Data/Schema/Schema.php b/src/Schema/PropertyInfoBased/Data/Schema/Schema.php new file mode 100644 index 00000000..60f2a4fd --- /dev/null +++ 
b/src/Schema/PropertyInfoBased/Data/Schema/Schema.php @@ -0,0 +1,30 @@ +type = $type; + $this->name = $name; + $this->description = $description; + } + + public function toArray(callable $refCallback = null) : array + { + return array_filter([ + 'type' => $this->type->type, + 'description' => $this->description, + ]); + } +} diff --git a/src/Schema/PropertyInfoBased/Data/TypeDetails.php b/src/Schema/PropertyInfoBased/Data/TypeDetails.php new file mode 100644 index 00000000..e5456773 --- /dev/null +++ b/src/Schema/PropertyInfoBased/Data/TypeDetails.php @@ -0,0 +1,37 @@ +type) { + 'object' => $this->class, + 'enum' => $this->class, + 'array' => $this->nestedType->__toString().'[]', + default => $this->type, + }; + } + + public function jsonType() : string + { + return match ($this->type) { + 'object' => 'object', + 'enum' => $this->enumType ?? 'string', + 'array' => 'array', + 'int' => 'integer', + 'string' => 'string', + 'bool' => 'boolean', + 'float' => 'number', + default => throw new \Exception('Unknown type: '.$this->type), + }; + } +} diff --git a/src/Schema/PropertyInfoBased/Factories/FunctionCallFactory.php b/src/Schema/PropertyInfoBased/Factories/FunctionCallFactory.php new file mode 100644 index 00000000..3d6cd98f --- /dev/null +++ b/src/Schema/PropertyInfoBased/Factories/FunctionCallFactory.php @@ -0,0 +1,112 @@ +references = new ReferenceQueue; + } + + /** + * Renders function call based on the class + */ + public function fromClass( + string $class, + string $customName = 'extract_object', + string $customDescription = 'Extract parameters from chat content' + ) : array { + $this->schema = (new SchemaFactory)->schema($class); + $this->jsonSchema = $this->schema->toArray($this->onObjectRef(...)); + return $this->render( + $this->jsonSchema, + $customName, + $customDescription + ); + } + + /** + * Render function call based on the Schema object + */ + public function fromSchema( + Schema $schema, + string $customName = 'extract_object', + string 
$customDescription = 'Extract parameters from chat content' + ) : array { + $this->schema = $schema; + $this->jsonSchema = $schema->toArray($this->onObjectRef(...)); + return $this->render( + $this->jsonSchema, + $customName, + $customDescription + ); + } + + /** + * Render function call based on the raw JSON Schema array + */ + public function fromArray( + array $jsonSchema, + string $customName = 'extract_object', + string $customDescription = 'Extract parameters from chat content' + ) : array { + $this->schema = null; + $this->jsonSchema = $jsonSchema; + return $this->render( + $this->jsonSchema, + $customName, + $customDescription + ); + } + + /** + * Extract the schema model from a function and constructs a function call JSON schema array + */ + protected function render( + array $jsonSchema, + string $name, + string $description + ) : array { + $functionCall = [ + 'type' => 'function', + 'function' => [ + 'name' => $name, + 'description' => $description, + 'parameters' => $jsonSchema, + ] + ]; + if ($this->references->hasQueued()) { + $definitions = $this->definitions(); + $functionCall['function']['parameters']['definitions'] = $definitions; + } + return $functionCall; + } + + /** + * Recursive extraction of the schema definitions from the references + */ + protected function definitions() : array { + $definitions = []; + while($this->references->hasQueued()) { + $reference = $this->references->dequeue(); + if ($reference == null) { + break; + } + $definitions[$reference->id] = (new SchemaFactory)->schema($reference->class)->toArray($this->onObjectRef(...)); + } + return $definitions; + } + + /** + * Callback called when an object reference is found + */ + private function onObjectRef(Reference $reference) { + $this->references->queue($reference); + } +} diff --git a/src/Schema/PropertyInfoBased/Factories/InstanceSchemaFactory.php b/src/Schema/PropertyInfoBased/Factories/InstanceSchemaFactory.php new file mode 100644 index 00000000..aed71595 --- /dev/null 
+++ b/src/Schema/PropertyInfoBased/Factories/InstanceSchemaFactory.php @@ -0,0 +1,13 @@ +makeSchema((new TypeDetailsFactory)->fromTypeName($anyType), '', ''); + } +} \ No newline at end of file diff --git a/src/Schema/PropertyInfoBased/Factories/SchemaFactory.php b/src/Schema/PropertyInfoBased/Factories/SchemaFactory.php new file mode 100644 index 00000000..c6681d86 --- /dev/null +++ b/src/Schema/PropertyInfoBased/Factories/SchemaFactory.php @@ -0,0 +1,141 @@ +makeSchema((new TypeDetailsFactory)->fromTypeName($anyType), '', ''); + } + + /** + * Makes schema for top level item (depending on the type) + * + * @param TypeDetails $type + * @param string $name + * @param string $description + * @return Schema + */ + protected function makeSchema(TypeDetails $type, string $name, string $description) : Schema + { + return match ($type->type) { + 'object' => new ObjectSchema( + $type, + $name, + $description, + $this->getPropertySchemas($type->class), + (new ClassInfo)->getRequiredProperties($type->class), + ), + 'enum' => new EnumSchema($type, $name, $description), + 'array' => new ArraySchema( + $type, + $name, + $description, + $this->makePropertySchema($type, $name, $description), + ), + 'int', 'string', 'bool', 'float' => new ScalarSchema($type, $name, $description), + default => throw new \Exception('Unknown type: '.$type->type), + }; + } + + /** + * Makes schema for properties + * + * @param TypeDetails $type + * @param string $name + * @param string $description + * @return Schema + */ + protected function makePropertySchema(TypeDetails $type, string $name, string $description): Schema + { + return match ($type->type) { + 'object' => $this->makePropertyObject($type, $name, $description), + 'enum' => new EnumSchema($type, $name, $description), + 'array' => new ArraySchema( + $type, + $name, + $description, + $this->makePropertySchema($type->nestedType, '', ''), + ), + 'int', 'string', 'bool', 'float' => new ScalarSchema($type, $name, $description), + default => 
throw new \Exception('Unknown type: ' . $type->type), + }; + } + + /** + * Makes schema for object properties + * + * @param TypeDetails $type + * @param string $name + * @param string $description + * @return Schema + */ + protected function makePropertyObject(TypeDetails $type, string $name, string $description): Schema + { + if ($this->useObjectReferences) { + return new ObjectRefSchema($type, $name, $description); + } + return new ObjectSchema( + $type, + $name, + $description, + $this->getPropertySchemas($type->class), + (new ClassInfo)->getRequiredProperties($type->class), + ); + } + + /** + * Gets all the property schemas of a class + * + * @param string $class + * @return Schema[] + */ + protected function getPropertySchemas(string $class) : array { + $properties = (new ClassInfo)->getProperties($class); + $propertySchemas = []; + foreach ($properties as $property) { + $propertySchemas[$property] = $this->getPropertySchema($class, $property); + } + return $propertySchemas; + } + + /** + * Gets the schema of a property + * + * @param string $class + * @param string $property + * @return Schema + */ + protected function getPropertySchema(string $class, string $property) : Schema { + $propertyInfoType = (new ClassInfo)->getType($class, $property); + $propertyDescription = (new ClassInfo)->getDescription($class, $property); + $type = (new TypeDetailsFactory)->fromPropertyInfo($propertyInfoType); + return $this->makePropertySchema($type, $property, $propertyDescription); + } +} \ No newline at end of file diff --git a/src/Schema/PropertyInfoBased/Factories/TypeDetailsFactory.php b/src/Schema/PropertyInfoBased/Factories/TypeDetailsFactory.php new file mode 100644 index 00000000..bcf990b0 --- /dev/null +++ b/src/Schema/PropertyInfoBased/Factories/TypeDetailsFactory.php @@ -0,0 +1,178 @@ +normalizeIfArray($anyType)) { + 'object' => throw new \Exception('Object type must have a class name'), + 'enum' => throw new \Exception('Enum type must have a class'), + 'array' 
=> $this->arrayType($anyType), + 'int', 'string', 'bool', 'float' => $this->scalarType($anyType), + default => $this->objectType($anyType), + }; + } + + /** + * Create TypeDetails from PropertyInfo + * + * @param Type $propertyInfo + * @return TypeDetails + */ + public function fromPropertyInfo(Type $propertyInfo) : TypeDetails { + $class = $propertyInfo->getClassName(); + $type = $propertyInfo->getBuiltinType(); + return match($type) { + 'object', 'enum' => $this->fromTypeName($class), + 'array' => $this->fromTypeName($this->arrayTypeString($propertyInfo)), // express array type as [] + default => new TypeDetails($type), + }; + } + + /** + * Create TypeDetails from object instance + * + * @param object $instance + * @return TypeDetails + */ + public function fromValue(mixed $anyVar) : TypeDetails { + $type = gettype($anyVar); + return match ($type) { + 'object' => $this->objectType(get_class($anyVar)), + 'array' => $this->arrayType($this->arrayTypeStringFromValues($anyVar)), + 'integer', 'string', 'boolean', 'double' => $this->scalarType($type), + default => throw new \Exception('Unsupported type: '.$type), + }; + } + + /** + * Create TypeDetails for atom (scalar) type + * + * @param string $type + * @return TypeDetails + */ + protected function scalarType(string $type) : TypeDetails { + return new TypeDetails($type, null, null, null, null); + } + + /** + * Create TypeDetails for array type + * + * @param string $typeSpec + * @return TypeDetails + */ + protected function arrayType(string $typeSpec) : TypeDetails { + $typeName = $this->getArrayType($typeSpec); + $instance = new TypeDetails('array', null, null, null, null); + $instance->nestedType = match ($typeName) { + 'mixed' => throw new \Exception('Mixed type not supported'), + 'array' => throw new \Exception('Nested arrays not supported'), + 'int', 'string', 'bool', 'float' => $this->scalarType($typeName), + default => $this->objectType($typeName), + }; + return $instance; + } + + /** + * Create TypeDetails 
for object type + * + * @param string $typeName + * @return TypeDetails + */ + protected function objectType(string $typeName) : TypeDetails { + if ((new ClassInfo)->isEnum($typeName)) { + return $this->enumType($typeName); + } + $instance = new TypeDetails('object', $typeName, null, null, null); + $instance->class = $typeName; + return $instance; + } + + /** + * Create TypeDetails for enum type + * + * @param string $typeName + * @return TypeDetails + */ + protected function enumType(string $typeName) : TypeDetails { + // enum specific + if (!(new ClassInfo)->isBackedEnum($typeName)) { + throw new \Exception('Enum must be backed by a string or int'); + } + $backingType = (new ClassInfo)->enumBackingType($typeName); + if (!in_array($backingType, ['int', 'string'])) { + throw new \Exception('Enum must be backed by a string or int'); + } + $instance = new TypeDetails('enum', $typeName, null, null, null); + $instance->class = $typeName; + $instance->enumType = $backingType; + $instance->enumValues = (new ClassInfo)->enumValues($typeName); + return $instance; + } + + /** + * Extract array type from type string + */ + private function getArrayType(string $typeSpec) : string { + if (substr($typeSpec, -2) !== '[]') { + throw new \Exception('Array type must end with []'); + } + return substr($typeSpec, 0, -2); + } + + /** + * Express Type[] type as array + */ + private function normalizeIfArray(string $type) : string { + if (substr($type, -2) === '[]') { + return 'array'; + } + return $type; + } + + /** + * Express array type as [] + */ + private function arrayTypeString(Type $propertyInfo) : string { + $collectionValueType = $propertyInfo->getCollectionValueTypes()[0]; + if ($collectionValueType === null) { + throw new \Exception('Array type must have a collection value type specified'); + } + $nestedType = $collectionValueType->getBuiltinType() ?? ''; + $nestedClass = $collectionValueType->getClassName() ?? ''; + return empty($nestedClass) ? 
"{$nestedType}[]" : "{$nestedClass}[]"; + } + + /** + * Determine array type from array values + */ + private function arrayTypeStringFromValues(array $array) : string + { + if (empty($array)) { + throw new \Exception('Array is empty, cannot determine type of elements'); + } + $nestedType = gettype($array[0]); + if (in_array($nestedType, ['int', 'string', 'bool', 'float'])) { + return "{$nestedType}[]"; + } + if ($nestedType === 'object') { + $nestedClass = get_class($array[0]); + return "{$nestedClass}[]"; + } + throw new \Exception('Unsupported array element type: '.$nestedType); + } +} diff --git a/src/Schema/PropertyInfoBased/Utils/ClassInfo.php b/src/Schema/PropertyInfoBased/Utils/ClassInfo.php new file mode 100644 index 00000000..91b6e1ea --- /dev/null +++ b/src/Schema/PropertyInfoBased/Utils/ClassInfo.php @@ -0,0 +1,105 @@ +extractor()->getTypes($class, $property); + } + + public function getType(string $class, string $property): Type { + $propertyTypes = $this->getTypes($class, $property); + if (!count($propertyTypes)) { + throw new \Exception("No type found for property: $class::$property"); + } + if (count($propertyTypes) > 1) { + throw new \Exception("Unsupported union type found for property: $class::$property"); + } + return $propertyTypes[0]; + } + + public function getProperties(string $class) : array { + return $this->extractor()->getProperties($class); + } + + public function getDescription(string $class, string $property): string { + $extractor = $this->extractor(); + return trim(implode(' ', [ + $extractor->getShortDescription($class, $property), + $extractor->getLongDescription($class, $property), + ])); + } + + public function getRequiredProperties(string $class) : array { + $properties = $this->getProperties($class); + $required = []; + foreach ($properties as $property) { + if (!$this->isNullable($class, $property)) { + $required[] = $property; + } + } + return $required; + } + + public function isPublic(string $class, string $property) : 
bool { + return (new ReflectionClass($class))->getProperty($property)->isPublic(); + } + + public function isNullable(string $class, string $property) : bool { + $types = $this->extractor()->getTypes($class, $property); + foreach ($types as $type) { + if ($type->isNullable()) { + return true; + } + } + return false; + } + + public function isEnum(string $class) : bool { + return (new ReflectionClass($class))->isEnum(); + } + + public function isBackedEnum(string $class) : bool { + return (new ReflectionEnum($class))->isBacked(); + } + + public function enumBackingType(string $class) : string { + return (new ReflectionEnum($class))->getBackingType()?->getName(); + } + + public function enumValues(string $class) : array { + $enum = new ReflectionEnum($class); + $values = []; + foreach ($enum->getReflectionConstants() as $item) { + $values[] = $item->getValue()->value; + } + return $values; + } + + public function implementsInterface(string $anyType, string $interface) : bool { + if (!class_exists($anyType)) { + return false; + } + return in_array($interface, class_implements($anyType)); + } +} diff --git a/src/Schema/PropertyInfoBased/Utils/ReferenceQueue.php b/src/Schema/PropertyInfoBased/Utils/ReferenceQueue.php new file mode 100644 index 00000000..24023253 --- /dev/null +++ b/src/Schema/PropertyInfoBased/Utils/ReferenceQueue.php @@ -0,0 +1,38 @@ +references[$reference->class])) { + $this->references[$reference->class] = $reference; + } + } + + public function dequeue() : ?Reference { + foreach ($this->references as $class => $reference) { + if ($reference->isRendered === false) { + $this->references[$class]->isRendered = true; + return $reference; + } + } + return null; + } + + public function hasQueued() : bool { + foreach ($this->references as $reference) { + if ($reference->isRendered === false) { + return true; + } + } + return false; + } +} diff --git a/src/Schema/FCArray.php b/src/Schema/ReflectionBased/Data/FCArray.php similarity index 85% rename from 
src/Schema/FCArray.php rename to src/Schema/ReflectionBased/Data/FCArray.php index 97061f3a..ab51fac5 100644 --- a/src/Schema/FCArray.php +++ b/src/Schema/ReflectionBased/Data/FCArray.php @@ -1,5 +1,5 @@ name = $class->getName(); - $this->description = Utils\DescriptionUtils::getClassDescription($class); + $this->description = \Cognesy\Instructor\Schema\ReflectionBased\Reflection\Utils\DescriptionUtils::getClassDescription($class); $this->properties = $this->getProperties($class); } diff --git a/src/Reflection/Enums/JsonType.php b/src/Schema/ReflectionBased/Reflection/Enums/JsonType.php similarity index 90% rename from src/Reflection/Enums/JsonType.php rename to src/Schema/ReflectionBased/Reflection/Enums/JsonType.php index 2294b327..5014b302 100644 --- a/src/Reflection/Enums/JsonType.php +++ b/src/Schema/ReflectionBased/Reflection/Enums/JsonType.php @@ -1,5 +1,5 @@ type) { + PhpType::STRING => StringPropertyData::asArrayItem($typeDef), + PhpType::INTEGER => IntegerPropertyData::asArrayItem($typeDef), + PhpType::FLOAT => FloatPropertyData::asArrayItem($typeDef), + PhpType::BOOLEAN => BooleanPropertyData::asArrayItem($typeDef), + PhpType::OBJECT => ObjectPropertyData::asArrayItem($typeDef), + PhpType::ENUM => EnumPropertyData::asArrayItem($typeDef), + PhpType::ARRAY => ArrayPropertyData::asArrayItem($typeDef), + default => UndefinedPropertyData::asArrayItem($typeDef), + }; + } + + static public function makeParameterData(TypeDef $typeDef): ParameterData + { + return match ($typeDef->type) { + PhpType::STRING => StringParameterData::asArrayItem($typeDef), + PhpType::INTEGER => IntegerParameterData::asArrayItem($typeDef), + PhpType::FLOAT => FloatParameterData::asArrayItem($typeDef), + PhpType::BOOLEAN => BooleanParameterData::asArrayItem($typeDef), + PhpType::OBJECT => ObjectParameterData::asArrayItem($typeDef), + PhpType::ENUM => EnumParameterData::asArrayItem($typeDef), + PhpType::ARRAY => ArrayParameterData::asArrayItem($typeDef), + default => 
UndefinedParameterData::asArrayItem($typeDef), + }; + } +} \ No newline at end of file diff --git a/src/Reflection/Factories/ParameterDataFactory.php b/src/Schema/ReflectionBased/Reflection/Factories/ParameterDataFactory.php similarity index 55% rename from src/Reflection/Factories/ParameterDataFactory.php rename to src/Schema/ReflectionBased/Reflection/Factories/ParameterDataFactory.php index dd739d3c..5d47c87e 100644 --- a/src/Reflection/Factories/ParameterDataFactory.php +++ b/src/Schema/ReflectionBased/Reflection/Factories/ParameterDataFactory.php @@ -1,16 +1,16 @@ name = $function->getName(); - $this->description = Utils\DescriptionUtils::getFunctionDescription($function); + $this->description = \Cognesy\Instructor\Schema\ReflectionBased\Reflection\Utils\DescriptionUtils::getFunctionDescription($function); $this->parameters = $this->getParameters($function); } diff --git a/src/Reflection/MethodData.php b/src/Schema/ReflectionBased/Reflection/MethodData.php similarity index 65% rename from src/Reflection/MethodData.php rename to src/Schema/ReflectionBased/Reflection/MethodData.php index 36d034d8..08f0e2e2 100644 --- a/src/Reflection/MethodData.php +++ b/src/Schema/ReflectionBased/Reflection/MethodData.php @@ -1,16 +1,16 @@ name = $method->getName(); - $this->description = Utils\DescriptionUtils::getMethodDescription($method); + $this->description = \Cognesy\Instructor\Schema\ReflectionBased\Reflection\Utils\DescriptionUtils::getMethodDescription($method); $this->parameters = $this->getParameters($method); } /** - * @return ParameterData[] + * @return \Cognesy\Instructor\Schema\ReflectionBased\Reflection\ParameterData\ParameterData[] */ public function getParameters(ReflectionMethod $method) : array { $classProperties = $method->getParameters(); diff --git a/src/Reflection/ParameterData/ArrayParameterData.php b/src/Schema/ReflectionBased/Reflection/ParameterData/ArrayParameterData.php similarity index 70% rename from 
src/Reflection/ParameterData/ArrayParameterData.php rename to src/Schema/ReflectionBased/Reflection/ParameterData/ArrayParameterData.php index 886a7e6e..44296e6b 100644 --- a/src/Reflection/ParameterData/ArrayParameterData.php +++ b/src/Schema/ReflectionBased/Reflection/ParameterData/ArrayParameterData.php @@ -1,11 +1,11 @@ type = $type; + } +} \ No newline at end of file diff --git a/src/Schema/ReflectionBased/Reflection/TypeDefs/TypeDef.php b/src/Schema/ReflectionBased/Reflection/TypeDefs/TypeDef.php new file mode 100644 index 00000000..93a29483 --- /dev/null +++ b/src/Schema/ReflectionBased/Reflection/TypeDefs/TypeDef.php @@ -0,0 +1,9 @@ +addLoader(new AttributeLoader()) ->getValidator(); - $this->errors = $validator->validate($object); + $this->errors = $validator->validate($response); return (count($this->errors) == 0); } public function errors() : string { - $errors = []; $errors[] = "Invalid values found:"; foreach ($this->errors as $error) { $path = $error->getPropertyPath(); diff --git a/tests/.pest/snapshots/Feature/SchemaTest/it_creates_function_call___function.snap b/tests/.pest/snapshots/Feature/ReflectionSchemaTest/it_creates_function_call___function.snap similarity index 100% rename from tests/.pest/snapshots/Feature/SchemaTest/it_creates_function_call___function.snap rename to tests/.pest/snapshots/Feature/ReflectionSchemaTest/it_creates_function_call___function.snap diff --git a/tests/.pest/snapshots/Feature/SchemaTest/it_creates_function_call___method.snap b/tests/.pest/snapshots/Feature/ReflectionSchemaTest/it_creates_function_call___method.snap similarity index 100% rename from tests/.pest/snapshots/Feature/SchemaTest/it_creates_function_call___method.snap rename to tests/.pest/snapshots/Feature/ReflectionSchemaTest/it_creates_function_call___method.snap diff --git a/tests/.pest/snapshots/Feature/SchemaTest/it_creates_function_call___object.snap b/tests/.pest/snapshots/Feature/ReflectionSchemaTest/it_creates_function_call___object.snap 
similarity index 100% rename from tests/.pest/snapshots/Feature/SchemaTest/it_creates_function_call___object.snap rename to tests/.pest/snapshots/Feature/ReflectionSchemaTest/it_creates_function_call___object.snap diff --git a/tests/.pest/snapshots/Feature/SymfonySchemaTest/it_creates_function_call___object.snap b/tests/.pest/snapshots/Feature/SymfonySchemaTest/it_creates_function_call___object.snap new file mode 100644 index 00000000..77b7680d --- /dev/null +++ b/tests/.pest/snapshots/Feature/SymfonySchemaTest/it_creates_function_call___object.snap @@ -0,0 +1,93 @@ +{ + "type": "function", + "function": { + "name": "addEvent", + "description": "Extract object from provided content", + "parameters": { + "type": "object", + "properties": { + "events": { + "type": "array", + "items": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "Title of the event" + }, + "description": { + "type": "string", + "description": "Concise, informative description of the event" + }, + "type": { + "type": "string", + "enum": [ + "risk", + "issue", + "action", + "progress", + "other" + ], + "description": "Type of the event" + }, + "status": { + "type": "string", + "enum": [ + "open", + "closed", + "unknown" + ], + "description": "Status of the event" + }, + "stakeholders": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": { + "type": "string", + "description": "Name of the stakeholder" + }, + "role": { + "type": "string", + "enum": [ + "customer", + "vendor", + "system integrator", + "other" + ], + "description": "Role of the stakeholder, if specified" + }, + "details": { + "type": "string", + "description": "Any details on the stakeholder, if specified - any mentions of company, organization, structure, group, team, function" + } + }, + "required": [ + "name", + "role" + ] + } + }, + "date": { + "type": "string", + "description": "Date of the event if reported in the text" + } + }, + "required": [ + "title", + 
"description", + "type", + "status", + "stakeholders" + ] + }, + "description": "List of events extracted from the text" + } + }, + "required": [ + "events" + ] + } + } +} \ No newline at end of file diff --git a/tests/Examples/PersonWithValidationMixin.php b/tests/Examples/PersonWithValidationMixin.php new file mode 100644 index 00000000..50e1107d --- /dev/null +++ b/tests/Examples/PersonWithValidationMixin.php @@ -0,0 +1,25 @@ +age < 18) { + $errors[] = [ + 'value' => $this->age, + 'path' => 'age', + 'message' => 'Person must be adult.', + ]; + } + return $errors; + } +} diff --git a/tests/Examples/Schema/ComplexClass.php b/tests/Examples/Schema/ComplexClass.php new file mode 100644 index 00000000..2305fece --- /dev/null +++ b/tests/Examples/Schema/ComplexClass.php @@ -0,0 +1,35 @@ +respond( [['role' => 'user', 'content' => $text]], Events::class, + maxRetries: 2, ); - // dump($events); + dump($events); expect($events)->toBeInstanceOf(Events::class); expect($events->events)->toBeArray(); expect($events->events[0])->toBeInstanceOf(Event::class); diff --git a/tests/Feature/FeaturesTest.php b/tests/Feature/FeaturesTest.php index 3861a5d2..75738e37 100644 --- a/tests/Feature/FeaturesTest.php +++ b/tests/Feature/FeaturesTest.php @@ -1,9 +1,28 @@ shouldReceive('callFunction')->andReturnUsing( + fn() => '{"name":"Jason","age":28}', + ); + + $person = (new Instructor(llm: $mockLLM))->respond( + messages: "His name is Jason, he is 28 years old.", + responseModel: Person::class, + ); + // dump($person); + expect($person)->toBeInstanceOf(Person::class); + expect($person->name)->toBe('Jason'); + expect($person->age)->toBe(28); +})->only(); + + it('self-corrects values extracted by LLM based on validation results', function () { $mockLLM = Mockery::mock(LLM::class); $mockLLM->shouldReceive('callFunction')->andReturnUsing( diff --git a/tests/Feature/SchemaTest.php b/tests/Feature/ReflectionSchemaTest.php similarity index 72% rename from tests/Feature/SchemaTest.php rename to 
tests/Feature/ReflectionSchemaTest.php index ca48c76f..ce319221 100644 --- a/tests/Feature/SchemaTest.php +++ b/tests/Feature/ReflectionSchemaTest.php @@ -1,23 +1,25 @@ withFunction(createEvent(...)); + $array = (new FunctionCallFactory)->fromFunction(createEvent(...)); // dump($array); expect($array)->toBeArray(); expect($array['type'])->toEqual('function'); @@ -32,7 +34,7 @@ function createEvent(string $title, string $date, array $stakeholders): Event { }); it('creates function call - method', function () { - $array = (new FunctionCallSchema)->withMethod((new Events)->createEvent(...)); + $array = (new FunctionCallFactory)->fromMethod((new Events)->createEvent(...)); // dump($array); expect($array)->toBeArray(); expect($array['type'])->toEqual('function'); @@ -47,7 +49,7 @@ function createEvent(string $title, string $date, array $stakeholders): Event { }); it('creates function call - object', function () { - $array = (new FunctionCallSchema)->withClass(Events::class, 'createEvent', 'Extract object from provided content'); + $array = (new FunctionCallFactory)->fromClass(Events::class, 'createEvent', 'Extract object from provided content'); // dump($array); expect($array)->toBeArray(); expect($array['type'])->toEqual('function'); diff --git a/tests/Feature/ScalarsTest.php b/tests/Feature/ScalarsTest.php new file mode 100644 index 00000000..24033f81 --- /dev/null +++ b/tests/Feature/ScalarsTest.php @@ -0,0 +1,97 @@ +shouldReceive('callFunction')->andReturnUsing( + fn() => '{"age":28}', + ); + + $text = "His name is Jason, he is 28 years old."; + $value = (new Instructor(llm: $mockLLM))->respond( + messages: [ + ['role' => 'system', 'content' => $text], + ['role' => 'user', 'content' => 'What is Jason\'s age?'], + ], + responseModel: Scalar::integer('age'), + ); + expect($value)->toBeInt(); + expect($value)->toBe(28); +}); + +it('extracts string type', function () { + $mockLLM = Mockery::mock(LLM::class); + $mockLLM->shouldReceive('callFunction')->andReturnUsing( + 
fn() => '{"firstName":"Jason"}', + ); + + $text = "His name is Jason, he is 28 years old."; + $value = (new Instructor(llm: $mockLLM))->respond( + messages: [ + ['role' => 'system', 'content' => $text], + ['role' => 'user', 'content' => 'What is his name?'], + ], + responseModel: Scalar::string(name: 'firstName'), + ); + expect($value)->toBeString(); + expect($value)->toBe("Jason"); +}); + +it('extracts float type', function () { + $mockLLM = Mockery::mock(LLM::class); + $mockLLM->shouldReceive('callFunction')->andReturnUsing( + fn() => '{"recordTime":11.6}', + ); + + $text = "His name is Jason, he is 28 years old and his 100m sprint record is 11.6 seconds."; + $value = (new Instructor(llm: $mockLLM))->respond( + messages: [ + ['role' => 'system', 'content' => $text], + ['role' => 'user', 'content' => 'What is Jason\'s best 100m run time?'], + ], + responseModel: Scalar::float(name: 'recordTime'), + ); + expect($value)->toBeFloat(); + expect($value)->toBe(11.6); +}); + +it('extracts bool type', function () { + $mockLLM = Mockery::mock(LLM::class); + $mockLLM->shouldReceive('callFunction')->andReturnUsing( + fn() => '{"isAdult":true}', + ); + + $text = "His name is Jason, he is 28 years old."; + $age = (new Instructor(llm: $mockLLM))->respond( + messages: [ + ['role' => 'system', 'content' => $text], + ['role' => 'user', 'content' => 'Is he adult?'], + ], + responseModel: Scalar::boolean(name: 'isAdult'), + ); + expect($age)->toBeBool(); + expect($age)->toBe(true); +}); + +it('extracts selection/enum type', function () { + $mockLLM = Mockery::mock(LLM::class); + $mockLLM->shouldReceive('callFunction')->andReturnUsing( + fn() => '{"citizenshipGroup":"other"}', + ); + + $text = "His name is Jason, he is 28 years old and he lives in Germany."; + $age = (new Instructor(llm: $mockLLM))->respond( + messages: [ + ['role' => 'system', 'content' => $text], + ['role' => 'user', 'content' => 'What is Jason\'s citizenship?'], + ], + responseModel: Scalar::select(['US citizen', 
'Canada citizen', 'other'], name: 'citizenshipGroup'), + ); + expect($age)->toBeString(); + expect($age)->toBe('other'); +}); diff --git a/tests/Feature/SymfonySchemaTest.php b/tests/Feature/SymfonySchemaTest.php new file mode 100644 index 00000000..9d510f20 --- /dev/null +++ b/tests/Feature/SymfonySchemaTest.php @@ -0,0 +1,33 @@ +fromClass(Events::class, 'addEvent', 'Extract object from provided content'); + expect($array)->toBeArray(); + expect($array['type'])->toEqual('function'); + expect($array['function']['name'])->toEqual('addEvent'); + expect($array['function']['description'])->toEqual('Extract object from provided content'); + expect($array['function']['parameters']['type'])->toEqual('object'); + expect($array['function']['parameters']['properties']['events']['type'])->toEqual('array'); + expect($array['function']['parameters']['properties']['events']['items']['type'])->toEqual('object'); + // ... + expect($array)->toMatchSnapshot(); +}); diff --git a/tests/Feature/ValidationTest.php b/tests/Feature/ValidationTest.php index ee6cd0a1..44c4f63a 100644 --- a/tests/Feature/ValidationTest.php +++ b/tests/Feature/ValidationTest.php @@ -3,7 +3,7 @@ use Cognesy\Instructor\Validators\Symfony\Validator; use Tests\Examples\Person; - +use Tests\Examples\PersonWithValidationMixin; it('validates using attribute rules', function () { $person = new Person(); @@ -21,3 +21,17 @@ $validator = new Validator(); expect($validator->validate($person))->toBe(false); }); + + +it('uses custom validation via ValidationMixin', function () { + $person = new PersonWithValidationMixin(); + $person->name = 'Jason'; + // age is less than 18 + $person->age = 12; + $validator = new Validator(); + expect($validator->validate($person))->toBe(false); + // age is more or equal to 18 + $person->age = 19; + $validator = new Validator(); + expect($validator->validate($person))->toBe(true); +});