diff --git a/examples/ClassificationMulticlass/run.php b/examples/ClassificationMulticlass/run.php index 532bc0d0..f0a7686c 100644 --- a/examples/ClassificationMulticlass/run.php +++ b/examples/ClassificationMulticlass/run.php @@ -18,20 +18,19 @@ enum Label : string { } /** Represents analysed ticket data */ -class Ticket { +class TicketLabels { /** @var Label[] */ - public array $ticketLabels = []; + public array $labels = []; } // Perform single-label classification on the input text. -function multi_classify(string $data) : Ticket { +function multi_classify(string $data) : TicketLabels { return (new Instructor())->respond( messages: [[ "role" => "user", - "content" => "Classify following support ticket: {$data}", + "content" => "Label following support ticket: {$data}", ]], - responseModel: Ticket::class, - model: "gpt-3.5-turbo-0613", + responseModel: TicketLabels::class, ); } @@ -39,6 +38,6 @@ function multi_classify(string $data) : Ticket { $ticket = "My account is locked and I can't access my billing info."; $prediction = multi_classify($ticket); -assert(in_array(Label::TECH_ISSUE, $prediction->classLabels)); -assert(in_array(Label::BILLING, $prediction->classLabels)); +assert(in_array(Label::TECH_ISSUE, $prediction->labels)); +assert(in_array(Label::BILLING, $prediction->labels)); dump($prediction); diff --git a/examples/RestatingInstructions/run.php b/examples/RestatingInstructions/run.php index 35b1ed37..3b1835e3 100644 --- a/examples/RestatingInstructions/run.php +++ b/examples/RestatingInstructions/run.php @@ -18,7 +18,7 @@ */ class Role { - /** Restate the instructions and rules to correctly determine the title. */ + /** Restate instructions and rules, so you can correctly determine the title. */ public string $instructions; /** Role description */ public string $description; @@ -26,6 +26,7 @@ class Role public string $title; } +/** Details of analyzed user. The key information we're looking for is appropriate role data. */ class UserDetail { public string $name; @@ -33,9 +34,9 @@ class UserDetail public Role $role; } -$user = (new Instructor)->respond( - messages: [["role" => "user", "content" => "I'm Jason, I'm 28 yo. I am responsible for driving growth of our company."]], +$instructor = new Instructor; +$user = ($instructor)->respond( + messages: [["role" => "user", "content" => "I'm Jason, I'm 28 yo. I am the head of Apex Software, responsible for driving growth of our company."]], responseModel: UserDetail::class, ); - dump($user); diff --git a/src/Instructor.php b/src/Instructor.php index 89ed91f6..ff599066 100644 --- a/src/Instructor.php +++ b/src/Instructor.php @@ -11,7 +11,8 @@ * Use respond() method to generate structured responses from LLM calls. */ class Instructor { - private LLM $llm; + protected LLM $llm; + protected $messages; public $retryPrompt = "Recall function correctly, fix following errors:"; public function __construct( @@ -89,4 +90,11 @@ public function json() : string { public function response() : array { return $this->llm->response(); } + + /** + * Most recent request sent to LLM + */ + public function request() : array { + return $this->llm->request(); + } } diff --git a/src/LLMs/OpenAI/LLM.php b/src/LLMs/OpenAI/LLM.php index 4566adba..93098c62 100644 --- a/src/LLMs/OpenAI/LLM.php +++ b/src/LLMs/OpenAI/LLM.php @@ -11,6 +11,7 @@ class LLM implements CanCallFunction { private Client $client; private CreateResponse $response; + private array $request; public function __construct( string $apiKey = '', @@ -34,7 +35,7 @@ public function callFunction( string $model = 'gpt-4-0125-preview', array $options = [] ) : string { - $this->response = $this->client->chat()->create(array_merge([ + $this->request = array_merge([ 'model' => $model, 'messages' => $messages, 'tools' => [$functionSchema], @@ -42,7 +43,8 @@ public function callFunction( 'type' => 'function', 'function' => ['name' => $functionName] ] - ], $options)); + ], $options); + $this->response = $this->client->chat()->create($this->request); return $this->data(); } @@ -50,6 +52,10 @@ public function response() : array { return $this->response->toArray(); } + public function request() : array { + return $this->request; + } + public function data() : string { return $this->response->choices[0]->message->toolCalls[0]->function->arguments ?? ''; } diff --git a/src/PropertyMap.php b/src/PropertyMap.php index d74bc7fd..a12066d3 100644 --- a/src/PropertyMap.php +++ b/src/PropertyMap.php @@ -8,7 +8,15 @@ class PropertyMap { private $map = []; - public function get(string $class, string $property) : Schema - { + public function register(string $class, string $property, Schema $schema) { + $this->map[$class][$property] = $schema; + } + + public function get(string $class, string $property) : Schema { + return $this->map[$class][$property]; + } + + public function has(string $class, string $property) : bool { + return isset($this->map[$class][$property]); } } \ No newline at end of file diff --git a/src/Schema/PropertyInfoBased/Data/Schema/ObjectRefSchema.php b/src/Schema/PropertyInfoBased/Data/Schema/ObjectRefSchema.php index 0ac96b53..bcdf2614 100644 --- a/src/Schema/PropertyInfoBased/Data/Schema/ObjectRefSchema.php +++ b/src/Schema/PropertyInfoBased/Data/Schema/ObjectRefSchema.php @@ -2,6 +2,8 @@ namespace Cognesy\Instructor\Schema\PropertyInfoBased\Data\Schema; +use Cognesy\Instructor\Schema\PropertyInfoBased\Data\Reference; + class ObjectRefSchema extends Schema { private string $defsLabel = 'definitions'; diff --git a/src/Schema/PropertyInfoBased/Data/Schema/ObjectSchema.php b/src/Schema/PropertyInfoBased/Data/Schema/ObjectSchema.php index a571721f..fd5d7721 100644 --- a/src/Schema/PropertyInfoBased/Data/Schema/ObjectSchema.php +++ b/src/Schema/PropertyInfoBased/Data/Schema/ObjectSchema.php @@ -31,9 +31,10 @@ public function toArray(callable $refCallback = null) : array } return array_filter([ 'type' => 'object', + 'title' => $this->name, + 'description' => $this->description, 'properties' => $propertyDefs, 'required' => $this->required, - 'description' => $this->description, ]); } } diff --git a/src/Schema/PropertyInfoBased/Factories/SchemaFactory.php b/src/Schema/PropertyInfoBased/Factories/SchemaFactory.php index c6681d86..c41d1d2a 100644 --- a/src/Schema/PropertyInfoBased/Factories/SchemaFactory.php +++ b/src/Schema/PropertyInfoBased/Factories/SchemaFactory.php @@ -2,6 +2,7 @@ namespace Cognesy\Instructor\Schema\PropertyInfoBased\Factories; +use Cognesy\Instructor\PropertyMap; use Cognesy\Instructor\Schema\PropertyInfoBased\Data\Schema\ArraySchema; use Cognesy\Instructor\Schema\PropertyInfoBased\Data\Schema\EnumSchema; use Cognesy\Instructor\Schema\PropertyInfoBased\Data\Schema\ObjectRefSchema; @@ -10,6 +11,7 @@ use Cognesy\Instructor\Schema\PropertyInfoBased\Data\Schema\Schema; use Cognesy\Instructor\Schema\PropertyInfoBased\Data\TypeDetails; use Cognesy\Instructor\Schema\PropertyInfoBased\Utils\ClassInfo; +use Cognesy\Instructor\SchemaMap; /** * Factory for creating schema objects from class names @@ -21,8 +23,13 @@ class SchemaFactory { /** @var bool allows to render schema with object properties inlined or referenced */ protected $useObjectReferences = false; + protected SchemaMap $schemaMap; + protected PropertyMap $propertyMap; - public function __construct() {} + public function __construct() { + $this->schemaMap = new SchemaMap; + $this->propertyMap = new PropertyMap; + } /** * Extracts the schema from a class and constructs a function call @@ -31,7 +38,59 @@ public function __construct() {} */ public function schema(string $anyType) : Schema { - return $this->makeSchema((new TypeDetailsFactory)->fromTypeName($anyType), '', ''); + if (!$this->schemaMap->has($anyType)) { + $this->schemaMap->register($anyType, $this->makeSchema((new TypeDetailsFactory)->fromTypeName($anyType))); + } + return $this->schemaMap->get($anyType); + } + + public function property(string $class, string $property) : Schema + { + if (!$this->propertyMap->has($class, $property)) { + $this->propertyMap->register($class, $property, $this->getPropertySchema($class, $property)); + } + return $this->propertyMap->get($class, $property); + } + + /** + * Gets all the property schemas of a class + * + * @param string $class + * @return Schema[] + */ + protected function getPropertySchemas(string $class) : array { + $properties = (new ClassInfo)->getProperties($class); + $propertySchemas = []; + foreach ($properties as $property) { + $propertySchemas[$property] = $this->property($class, $property); + } + return $propertySchemas; + } + + /** + * Gets the schema of a property + * + * @param string $class + * @param string $property + * @return Schema + */ + protected function getPropertySchema(string $class, string $property) : Schema { + $propertyInfoType = (new ClassInfo)->getType($class, $property); + $type = (new TypeDetailsFactory)->fromPropertyInfo($propertyInfoType); + $description = $this->getPropertyDescription($type, $class, $property); + return $this->makePropertySchema($type, $property, $description); + } + + protected function getPropertyDescription(TypeDetails $type, string $class, string $property) : string{ + if (in_array($type->type, ['object', 'enum'])) { + $classDescription = (new ClassInfo)->getClassDescription($type->class); + } else { + $classDescription = ''; + } + return implode("\n", array_filter([ + (new ClassInfo)->getPropertyDescription($class, $property), + $classDescription, + ])); } /** @@ -42,24 +101,28 @@ public function schema(string $anyType) : Schema * @param string $description * @return Schema */ - protected function makeSchema(TypeDetails $type, string $name, string $description) : Schema + protected function makeSchema(TypeDetails $type) : Schema { return match ($type->type) { 'object' => new ObjectSchema( $type, - $name, - $description, + $type->class, + (new ClassInfo)->getClassDescription($type->class), $this->getPropertySchemas($type->class), (new ClassInfo)->getRequiredProperties($type->class), ), - 'enum' => new EnumSchema($type, $name, $description), + 'enum' => new EnumSchema( + $type, + $type->class, + (new ClassInfo)->getClassDescription($type->class), + ), 'array' => new ArraySchema( $type, - $name, - $description, - $this->makePropertySchema($type, $name, $description), + '', + '', + $this->makePropertySchema($type, 'item', 'Array item'), ), - 'int', 'string', 'bool', 'float' => new ScalarSchema($type, $name, $description), + 'int', 'string', 'bool', 'float' => new ScalarSchema($type, 'value', 'Correctly extracted value'), default => throw new \Exception('Unknown type: '.$type->type), }; } @@ -81,7 +144,7 @@ protected function makePropertySchema(TypeDetails $type, string $name, string $d $type, $name, $description, - $this->makePropertySchema($type->nestedType, '', ''), + $this->makePropertySchema($type->nestedType, 'item', 'Array item'), ), 'int', 'string', 'bool', 'float' => new ScalarSchema($type, $name, $description), default => throw new \Exception('Unknown type: ' . $type->type), @@ -109,33 +172,4 @@ protected function makePropertyObject(TypeDetails $type, string $name, string $d (new ClassInfo)->getRequiredProperties($type->class), ); } - - /** - * Gets all the property schemas of a class - * - * @param string $class - * @return Schema[] - */ - protected function getPropertySchemas(string $class) : array { - $properties = (new ClassInfo)->getProperties($class); - $propertySchemas = []; - foreach ($properties as $property) { - $propertySchemas[$property] = $this->getPropertySchema($class, $property); - } - return $propertySchemas; - } - - /** - * Gets the schema of a property - * - * @param string $class - * @param string $property - * @return Schema - */ - protected function getPropertySchema(string $class, string $property) : Schema { - $propertyInfoType = (new ClassInfo)->getType($class, $property); - $propertyDescription = (new ClassInfo)->getDescription($class, $property); - $type = (new TypeDetailsFactory)->fromPropertyInfo($propertyInfoType); - return $this->makePropertySchema($type, $property, $propertyDescription); - } } \ No newline at end of file diff --git a/src/Schema/PropertyInfoBased/Utils/ClassInfo.php b/src/Schema/PropertyInfoBased/Utils/ClassInfo.php index a330d6be..47e76e79 100644 --- a/src/Schema/PropertyInfoBased/Utils/ClassInfo.php +++ b/src/Schema/PropertyInfoBased/Utils/ClassInfo.php @@ -42,7 +42,13 @@ public function getProperties(string $class) : array { return $this->extractor()->getProperties($class) ?? []; } - public function getDescription(string $class, string $property): string { + public function getClassDescription(string $class) : string { + // get class description from PHPDoc + $reflection = new ReflectionClass($class); + return DocstringUtils::descriptionsOnly($reflection->getDocComment()); + } + + public function getPropertyDescription(string $class, string $property): string { $extractor = $this->extractor(); return trim(implode(' ', [ $extractor->getShortDescription($class, $property), diff --git a/src/Schema/PropertyInfoBased/Utils/DocstringUtils.php b/src/Schema/PropertyInfoBased/Utils/DocstringUtils.php new file mode 100644 index 00000000..8845fd77 --- /dev/null +++ b/src/Schema/PropertyInfoBased/Utils/DocstringUtils.php @@ -0,0 +1,46 @@ +through(fn($code) => self::removeMarkers($code)) + ->through(fn($code) => self::removeAnnotations($code)) + ->then(fn($code) => trim($code)) + ->process($code); + } + + public static function removeMarkers(string $code): string + { + // Pattern to match comment markers + $pattern = '/(\/\*\*|\*\/|\/\/|#)/'; + + // Remove comment markers from the string + $cleanedString = preg_replace($pattern, '', $code); + + // Optional: Clean up extra asterisks and whitespace from multiline comments + $cleanedString = preg_replace('/^\s*\*\s?/m', '', $cleanedString); + + return $cleanedString; + } + + public static function removeAnnotations(string $code): string + { + $lines = explode("\n", $code); + $cleanedLines = []; + foreach ($lines as $line) { + $trimmed = trim($line); + if (empty($trimmed)) { + continue; + } + if ($trimmed[0] !== '@') { + $cleanedLines[] = $line; + } + } + return implode("\n", $cleanedLines); + } +} diff --git a/src/Schema/ReflectionBased/Reflection/PhpDoc/DocstringUtils.php b/src/Schema/ReflectionBased/Reflection/PhpDoc/DocstringUtils.php deleted file mode 100644 index 0227af27..00000000 --- a/src/Schema/ReflectionBased/Reflection/PhpDoc/DocstringUtils.php +++ /dev/null @@ -1,82 +0,0 @@ -through(fn($code) => self::removeMarkers($code)) - ->through(fn($code) => self::removeAnnotations($code)) - ->then(fn($code) => trim($code)) - ->process($code); - } - - public static function removeMarkers(string $code): string - { - // Pattern to match comment markers - $pattern = '/(\/\*\*|\*\/|\/\/|#)/'; - - // Remove comment markers from the string - $cleanedString = preg_replace($pattern, '', $code); - - // Optional: Clean up extra asterisks and whitespace from multiline comments - $cleanedString = preg_replace('/^\s*\*\s?/m', '', $cleanedString); - - return $cleanedString; - } - - public static function removeAnnotations(string $code): string - { - $lines = explode("\n", $code); - $cleanedLines = []; - foreach ($lines as $line) { - $trimmed = trim($line); - if (empty($trimmed)) { - continue; - } - if ($trimmed[0] !== '@') { - $cleanedLines[] = $line; - } - } - return implode("\n", $cleanedLines); - } - - static public function getPhpDocType(string $type) : array { - $isResolved = false; - $keyTypeName = ''; - $valueTypeName = ''; - - // case 1: array<> style type definition - if (str_starts_with($type, 'array<')) { - if (str_contains($type, ',')) { - // extract types from "array" type definition - $typeData = explode(',', substr($type, 6, -1)); - $keyTypeName = $typeData[0]; - $valueTypeName = $typeData[1]; - } else { - // extract types from "array" type definition - $keyTypeName = 'int'; - $valueTypeName = substr($type, 6, -1); - } - $isResolved = true; - } - - // case 2: itemType[] style type definition - if (str_ends_with($type, '[]')) { - // extract type from "valueType[]" type definition - $keyTypeName = 'int'; - $valueTypeName = substr($type, 0, -2); - $isResolved = true; - } - - // remove leading backslash from type name - if (str_starts_with($valueTypeName, '\\')) { - $valueTypeName = substr($valueTypeName, 1); - } - - return [$isResolved, $keyTypeName, $valueTypeName]; - } -} diff --git a/src/SchemaMap.php b/src/SchemaMap.php new file mode 100644 index 00000000..19a3cc7d --- /dev/null +++ b/src/SchemaMap.php @@ -0,0 +1,25 @@ +schemas[$typeName] = $schema; + } + + public function get(string $typeName) : Schema + { + return $this->schemas[$typeName]; + } + + public function has(string $typeName) : bool + { + return isset($this->schemas[$typeName]); + } +} \ No newline at end of file diff --git a/src/TypeMap.php b/src/TypeMap.php deleted file mode 100644 index 3ead6278..00000000 --- a/src/TypeMap.php +++ /dev/null @@ -1,20 +0,0 @@ -types[$typeName] = $schema; - } - - public function get(string $typeName) : Schema - { - return $this->types[$typeName]; - } -} \ No newline at end of file