diff --git a/evals/ComplexExtraction/run.php b/evals/ComplexExtraction/run.php index 58ffceab..503f0aaf 100644 --- a/evals/ComplexExtraction/run.php +++ b/evals/ComplexExtraction/run.php @@ -2,7 +2,7 @@ use Cognesy\Evals\ComplexExtraction\ProjectsEval; use Cognesy\Instructor\Enums\Mode; -use Cognesy\Instructor\Extras\Evals\Aggregators\AggregateExecutionObservation; +use Cognesy\Instructor\Extras\Evals\Aggregators\AggregateExperimentObservation; use Cognesy\Instructor\Extras\Evals\Enums\NumberAggregationMethod; use Cognesy\Instructor\Extras\Evals\Executors\Data\InferenceCases; use Cognesy\Instructor\Extras\Evals\Executors\Data\InstructorData; @@ -33,7 +33,7 @@ ), ], postprocessors: [ - new AggregateExecutionObservation( + new AggregateExperimentObservation( name: 'reliability', observationKey: 'execution.percentFound', method: NumberAggregationMethod::Mean, diff --git a/evals/LLMModes/run.php b/evals/LLMModes/run.php index 83fa6184..b8010414 100644 --- a/evals/LLMModes/run.php +++ b/evals/LLMModes/run.php @@ -5,7 +5,7 @@ use Cognesy\Evals\LLMModes\CompanyEval; use Cognesy\Instructor\Enums\Mode; -use Cognesy\Instructor\Extras\Evals\Aggregators\AggregateExecutionObservation; +use Cognesy\Instructor\Extras\Evals\Aggregators\AggregateExperimentObservation; use Cognesy\Instructor\Extras\Evals\Enums\NumberAggregationMethod; use Cognesy\Instructor\Extras\Evals\Experiment; use Cognesy\Instructor\Extras\Evals\Executors\Data\InferenceCases; @@ -57,9 +57,9 @@ ]), ], postprocessors: [ - new AggregateExecutionObservation( - name: 'reliability', - observationKey: 'is_correct', + new AggregateExperimentObservation( + name: 'experiment.reliability', + observationKey: 'execution.is_correct', method: NumberAggregationMethod::Mean, ), ] diff --git a/evals/SimpleExtraction/run.php b/evals/SimpleExtraction/run.php index 00097182..95c5afee 100644 --- a/evals/SimpleExtraction/run.php +++ b/evals/SimpleExtraction/run.php @@ -3,7 +3,7 @@ use Cognesy\Evals\SimpleExtraction\Company; use Cognesy\Evals\SimpleExtraction\CompanyEval; use Cognesy\Instructor\Enums\Mode; -use Cognesy\Instructor\Extras\Evals\Aggregators\AggregateExecutionObservation; +use Cognesy\Instructor\Extras\Evals\Aggregators\AggregateExperimentObservation; use Cognesy\Instructor\Extras\Evals\Enums\NumberAggregationMethod; use Cognesy\Instructor\Extras\Evals\Experiment; use Cognesy\Instructor\Extras\Evals\Executors\Data\InferenceCases; @@ -36,12 +36,12 @@ ]), ], postprocessors: [ - new AggregateExecutionObservation( + new AggregateExperimentObservation( name: 'experiment.reliability', observationKey: 'execution.is_correct', method: NumberAggregationMethod::Mean, ), - new AggregateExecutionObservation( + new AggregateExperimentObservation( name: 'latency', observationKey: 'execution.timeElapsed', params: ['percentile' => 95], diff --git a/src/Extras/Evals/Aggregators/AggregateExecutionObservation.php b/src/Extras/Evals/Aggregators/AggregateExperimentObservation.php similarity index 76% rename from src/Extras/Evals/Aggregators/AggregateExecutionObservation.php rename to src/Extras/Evals/Aggregators/AggregateExperimentObservation.php index 2923d714..ed4ff6cc 100644 --- a/src/Extras/Evals/Aggregators/AggregateExecutionObservation.php +++ b/src/Extras/Evals/Aggregators/AggregateExperimentObservation.php @@ -10,7 +10,7 @@ use Cognesy\Instructor\Extras\Evals\Utils\NumberSeriesAggregator; use InvalidArgumentException; -class AggregateExecutionObservation implements CanObserveExperiment +class AggregateExperimentObservation implements CanObserveExperiment { public function __construct( private string $name = '', @@ -40,14 +40,18 @@ public function observe(Experiment $experiment): Observation { } private function calculate(Experiment $experiment) : float|int { + $observations = SelectObservations::from([ + $experiment->observations(), + $experiment->executionObservations(), + ])->withKey($this->observationKey)->get(); + + $values = array_map( + callback: fn($observation) => $observation->toFloat(), + array: $observations, + ); + return (new NumberSeriesAggregator( - values: array_map( - callback: fn($observation) => $observation->toFloat(), - array: SelectObservations::from([ - $experiment->observations(), - $experiment->executionObservations(), - ])->withKeys([$this->observationKey])->get(), - ), + values: $values, params: $this->params, method: $this->method) )->aggregate(); diff --git a/src/Extras/Evals/Console/Display.php b/src/Extras/Evals/Console/Display.php index 12b62179..390b6408 100644 --- a/src/Extras/Evals/Console/Display.php +++ b/src/Extras/Evals/Console/Display.php @@ -4,6 +4,7 @@ use Cognesy\Instructor\Extras\Evals\Execution; use Cognesy\Instructor\Extras\Evals\Experiment; +use Cognesy\Instructor\Extras\Evals\Observation\SelectObservations; use Cognesy\Instructor\Utils\Cli\Color; use Cognesy\Instructor\Utils\Cli\Console; use Cognesy\Instructor\Utils\Debug\Debug; @@ -12,13 +13,19 @@ class Display { + private int $terminalWidth = 120; + + public function __construct(array $options = []) { + $this->terminalWidth = Console::getWidth(); + } + public function header(Experiment $experiment) : void { Console::println(''); Console::printColumns([ [22, ' EXPERIMENT (' . Str::limit(text: $experiment->id(), limit: 4, align: STR_PAD_LEFT, fit: false) . ") ", STR_PAD_RIGHT, [Color::BG_BLUE, Color::WHITE, Color::BOLD]], - [70, ' ', STR_PAD_LEFT, [Color::BG_GRAY, Color::DARK_GRAY]], + [$this->flex(22, 30, -2), ' ', STR_PAD_LEFT, [Color::BG_GRAY, Color::DARK_GRAY]], [30, ' ' . $experiment->startedAt()->format('Y-m-d H:i:s') . ' ', STR_PAD_LEFT, [Color::BG_GRAY, Color::DARK_GRAY]], - ], 120, ''); + ], $this->terminalWidth, ''); Console::println(''); Console::println(''); } @@ -27,8 +34,9 @@ public function footer(Experiment $experiment) { Console::println(''); Console::printColumns([ [20, number_format($experiment->timeElapsed(), 2) . ' sec ', STR_PAD_LEFT, [Color::BG_BLUE, Color::WHITE, Color::BOLD]], - [100, ' ' . $experiment->usage()->toString() . ' ', STR_PAD_LEFT, [Color::BG_GRAY, Color::DARK_GRAY]], - ], 120, ''); + [$this->flex(20, 50), ' ', STR_PAD_LEFT, [Color::BG_GRAY, Color::DARK_GRAY]], + [50, ' ' . $experiment->usage()->toString() . ' ', STR_PAD_LEFT, [Color::BG_GRAY, Color::DARK_GRAY]], + ], $this->terminalWidth, ''); Console::println(''); Console::println(''); $this->displayObservations($experiment); @@ -44,7 +52,7 @@ public function before(Execution $execution) : void { [10, $connection, STR_PAD_RIGHT, Color::WHITE], [11, $mode, STR_PAD_RIGHT, Color::YELLOW], [8, $streamed ? 'stream' : 'sync', STR_PAD_LEFT, $streamed ? Color::BLUE : Color::DARK_BLUE], - ], 80); + ], $this->terminalWidth); Console::print('', [Color::GRAY, Color::BG_BLACK]); } @@ -65,7 +73,7 @@ public function displayExceptions(array $exceptions) : void { Console::printColumns([ [30, $key, STR_PAD_RIGHT, [Color::DARK_YELLOW]], [100, $exLine, STR_PAD_RIGHT, [Color::WHITE]] - ], 120); + ], $this->terminalWidth); Console::println(''); Console::println($exception->getMessage(), [Color::GRAY]); if (Debug::isEnabled()) { @@ -82,38 +90,27 @@ private function displayResult(Execution $execution) : void { $answerLine = str_replace("\n", '\n', $answer); $timeElapsed = $execution->timeElapsed(); $tokensPerSec = $execution->outputTps(); - - $columns = array_merge([ - [9, $this->timeFormat($timeElapsed), STR_PAD_LEFT, [Color::DARK_YELLOW]], - [10, $this->tokensPerSecFormat($tokensPerSec), STR_PAD_LEFT, [Color::CYAN]], - ], - $this->makeEvalColumns($execution), - [ - [60, $answerLine, STR_PAD_RIGHT, [Color::WHITE, Color::BG_BLACK]] - ], - ); - - echo Console::columns($columns, 120); - } - - private function makeEvalColumns(Execution $execution, int $maxCols = 3) : array { - $columns = []; - $count = 0; - foreach ($execution->summaries() as $aggregate) { - $columns[] = [6, $metric->toString(), STR_PAD_BOTH, $metric->toCliColor()]; - $count++; - if ($count >= $maxCols) { - break; - } - } - foreach ($execution->observations() as $observation) { - $columns[] = [6, $observation->value(), STR_PAD_BOTH, [Color::GRAY]]; - $count++; - if ($count >= $maxCols) { - break; - } - } - return $columns; + $isCorrect = SelectObservations::from($execution->observations())->withKeys(['execution.is_correct'])->sole()->value(); + + $rowStatus = match($isCorrect) { + 1 => 'OK', + 0 => 'FAIL', + default => '????', + }; + $cliColor = match($isCorrect) { + 1 => [Color::BG_GREEN, Color::WHITE], + 0 => [Color::BG_RED, Color::WHITE], + default => [Color::BG_BLACK, Color::RED], + }; + + $columns = [ + [9, $this->timeFormat($timeElapsed), STR_PAD_LEFT, [Color::DARK_YELLOW]], + [10, $this->tokensPerSecFormat($tokensPerSec), STR_PAD_LEFT, [Color::CYAN]], + [6, $rowStatus, STR_PAD_BOTH, $cliColor], + [60, $answerLine, STR_PAD_RIGHT, [Color::WHITE, Color::BG_BLACK]] + ]; + + echo Console::columns($columns, $this->terminalWidth); } private function displayException(Exception $exception) : void { @@ -122,7 +119,7 @@ private function displayException(Exception $exception) : void { [10, '', STR_PAD_LEFT, [Color::CYAN]], [6, '!!!!', STR_PAD_BOTH, [Color::WHITE, COLOR::BOLD, Color::BG_MAGENTA]], [60, $this->exceptionToText($exception, 80), STR_PAD_RIGHT, [Color::RED, Color::BG_BLACK]], - ], 120); + ], $this->terminalWidth); } @@ -144,13 +141,30 @@ private function exceptionToText(Exception $e, int $maxLen) : string { private function displayObservations(Experiment $experiment) { - Console::println('RESULTS:', [Color::WHITE, Color::BOLD]); + Console::println('SUMMARY:', [Color::WHITE, Color::BOLD]); foreach ($experiment->observations() as $observation) { + //$format = $observation->metadata()->get('format', '%s'); + $value = $observation->value(); + $unit = $observation->metadata()->get('unit', '-'); + $meta = Str::limit($observation->metadata()->except('experimentId')->toJson(), 60); + Console::printColumns([ - [20, $observation->key(), STR_PAD_LEFT, [Color::DARK_GRAY]], - [20, $observation->value(), STR_PAD_RIGHT, [Color::WHITE]], - ], 120); + [5, $observation->id(), STR_PAD_LEFT, [Color::DARK_GRAY]], + [25, $observation->key(), STR_PAD_LEFT, [Color::DARK_GRAY]], + [20, $value, STR_PAD_LEFT, [Color::WHITE]], + [10, $unit, STR_PAD_RIGHT, [Color::DARK_GRAY]], + [$this->flex(5,25,20,10), $meta, STR_PAD_RIGHT, [Color::GRAY]], + ], $this->terminalWidth); Console::println(''); } } + + private function flex(int ...$cols) : int { + $flex = 0; + foreach ($cols as $col) { + $flex += $col; + } + $count = count($cols) + 1; + return $this->terminalWidth - $flex - $count; + } } \ No newline at end of file diff --git a/src/Extras/Evals/Execution.php b/src/Extras/Evals/Execution.php index f78fee4c..53686206 100644 --- a/src/Extras/Evals/Execution.php +++ b/src/Extras/Evals/Execution.php @@ -25,8 +25,8 @@ class Execution ]; private CanRunExecution $action; - private array $processors; - private array $postprocessors; + private array $processors = []; + private array $postprocessors = []; private string $id; private ?DateTime $startedAt = null; @@ -108,14 +108,19 @@ public function execute() : void { $this->usage = $usage; $observations = MakeObservations::for($this) - ->withSources($this->processors()) + ->withSources([ + $this->processors, + $this->defaultObservers, + ]) ->only([ CanObserveExecution::class, CanProvideExecutionObservations::class, ]); $summaries = MakeObservations::for($this) - ->withSources($this->postprocessors) + ->withSources([ + $this->postprocessors + ]) ->only([ CanSummarizeExecution::class, CanProvideExecutionObservations::class, @@ -139,7 +144,7 @@ public function hasObservations() : bool { // HELPERS ////////////////////////////////////////////////// - public function exception() : Exception { + public function exception() : ?Exception { return $this->exception; } @@ -207,7 +212,7 @@ public function hasFeedback() : bool { * @return Observation[] */ public function summaries() : array { - return SelectObservations::from($this->observations) + return SelectObservations::from([$this->observations]) ->withTypes(['summary']) ->all(); } @@ -215,13 +220,4 @@ public function summaries() : array { public function hasSummaries() : bool { return count($this->summaries()) > 0; } - - // INTERNAL ///////////////////////////////////////////////// - - /** - * @return Observation[] - */ - private function processors() : array { - return array_merge($this->defaultObservers, $this->processors); - } } diff --git a/src/Extras/Evals/Experiment.php b/src/Extras/Evals/Experiment.php index b2fbf892..0a94253b 100644 --- a/src/Extras/Evals/Experiment.php +++ b/src/Extras/Evals/Experiment.php @@ -27,8 +27,8 @@ class Experiment { private Display $display; private Generator $cases; private CanRunExecution $executor; - private array $processors; - private array $postprocessors; + private array $processors = []; + private array $postprocessors = []; readonly private string $id; private ?DateTime $startedAt = null; @@ -42,7 +42,7 @@ class Experiment { private array $exceptions = []; /** @var Observation[] */ - private array $observations; + private array $observations = []; public function __construct( Generator $cases, @@ -101,35 +101,14 @@ public function execute() : array { } $this->usage = $this->accumulateUsage(); $this->timeElapsed = microtime(true) - $this->startedAt->getTimestamp(); + + $this->observations = $this->makeObservations(); + $this->display->footer($this); if (!empty($this->exceptions)) { $this->display->displayExceptions($this->exceptions); } - // execute observers - $observations = MakeObservations::for($this) - ->withSources([ - $this->processors, - $this->defaultProcessors, - ]) - ->only([ - CanObserveExperiment::class, - CanObserveExecution::class, - CanProvideExecutionObservations::class, - ]); - - // execute summarizers - $summaries = MakeObservations::for($this) - ->withSources([ - $this->postprocessors, - ]) - ->only([ - CanSummarizeExperiment::class, - CanProvideExecutionObservations::class, - ]); - - $this->observations = array_filter(array_merge($observations, $summaries)); - return $this->summaries(); } @@ -207,7 +186,8 @@ private function makeExecution(mixed $case) : Execution { }; return (new Execution(case: $caseData)) ->withExecutor($this->executor) - ->withProcessors($this->processors); + ->withProcessors($this->processors) + ->withPostprocessors($this->postprocessors); } private function accumulateUsage() : Usage { @@ -217,4 +197,31 @@ private function accumulateUsage() : Usage { } return $usage; } + + private function makeObservations() : array { + // execute observers + $observations = MakeObservations::for($this) + ->withSources([ + $this->processors, + $this->defaultProcessors, + ]) + ->only([ + CanObserveExperiment::class, + CanObserveExecution::class, + CanProvideExecutionObservations::class, + ]); + + // execute summarizers + $summaries = MakeObservations::for($this) + ->withSources([ + $this->postprocessors, + ]) + ->only([ + CanSummarizeExperiment::class, + CanObserveExperiment::class, + CanProvideExecutionObservations::class, + ]); + + return array_filter(array_merge($observations, $summaries)); + } } diff --git a/src/Extras/Evals/Observation/MakeObservations.php b/src/Extras/Evals/Observation/MakeObservations.php index 6ce039e7..30384b58 100644 --- a/src/Extras/Evals/Observation/MakeObservations.php +++ b/src/Extras/Evals/Observation/MakeObservations.php @@ -20,24 +20,17 @@ public function __construct( private ?Execution $execution = null, ) {} -// public static function forExperiment(Experiment $experiment) : self { -// return new self(experiment: $experiment); -// } -// -// public static function forExecution(Execution $execution) : self { -// return new self(execution: $execution); -// } - - public function for(Experiment|Execution $subject) : self { - if ($subject instanceof Experiment) { - $this->experiment = $subject; - } else { - $this->execution = $subject; - } - return $this; + public static function for(Experiment|Execution $subject) : self { + return new self( + experiment: $subject instanceof Experiment ? $subject : null, + execution: $subject instanceof Execution ? $subject : null, + ); } public function withSources(array $sources) : self { + if (is_array($sources[0] ?? null)) { + $sources = array_merge(...$sources); + } $this->sources = $sources; return $this; } @@ -56,7 +49,7 @@ public function observations(array $types = null) : array { $observations = []; foreach ($this->sources($this->sources, $types) as $source) { $observations[] = match(true) { - $source instanceof CanProvideExecutionObservations => $source->observations(), + $source instanceof CanProvideExecutionObservations => $source->observations($this->execution), $source instanceof CanObserveExperiment => $this->wrapObservation($source->observe(...), $this->experiment), $source instanceof CanSummarizeExperiment => $this->wrapObservation($source->summarize(...), $this->experiment), $source instanceof CanObserveExecution => $this->wrapObservation($source->observe(...), $this->execution), diff --git a/src/Extras/Evals/Observation/SelectObservations.php b/src/Extras/Evals/Observation/SelectObservations.php index 63726cd4..c4434c62 100644 --- a/src/Extras/Evals/Observation/SelectObservations.php +++ b/src/Extras/Evals/Observation/SelectObservations.php @@ -11,7 +11,19 @@ private function __construct( ) {} public static function from(array $sources) : self { - return new self(array_merge(...$sources)); + if (is_array($sources[0] ?? null)) { + $sources = array_merge(...$sources); + } + return new self($sources); + } + + public function withKey(string $key) : self { + return new SelectObservations( + array_filter( + $this->observations, + fn($observation) => $observation->key() === $key + ) + ); } public function withKeys(array $keys) : self { diff --git a/src/Utils/Cli/Console.php b/src/Utils/Cli/Console.php index 6ab8ef95..1060da7b 100644 --- a/src/Utils/Cli/Console.php +++ b/src/Utils/Cli/Console.php @@ -82,7 +82,7 @@ static private function color(string|array $color, string $output = '') : string }; } - static private function getWidth() : int { + public static function getWidth() : int { return (int) exec('tput cols'); } } diff --git a/src/Utils/DataMap.php b/src/Utils/DataMap.php index ee08b5ff..9424e4b5 100644 --- a/src/Utils/DataMap.php +++ b/src/Utils/DataMap.php @@ -92,19 +92,13 @@ public function getType(string $key): string return gettype($value); } - public function merge(array $data): self { - $this->dot->merge($data); - return $this; - } - /** * Magic getter. * * @param string $name * @return mixed */ - public function __get(string $name): mixed - { + public function __get(string $name): mixed { return $this->get($name); } @@ -115,8 +109,7 @@ public function __get(string $name): mixed * @param mixed $value * @return void */ - public function __set(string $name, mixed $value): void - { + public function __set(string $name, mixed $value): void { $this->set($name, $value); } @@ -126,8 +119,7 @@ public function __set(string $name, mixed $value): void * @param string $name * @return bool */ - public function __isset(string $name): bool - { + public function __isset(string $name): bool { return $this->has($name); } @@ -137,8 +129,7 @@ public function __isset(string $name): bool * @param int $options * @return string */ - public function toJson(int $options = 0): string - { + public function toJson(int $options = 0): string { return json_encode($this->toArray(), $options); } @@ -150,8 +141,7 @@ public function toJson(int $options = 0): string * * @throws InvalidArgumentException If the JSON is invalid. */ - public static function fromJson(string $json): self - { + public static function fromJson(string $json): self { $data = json_decode($json, true); if (json_last_error() !== JSON_ERROR_NONE) { @@ -166,8 +156,7 @@ public static function fromJson(string $json): self * * @return array */ - public function toArray(): array - { + public function toArray(): array { return $this->dot->all(); } @@ -177,8 +166,7 @@ public function toArray(): array * @param array $array * @return self */ - public static function fromArray(array $array): self - { + public static function fromArray(array $array): self { return new self($array); } @@ -187,11 +175,21 @@ public static function fromArray(array $array): self * * @return array */ - public function fields(): array - { + public function fields(): array { return array_keys($this->toArray()); } + /** + * Merge data into the DataMap. + * + * @param array $data + * @return self + */ + public function merge(array $data): self { + $this->dot->merge($data); + return $this; + } + /** * Perform operations on the DataMap using Aimeos\Map. * @@ -200,8 +198,7 @@ public function fields(): array * * @throws InvalidArgumentException If the path does not lead to an array or DataMap, or if wildcards are used incorrectly. */ - public function toMap(?string $path = null): Map - { + public function toMap(?string $path = null): Map { if ($path === null) { // No path provided, return Map for the entire data $data = $this->toArray(); @@ -218,6 +215,42 @@ public function toMap(?string $path = null): Map return new Map($collectedValues); } + /** + * Specify data which should be serialized to JSON. + * + * @return mixed + */ + public function jsonSerialize(): mixed { + return $this->toArray(); + } + + /** + * Get a subset of the DataMap. + * + * @param string ...$keys + * @return self + */ + public function except(string ...$keys): self { + $data = $this->toArray(); + foreach ($keys as $key) { + unset($data[$key]); + } + return new self($data); + } + + /** + * Get a subset of the DataMap. + * + * @param string ...$keys + * @return self + */ + public function only(string ...$keys): self { + $data = $this->toArray(); + return new self(array_intersect_key($data, array_flip($keys))); + } + + // INTERNAL ///////////////////////////////////////////////// + /** * Collect values from the DataMap based on a path. * @@ -299,14 +332,4 @@ private function traverseWithWildcards(mixed $currentData, array $pathParts): ar return $collected; } - - /** - * Specify data which should be serialized to JSON. - * - * @return mixed - */ - public function jsonSerialize(): mixed - { - return $this->toArray(); - } }