Skip to content

Commit

Permalink
Evals - cleaning up
Browse files Browse the repository at this point in the history
  • Loading branch information
ddebowczyk committed Oct 23, 2024
1 parent 4469eb7 commit a57863c
Show file tree
Hide file tree
Showing 11 changed files with 203 additions and 154 deletions.
4 changes: 2 additions & 2 deletions evals/ComplexExtraction/run.php
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

use Cognesy\Evals\ComplexExtraction\ProjectsEval;
use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Evals\Aggregators\AggregateExecutionObservation;
use Cognesy\Instructor\Extras\Evals\Aggregators\AggregateExperimentObservation;
use Cognesy\Instructor\Extras\Evals\Enums\NumberAggregationMethod;
use Cognesy\Instructor\Extras\Evals\Executors\Data\InferenceCases;
use Cognesy\Instructor\Extras\Evals\Executors\Data\InstructorData;
Expand Down Expand Up @@ -33,7 +33,7 @@
),
],
postprocessors: [
new AggregateExecutionObservation(
new AggregateExperimentObservation(
name: 'reliability',
observationKey: 'execution.percentFound',
method: NumberAggregationMethod::Mean,
Expand Down
8 changes: 4 additions & 4 deletions evals/LLMModes/run.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

use Cognesy\Evals\LLMModes\CompanyEval;
use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Evals\Aggregators\AggregateExecutionObservation;
use Cognesy\Instructor\Extras\Evals\Aggregators\AggregateExperimentObservation;
use Cognesy\Instructor\Extras\Evals\Enums\NumberAggregationMethod;
use Cognesy\Instructor\Extras\Evals\Experiment;
use Cognesy\Instructor\Extras\Evals\Executors\Data\InferenceCases;
Expand Down Expand Up @@ -57,9 +57,9 @@
]),
],
postprocessors: [
new AggregateExecutionObservation(
name: 'reliability',
observationKey: 'is_correct',
new AggregateExperimentObservation(
name: 'experiment.reliability',
observationKey: 'execution.is_correct',
method: NumberAggregationMethod::Mean,
),
]
Expand Down
6 changes: 3 additions & 3 deletions evals/SimpleExtraction/run.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use Cognesy\Evals\SimpleExtraction\Company;
use Cognesy\Evals\SimpleExtraction\CompanyEval;
use Cognesy\Instructor\Enums\Mode;
use Cognesy\Instructor\Extras\Evals\Aggregators\AggregateExecutionObservation;
use Cognesy\Instructor\Extras\Evals\Aggregators\AggregateExperimentObservation;
use Cognesy\Instructor\Extras\Evals\Enums\NumberAggregationMethod;
use Cognesy\Instructor\Extras\Evals\Experiment;
use Cognesy\Instructor\Extras\Evals\Executors\Data\InferenceCases;
Expand Down Expand Up @@ -36,12 +36,12 @@
]),
],
postprocessors: [
new AggregateExecutionObservation(
new AggregateExperimentObservation(
name: 'experiment.reliability',
observationKey: 'execution.is_correct',
method: NumberAggregationMethod::Mean,
),
new AggregateExecutionObservation(
new AggregateExperimentObservation(
name: 'latency',
observationKey: 'execution.timeElapsed',
params: ['percentile' => 95],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
use Cognesy\Instructor\Extras\Evals\Utils\NumberSeriesAggregator;
use InvalidArgumentException;

class AggregateExecutionObservation implements CanObserveExperiment
class AggregateExperimentObservation implements CanObserveExperiment
{
public function __construct(
private string $name = '',
Expand Down Expand Up @@ -40,14 +40,18 @@ public function observe(Experiment $experiment): Observation {
}

private function calculate(Experiment $experiment) : float|int {
$observations = SelectObservations::from([
$experiment->observations(),
$experiment->executionObservations(),
])->withKey($this->observationKey)->get();

$values = array_map(
callback: fn($observation) => $observation->toFloat(),
array: $observations,
);

return (new NumberSeriesAggregator(
values: array_map(
callback: fn($observation) => $observation->toFloat(),
array: SelectObservations::from([
$experiment->observations(),
$experiment->executionObservations(),
])->withKeys([$this->observationKey])->get(),
),
values: $values,
params: $this->params,
method: $this->method)
)->aggregate();
Expand Down
100 changes: 57 additions & 43 deletions src/Extras/Evals/Console/Display.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use Cognesy\Instructor\Extras\Evals\Execution;
use Cognesy\Instructor\Extras\Evals\Experiment;
use Cognesy\Instructor\Extras\Evals\Observation\SelectObservations;
use Cognesy\Instructor\Utils\Cli\Color;
use Cognesy\Instructor\Utils\Cli\Console;
use Cognesy\Instructor\Utils\Debug\Debug;
Expand All @@ -12,13 +13,19 @@

class Display
{
private int $terminalWidth = 120;

/**
 * Sizes the display to the current terminal.
 *
 * The width reported by Console::getWidth() overwrites the property's
 * declared default and is later passed as the total width when rows
 * of columns are printed.
 *
 * @param array $options Not read by this constructor.
 */
public function __construct(array $options = []) {
    $width = Console::getWidth();
    $this->terminalWidth = $width;
}

public function header(Experiment $experiment) : void {
Console::println('');
Console::printColumns([
[22, ' EXPERIMENT (' . Str::limit(text: $experiment->id(), limit: 4, align: STR_PAD_LEFT, fit: false) . ") ", STR_PAD_RIGHT, [Color::BG_BLUE, Color::WHITE, Color::BOLD]],
[70, ' ', STR_PAD_LEFT, [Color::BG_GRAY, Color::DARK_GRAY]],
[$this->flex(22, 30, -2), ' ', STR_PAD_LEFT, [Color::BG_GRAY, Color::DARK_GRAY]],
[30, ' ' . $experiment->startedAt()->format('Y-m-d H:i:s') . ' ', STR_PAD_LEFT, [Color::BG_GRAY, Color::DARK_GRAY]],
], 120, '');
], $this->terminalWidth, '');
Console::println('');
Console::println('');
}
Expand All @@ -27,8 +34,9 @@ public function footer(Experiment $experiment) {
Console::println('');
Console::printColumns([
[20, number_format($experiment->timeElapsed(), 2) . ' sec ', STR_PAD_LEFT, [Color::BG_BLUE, Color::WHITE, Color::BOLD]],
[100, ' ' . $experiment->usage()->toString() . ' ', STR_PAD_LEFT, [Color::BG_GRAY, Color::DARK_GRAY]],
], 120, '');
[$this->flex(20, 50), ' ', STR_PAD_LEFT, [Color::BG_GRAY, Color::DARK_GRAY]],
[50, ' ' . $experiment->usage()->toString() . ' ', STR_PAD_LEFT, [Color::BG_GRAY, Color::DARK_GRAY]],
], $this->terminalWidth, '');
Console::println('');
Console::println('');
$this->displayObservations($experiment);
Expand All @@ -44,7 +52,7 @@ public function before(Execution $execution) : void {
[10, $connection, STR_PAD_RIGHT, Color::WHITE],
[11, $mode, STR_PAD_RIGHT, Color::YELLOW],
[8, $streamed ? 'stream' : 'sync', STR_PAD_LEFT, $streamed ? Color::BLUE : Color::DARK_BLUE],
], 80);
], $this->terminalWidth);
Console::print('', [Color::GRAY, Color::BG_BLACK]);
}

Expand All @@ -65,7 +73,7 @@ public function displayExceptions(array $exceptions) : void {
Console::printColumns([
[30, $key, STR_PAD_RIGHT, [Color::DARK_YELLOW]],
[100, $exLine, STR_PAD_RIGHT, [Color::WHITE]]
], 120);
], $this->terminalWidth);
Console::println('');
Console::println($exception->getMessage(), [Color::GRAY]);
if (Debug::isEnabled()) {
Expand All @@ -82,38 +90,27 @@ private function displayResult(Execution $execution) : void {
$answerLine = str_replace("\n", '\n', $answer);
$timeElapsed = $execution->timeElapsed();
$tokensPerSec = $execution->outputTps();

$columns = array_merge([
[9, $this->timeFormat($timeElapsed), STR_PAD_LEFT, [Color::DARK_YELLOW]],
[10, $this->tokensPerSecFormat($tokensPerSec), STR_PAD_LEFT, [Color::CYAN]],
],
$this->makeEvalColumns($execution),
[
[60, $answerLine, STR_PAD_RIGHT, [Color::WHITE, Color::BG_BLACK]]
],
);

echo Console::columns($columns, 120);
}

private function makeEvalColumns(Execution $execution, int $maxCols = 3) : array {
$columns = [];
$count = 0;
foreach ($execution->summaries() as $aggregate) {
$columns[] = [6, $metric->toString(), STR_PAD_BOTH, $metric->toCliColor()];
$count++;
if ($count >= $maxCols) {
break;
}
}
foreach ($execution->observations() as $observation) {
$columns[] = [6, $observation->value(), STR_PAD_BOTH, [Color::GRAY]];
$count++;
if ($count >= $maxCols) {
break;
}
}
return $columns;
$isCorrect = SelectObservations::from($execution->observations())->withKeys(['execution.is_correct'])->sole()->value();

$rowStatus = match($isCorrect) {
1 => 'OK',
0 => 'FAIL',
default => '????',
};
$cliColor = match($isCorrect) {
1 => [Color::BG_GREEN, Color::WHITE],
0 => [Color::BG_RED, Color::WHITE],
default => [Color::BG_BLACK, Color::RED],
};

$columns = [
[9, $this->timeFormat($timeElapsed), STR_PAD_LEFT, [Color::DARK_YELLOW]],
[10, $this->tokensPerSecFormat($tokensPerSec), STR_PAD_LEFT, [Color::CYAN]],
[6, $rowStatus, STR_PAD_BOTH, $cliColor],
[60, $answerLine, STR_PAD_RIGHT, [Color::WHITE, Color::BG_BLACK]]
];

echo Console::columns($columns, $this->terminalWidth);
}

private function displayException(Exception $exception) : void {
Expand All @@ -122,7 +119,7 @@ private function displayException(Exception $exception) : void {
[10, '', STR_PAD_LEFT, [Color::CYAN]],
[6, '!!!!', STR_PAD_BOTH, [Color::WHITE, COLOR::BOLD, Color::BG_MAGENTA]],
[60, $this->exceptionToText($exception, 80), STR_PAD_RIGHT, [Color::RED, Color::BG_BLACK]],
], 120);
], $this->terminalWidth);
}


Expand All @@ -144,13 +141,30 @@ private function exceptionToText(Exception $e, int $maxLen) : string {

private function displayObservations(Experiment $experiment)
{
Console::println('RESULTS:', [Color::WHITE, Color::BOLD]);
Console::println('SUMMARY:', [Color::WHITE, Color::BOLD]);
foreach ($experiment->observations() as $observation) {
//$format = $observation->metadata()->get('format', '%s');
$value = $observation->value();
$unit = $observation->metadata()->get('unit', '-');
$meta = Str::limit($observation->metadata()->except('experimentId')->toJson(), 60);

Console::printColumns([
[20, $observation->key(), STR_PAD_LEFT, [Color::DARK_GRAY]],
[20, $observation->value(), STR_PAD_RIGHT, [Color::WHITE]],
], 120);
[5, $observation->id(), STR_PAD_LEFT, [Color::DARK_GRAY]],
[25, $observation->key(), STR_PAD_LEFT, [Color::DARK_GRAY]],
[20, $value, STR_PAD_LEFT, [Color::WHITE]],
[10, $unit, STR_PAD_RIGHT, [Color::DARK_GRAY]],
[$this->flex(5,25,20,10), $meta, STR_PAD_RIGHT, [Color::GRAY]],
], $this->terminalWidth);
Console::println('');
}
}

/**
 * Computes the width left over for a flexible column.
 *
 * The result is the terminal width minus the sum of the given values,
 * minus one further unit for each given value, minus one more.
 * NOTE(review): the per-value unit presumably accounts for column
 * separators - confirm against Console::printColumns().
 *
 * @param int ...$cols Widths (or width adjustments) already claimed in the row.
 * @return int Remaining width; not clamped, so it can be zero or negative.
 */
private function flex(int ...$cols) : int {
    $reserved = array_sum($cols) + count($cols) + 1;
    return $this->terminalWidth - $reserved;
}
}
26 changes: 11 additions & 15 deletions src/Extras/Evals/Execution.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ class Execution
];

private CanRunExecution $action;
private array $processors;
private array $postprocessors;
private array $processors = [];
private array $postprocessors = [];

private string $id;
private ?DateTime $startedAt = null;
Expand Down Expand Up @@ -108,14 +108,19 @@ public function execute() : void {
$this->usage = $usage;

$observations = MakeObservations::for($this)
->withSources($this->processors())
->withSources([
$this->processors,
$this->defaultObservers,
])
->only([
CanObserveExecution::class,
CanProvideExecutionObservations::class,
]);

$summaries = MakeObservations::for($this)
->withSources($this->postprocessors)
->withSources([
$this->postprocessors
])
->only([
CanSummarizeExecution::class,
CanProvideExecutionObservations::class,
Expand All @@ -139,7 +144,7 @@ public function hasObservations() : bool {

// HELPERS //////////////////////////////////////////////////

public function exception() : Exception {
public function exception() : ?Exception {
return $this->exception;
}

Expand Down Expand Up @@ -207,21 +212,12 @@ public function hasFeedback() : bool {
* @return Observation[]
*/
public function summaries() : array {
return SelectObservations::from($this->observations)
return SelectObservations::from([$this->observations])
->withTypes(['summary'])
->all();
}

/**
 * Tells whether summaries() yields at least one entry.
 */
public function hasSummaries() : bool {
    $summaries = $this->summaries();
    return $summaries !== [];
}

// INTERNAL /////////////////////////////////////////////////

/**
 * Combines the default observers with the configured processors.
 *
 * Default observers are placed first in the merged array.
 *
 * @return array NOTE(review): formerly documented as Observation[];
 *               these values are handed to MakeObservations as sources,
 *               so the element type should be confirmed.
 */
private function processors() : array {
    $defaults = $this->defaultObservers;
    $configured = $this->processors;
    return array_merge($defaults, $configured);
}
}
Loading

0 comments on commit a57863c

Please sign in to comment.