Skip to content

Commit

Permalink
feat: rebase upstream changes to d78fab9 (#7)
Browse files Browse the repository at this point in the history
Signed-off-by: Mathew Wicks <5735406+thesuperzapper@users.noreply.github.com>
  • Loading branch information
thesuperzapper authored May 19, 2024
1 parent 1cb2316 commit da9cb36
Show file tree
Hide file tree
Showing 21 changed files with 575 additions and 1,758 deletions.
6 changes: 4 additions & 2 deletions dashboard/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Step 1: Builds and tests
FROM node:12.22.12-bullseye AS build
FROM node:14.21.3-bullseye AS build

ARG kubeflowversion
ARG commit
Expand All @@ -24,7 +24,9 @@ RUN BUILDARCH="$(dpkg --print-architecture)" && npm rebuild && \
npm prune --production

# Step 2: Packages assets for serving
FROM node:12.22.12-alpine AS serve
FROM node:14.21.3-alpine3.17 AS serve

USER node

ENV NODE_ENV=production
WORKDIR /app
Expand Down
47 changes: 0 additions & 47 deletions dashboard/Makefile

This file was deleted.

10 changes: 10 additions & 0 deletions dashboard/app/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {KubernetesService} from './k8s_service';
import {Interval, MetricsService} from './metrics_service';

export const ERRORS = {
no_metrics_service_configured: 'No metrics service configured',
operation_not_supported: 'Operation not supported',
invalid_links_config: 'Cannot load dashboard menu link',
invalid_settings: 'Cannot load dashboard settings'
Expand All @@ -28,6 +29,15 @@ export class Api {
*/
routes(): Router {
return Router()
.get('/metrics', async (req: Request, res: Response) => {
if (!this.metricsService) {
return apiError({
res, code: 405,
error: ERRORS.operation_not_supported,
});
}
res.json(this.metricsService.getChartsLink());
})
.get(
'/metrics/:type((node|podcpu|podmem))',
async (req: Request, res: Response) => {
Expand Down
30 changes: 25 additions & 5 deletions dashboard/app/api_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,22 @@ describe('Main API', () => {
port = addressInfo.port;
});

it('Should return a 405 status code', (done) => {
get(`http://localhost:${port}/api/metrics/podcpu`, (res) => {
expect(res.statusCode).toBe(405);
done();
it('Should return a 405 status code', async () => {
const metricsEndpoint = new Promise((resolve) => {
get(`http://localhost:${port}/api/metrics`, (res) => {
expect(res.statusCode).toBe(405);
resolve();
});
});

const metricsTypeEndpoint = new Promise((resolve) => {
get(`http://localhost:${port}/api/metrics/podcpu`, (res) => {
expect(res.statusCode).toBe(405);
resolve();
});
});

await Promise.all([metricsEndpoint, metricsTypeEndpoint]);
});
});

Expand All @@ -47,7 +58,7 @@ describe('Main API', () => {
mockK8sService = jasmine.createSpyObj<KubernetesService>(['']);
mockProfilesService = jasmine.createSpyObj<DefaultApi>(['']);
mockMetricsService = jasmine.createSpyObj<MetricsService>([
'getNodeCpuUtilization', 'getPodCpuUtilization', 'getPodMemoryUsage'
'getNodeCpuUtilization', 'getPodCpuUtilization', 'getPodMemoryUsage', 'getChartsLink'
]);

testApp = express();
Expand All @@ -64,6 +75,15 @@ describe('Main API', () => {
}
});

it('Should retrieve charts link in Metrics service', (done) => {
get(`http://localhost:${port}/api/metrics`, (res) => {
expect(res.statusCode).toBe(200);
expect(mockMetricsService.getChartsLink)
.toHaveBeenCalled();
done();
});
});

it('Should retrieve Node CPU Utilization for default 15m interval',
async () => {
const defaultInterval = new Promise((resolve) => {
Expand Down
11 changes: 11 additions & 0 deletions dashboard/app/metrics_service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ export interface TimeSeriesPoint {
value: number;
}

/**
 * Describes the link to an external metrics dashboard, as returned by
 * `MetricsService.getChartsLink()`.
 */
export interface MetricsInfo {
/** URL of the metrics dashboard; may be undefined. */
resourceChartsLink: string | undefined;
/** Text to display for the button that redirects to the dashboard. */
resourceChartsLinkText: string;
}

/**
* Interface definition for implementers of metrics services capable of
* returning time-series resource utilization metrics for the Kubeflow system.
Expand All @@ -39,4 +44,10 @@ export interface MetricsService {
* @param interval
*/
getPodMemoryUsage(interval: Interval): Promise<TimeSeriesPoint[]>;

/**
* Return a MetricsInfo object containing the url of the metric dashboard and the
* text to display for the redirect button.
*/
getChartsLink(): MetricsInfo;
}
90 changes: 90 additions & 0 deletions dashboard/app/prometheus_metrics_service.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import {Interval, MetricsInfo, MetricsService, TimeSeriesPoint} from "./metrics_service";
import {PrometheusDriver, RangeVector, ResponseType} from 'prometheus-query';

/**
 * MetricsService implementation that reads resource-utilization time series
 * from a Prometheus server through a `prometheus-query` driver.
 */
export class PrometheusMetricsService implements MetricsService {
  /** Step value passed to every range query (the `step` argument of rangeQuery). */
  private static readonly QUERY_STEP = 10;

  /**
   * Look-back window, in minutes, used when the requested interval is not
   * recognised. Without a fallback the window would be zero minutes wide
   * (start === end), which cannot produce a meaningful time series.
   */
  private static readonly FALLBACK_WINDOW_MINUTES = 15;

  private readonly prometheusDriver: PrometheusDriver;
  private readonly dashboardUrl: string | undefined;

  /**
   * @param prometheusDriver Driver used to run range queries.
   * @param dashboardUrl URL of an external metrics dashboard, if any; it is
   *     returned unchanged by getChartsLink().
   */
  constructor(prometheusDriver: PrometheusDriver, dashboardUrl: string | undefined) {
    this.prometheusDriver = prometheusDriver;
    this.dashboardUrl = dashboardUrl;
  }

  /**
   * Returns the per-instance CPU usage rate over the given interval.
   * @param interval Look-back window for the query.
   */
  async getNodeCpuUtilization(interval: Interval): Promise<TimeSeriesPoint[]> {
    const query = `sum(rate(node_cpu_seconds_total[5m])) by (instance)`;
    const result = await this.queryPrometheus(query, this.getCorrespondingTime(interval));
    return this.convertToTimeSeriesPoints(result);
  }

  /**
   * Returns the summed container CPU usage rate over the given interval.
   * @param interval Look-back window for the query.
   */
  async getPodCpuUtilization(interval: Interval): Promise<TimeSeriesPoint[]> {
    const query = `sum(rate(container_cpu_usage_seconds_total[5m]))`;
    const result = await this.queryPrometheus(query, this.getCorrespondingTime(interval));
    return this.convertToTimeSeriesPoints(result);
  }

  /**
   * Returns the summed container memory usage over the given interval.
   * @param interval Look-back window for the query.
   */
  async getPodMemoryUsage(interval: Interval): Promise<TimeSeriesPoint[]> {
    const query = `sum(container_memory_usage_bytes)`;
    const result = await this.queryPrometheus(query, this.getCorrespondingTime(interval));
    return this.convertToTimeSeriesPoints(result);
  }

  /**
   * Runs a range query and returns its series.
   * @param query PromQL expression to evaluate.
   * @param start Start of the range, as epoch milliseconds.
   * @param end End of the range, as epoch milliseconds; defaults to now.
   * @returns The matrix result, or an empty array (after logging a warning)
   *     when the server answers with any other result type.
   */
  private async queryPrometheus(query: string, start: number, end: number = Date.now()): Promise<RangeVector[]> {
    const result = await this.prometheusDriver.rangeQuery(
        query, start, end, PrometheusMetricsService.QUERY_STEP);
    if (result.resultType !== ResponseType.MATRIX) {
      console.warn(`The prometheus server returned invalid result type: ${result.resultType}`);
      return [];
    }
    return result.result as RangeVector[];
  }

  /**
   * Converts an interval into the epoch-millisecond timestamp at which the
   * corresponding look-back window starts.
   * @param interval Requested look-back window.
   * @returns `Date.now()` minus the window length. An unrecognised interval
   *     logs a warning and uses FALLBACK_WINDOW_MINUTES.
   */
  private getCorrespondingTime(interval: Interval): number {
    let minutes: number;
    switch (interval) {
      case Interval.Last5m:
        minutes = 5;
        break;
      case Interval.Last15m:
        minutes = 15;
        break;
      case Interval.Last30m:
        minutes = 30;
        break;
      case Interval.Last60m:
        minutes = 60;
        break;
      case Interval.Last180m:
        minutes = 180;
        break;
      default:
        console.warn("unknown interval.");
        minutes = PrometheusMetricsService.FALLBACK_WINDOW_MINUTES;
    }
    return Date.now() - minutes * 60 * 1000;
  }

  /**
   * Flattens range-query series into a single list of points.
   * @param series Series returned by a range query.
   * @returns One point per sample, in series order; each point is labelled
   *     with the series' labels rendered as comma-joined `key=value` pairs.
   */
  private convertToTimeSeriesPoints(series: RangeVector[]): TimeSeriesPoint[] {
    const timeSeriesPoints: TimeSeriesPoint[] = [];
    for (const serie of series) {
      const label = Object.entries(serie.metric.labels)
                        .map(([key, value]) => key + "=" + value)
                        .join(",");

      // The `public/components/resource-chart.js` is multiplying the timestamp
      // by 1000 and the value by 100, so both are pre-divided here.
      for (const sample of serie.values) {
        timeSeriesPoints.push({
          timestamp: sample.time.getTime() / 1000,
          label,
          value: sample.value / 100,
        });
      }
    }
    return timeSeriesPoints;
  }

  /**
   * Returns the dashboard URL supplied at construction together with the
   * text to display for the redirect button.
   */
  getChartsLink(): MetricsInfo {
    return {
      resourceChartsLink: this.dashboardUrl,
      resourceChartsLinkText: 'View in dashboard'
    };
  }
}
142 changes: 142 additions & 0 deletions dashboard/app/prometheus_metrics_service_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import {Metric, PrometheusDriver, QueryResult, ResponseType} from "prometheus-query";
import {PrometheusMetricsService} from "./prometheus_metrics_service";
import {Interval, MetricsService, TimeSeriesPoint} from "./metrics_service";
import {SampleValue} from "prometheus-query/dist/types";

type MetricsServiceKeys = keyof MetricsService;

// Timestamp (epoch milliseconds) shared by every fixture sample.
const fixedDate = 1557705600000;

// Service methods under test; each one is expected to issue the PromQL
// expression registered under its name in `queries`.
const methods: MetricsServiceKeys[] = [
  "getNodeCpuUtilization",
  "getPodCpuUtilization",
  "getPodMemoryUsage",
];
const queries: Record<string, string> = {
  "getNodeCpuUtilization": "sum(rate(node_cpu_seconds_total[5m])) by (instance)",
  "getPodCpuUtilization": "sum(rate(container_cpu_usage_seconds_total[5m]))",
  "getPodMemoryUsage": "sum(container_memory_usage_bytes)",
};

/**
 * Builds one series labelled `instance=<instance>` holding a single sample
 * taken at `fixedDate`.
 */
function instanceSeries(instance: string, value: number) {
  return {
    "metric": {"labels": {"instance": instance}} as Metric,
    "values": [
      {
        time: new Date(fixedDate),
        value,
      } as SampleValue,
    ],
  };
}

// Matrix response without any series.
const emptyDataSet: QueryResult = {
  "resultType": ResponseType.MATRIX,
  "result": [],
};

// Matrix response with one series.
const singleInstanceDataSet: QueryResult = {
  "resultType": ResponseType.MATRIX,
  "result": [instanceSeries("one", 95.5)],
};

// Matrix response with three series.
const multipleInstancesDataSet: QueryResult = {
  "resultType": ResponseType.MATRIX,
  "result": [
    instanceSeries("one", 1.0),
    instanceSeries("two", 2.0),
    instanceSeries("three", 3.0),
  ],
};

/**
 * Specs for PrometheusMetricsService. The clock is frozen at `fixedDate` so
 * that the start/end arguments of every range query are deterministic.
 */
describe('PrometheusMetricsService', () => {
  let prometheusDriverClient: jasmine.SpyObj<PrometheusDriver>;
  let service: PrometheusMetricsService;

  beforeEach(() => {
    jasmine.clock().install();
    // Same instant as the fixture samples.
    jasmine.clock().mockDate(new Date(fixedDate));
    prometheusDriverClient = jasmine.createSpyObj<PrometheusDriver>(
        'prometheusDriverClient', ['rangeQuery']);

    service =
        new PrometheusMetricsService(prometheusDriverClient, undefined);
  });

  afterEach(() => {
    jasmine.clock().uninstall();
  });

  // Iterate over all methods since they have the same behavior
  methods.forEach((method) => {
    // The describe callback must be synchronous: it only registers specs, and
    // Jasmine does not support describe functions that return a promise.
    describe(method, () => {
      /**
       * Makes the driver answer `dataSet` for the exact query the method is
       * expected to issue for a five minute interval.
       */
      const stubRangeQuery = (dataSet: QueryResult): void => {
        prometheusDriverClient.rangeQuery.withArgs(
            queries[method],
            Date.now() - 5 * 60 * 1000,
            Date.now(),
            10
        ).and.returnValue(Promise.resolve(dataSet));
      };

      it('Empty return', async () => {
        stubRangeQuery(emptyDataSet);

        const emptyResult = await service[method](Interval.Last5m);
        const expected: TimeSeriesPoint[] = [];
        expect(emptyResult).toEqual(expected);
      });

      it('One instance', async () => {
        stubRangeQuery(singleInstanceDataSet);

        const singleInstanceResult = await service[method](Interval.Last5m);
        const expected: TimeSeriesPoint[] = [
          {timestamp: fixedDate / 1000, value: 0.955, label: "instance=one"},
        ];
        expect(singleInstanceResult).toEqual(expected);
      });

      it('Multiple instances', async () => {
        stubRangeQuery(multipleInstancesDataSet);

        const multipleInstancesResult = await service[method](Interval.Last5m);
        const expected: TimeSeriesPoint[] = [
          {timestamp: fixedDate / 1000, value: 0.010, label: "instance=one"},
          {timestamp: fixedDate / 1000, value: 0.020, label: "instance=two"},
          {timestamp: fixedDate / 1000, value: 0.030, label: "instance=three"},
        ];
        expect(multipleInstancesResult).toEqual(expected);
      });
    });
  });
});
Loading

0 comments on commit da9cb36

Please sign in to comment.