From 8656a2bf5cb0bc2e1e2d9750138a1cf37e8d8c27 Mon Sep 17 00:00:00 2001 From: Thomas Hansen Date: Wed, 14 Feb 2024 14:59:04 +0200 Subject: [PATCH] Upload CSV file support --- .../files/system/openai/upload-csv.post.hl | 64 +++++++++++++++++++ .../machine-learning-import.component.html | 56 +++++++++++++++- .../machine-learning-import.component.ts | 44 ++++++++++--- frontend/src/app/services/openai.service.ts | 8 +++ 4 files changed, 161 insertions(+), 11 deletions(-) create mode 100644 backend/files/system/openai/upload-csv.post.hl diff --git a/backend/files/system/openai/upload-csv.post.hl b/backend/files/system/openai/upload-csv.post.hl new file mode 100644 index 0000000000..3e6914b108 --- /dev/null +++ b/backend/files/system/openai/upload-csv.post.hl @@ -0,0 +1,64 @@ + +/* + * Stores the specified file as training data for the specified model. + * + * Contrary to the other upload endpoint, this assumes it's given a CSV file, and + * it will also use ALL columns in the file. It will assume the first column is + * the prompt though. + */ +.arguments + type:string + file:* +.type:internal +.accept:multipart/form-data +.description:Imports the specified CSV file as training data for the specified model + +// Ensures user is authorized to access endpoint. +auth.ticket.verify:root + +// Sanity checking invocation. +validators.mandatory:x:@.arguments/*/type +validators.mandatory:x:@.arguments/*/file +validators.mandatory:x:@.arguments/*/file/*/name +validators.mandatory:x:@.arguments/*/file/*/stream + +// Reading file data from stream. +io.stream.read:x:@.arguments/*/file/*/stream + +// Converting file to lambda object. +csv2lambda:x:@io.stream.read + +// Opening database connection. +data.connect:[generic|magic] + + // Iterating through each record in file. + for-each:x:@csv2lambda/* + + // Creating our completion. + .completion: + for-each:x:@.dp/#/*/[1,1000] + set-value:x:@.completion + strings.concat + get-value:x:@.completion + .:"\r\n" + .:"\r\n" + get-name:x:@.dp/# + .:": " + get-value:x:@.dp/# + set-value:x:@.completion + strings.trim:x:@.completion + + // Importing item into database. + data.create + table:ml_training_snippets + values + type:x:@.arguments/*/type + prompt:x:@.dp/#/0 + completion:x:@.completion + meta:x:@.arguments/*/file/*/name + +// Returning success to caller. +get-count:x:@csv2lambda/* +yield + result:success + count:x:@get-count diff --git a/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-import/machine-learning-import.component.html b/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-import/machine-learning-import.component.html index d2f1c497d4..36890241c6 100644 --- a/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-import/machine-learning-import.component.html +++ b/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-import/machine-learning-import.component.html @@ -19,7 +19,7 @@

Import traini

- Crawl and scrape the specified website for training data + Crawl and scrape the specified URL for training data

@@ -160,6 +160,58 @@

Import traini + + + + CSV file + + + +
+ +
+ +

+ Upload CSV file with unspecified columns. The first column is assumed to be the prompt, and all other columns will become part of the completion. +

+
+ +
+ +
+ + + + + + +

{{getFileName()}}

+

Uploading {{uploadIndex + 1}} of {{files.length}}...

+

Done!

+
+ +
+ +
+ +
+
+ @@ -172,7 +224,7 @@

Import traini

- Upload XML, JSON, YAML or CSV files as training data + Upload XML, JSON, YAML or CSV files as training data. Notice, this will only import two columns from your files, prompt and completion.

diff --git a/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-import/machine-learning-import.component.ts b/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-import/machine-learning-import.component.ts index d1b2cee89d..bfbc9c0170 100644 --- a/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-import/machine-learning-import.component.ts +++ b/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-import/machine-learning-import.component.ts @@ -22,6 +22,7 @@ export class MachineLearningImportComponent { uploading: boolean = false; trainingFileModel: string = ''; + trainingFileModelCsv: string = ''; url: string = null; delay: number = 1; max: number = 25; @@ -95,6 +96,19 @@ export class MachineLearningImportComponent { this.uploadCurrentFile(); } + getFileCsv(event: any) { + + if (!event || !event.target.files || event.target.files.length === 0) { + return; + } + this.uploading = true; + this.uploadIndex = 0; + this.uploadCount = 0; + this.files = event.target.files; + this.generalService.showLoading(); + this.uploadCurrentFile(true); + } + getFileName() { if (!this.files || this.files.length === 0 || this.uploadIndex >= this.files.length) { @@ -107,18 +121,22 @@ export class MachineLearningImportComponent { * Private helper methods. */ - private uploadCurrentFile() { + private uploadCurrentFile(csvFile: boolean = false) { const formData = new FormData(); formData.append('file', this.files[this.uploadIndex], this.files[this.uploadIndex].name); formData.append('type', this.data.type); - formData.append('prompt', this.prompt); - formData.append('completion', this.completion); - if (this.massage && this.massage !== '') { - formData.append('massage', this.massage); + if (!csvFile) { + formData.append('prompt', this.prompt); + formData.append('completion', this.completion); + if (this.massage && this.massage !== '') { + formData.append('massage', this.massage); + } } - this.openAIService.uploadTrainingFile(formData).subscribe({ + var svc = csvFile ? this.openAIService.uploadCsvFile.bind(this.openAIService) : this.openAIService.uploadTrainingFile.bind(this.openAIService); + + svc(formData).subscribe({ next: (result: any) => { this.uploadCount += result.count; @@ -131,7 +149,11 @@ export class MachineLearningImportComponent { this.generalService.hideLoading(); this.generalService.showFeedback(`${this.uploadCount} training snippets successfully imported`, 'successMessage'); this.uploading = false; - this.trainingFileModel = ''; + if (csvFile) { + this.trainingFileModelCsv = ''; + } else { + this.trainingFileModel = ''; + } this.uploadIndex = 0; this.files = null; this.matDialog.close(); @@ -139,13 +161,17 @@ export class MachineLearningImportComponent { } // More files remaining. - this.uploadCurrentFile(); + this.uploadCurrentFile(csvFile); }, 100); }, error: (error: any) => { this.uploading = false; - this.trainingFileModel = ''; + if (csvFile) { + this.trainingFileModelCsv = ''; + } else { + this.trainingFileModel = ''; + } this.generalService.showFeedback(error?.error?.message, 'errorMessage', 'Ok'); this.generalService.hideLoading(); } diff --git a/frontend/src/app/services/openai.service.ts b/frontend/src/app/services/openai.service.ts index 3cacd4c8c4..bf744592dd 100644 --- a/frontend/src/app/services/openai.service.ts +++ b/frontend/src/app/services/openai.service.ts @@ -122,6 +122,14 @@ export class OpenAIService { return this.httpService.post('/magic/system/openai/upload-training-data', data); } + /** + * Uploads the specified training data file to the backend. + */ + uploadCsvFile(data: FormData) { + + return this.httpService.post('/magic/system/openai/upload-csv', data); + } + /** * Uploads training data to OpenAI and starts a new training session. */