diff --git a/backend/backend.csproj b/backend/backend.csproj index cb5604c76f..4f6c015afd 100644 --- a/backend/backend.csproj +++ b/backend/backend.csproj @@ -26,7 +26,7 @@ - + diff --git a/backend/files/system/openai/import-page.post.hl b/backend/files/system/openai/import-page.post.hl index 8d8dc66743..b28d2ad1e6 100644 --- a/backend/files/system/openai/import-page.post.hl +++ b/backend/files/system/openai/import-page.post.hl @@ -7,6 +7,9 @@ type:string threshold:int summarize:bool + images:bool + lists:bool + code:bool feedback-channel:string // Ensures user is authorized to access endpoint. @@ -21,6 +24,9 @@ validators.integer:x:@.arguments/*/threshold min:25 validators.default:x:@.arguments summarize:bool:true + images:bool:true + lists:bool:true + code:bool:true // Doing some basic logging. log.info:Crawling page generating OpenAI training snippets @@ -47,6 +53,9 @@ fork feedback-channel:x:@.arguments/*/feedback-channel threshold:x:@.arguments/*/threshold summarize:x:@.arguments/*/summarize + lists:x:@.arguments/*/lists + images:x:@.arguments/*/images + code:x:@.arguments/*/code /* * Crawling is done. diff --git a/backend/files/system/openai/import-url.post.hl b/backend/files/system/openai/import-url.post.hl index f990ec0d1b..8a0b475a48 100644 --- a/backend/files/system/openai/import-url.post.hl +++ b/backend/files/system/openai/import-url.post.hl @@ -11,12 +11,21 @@ max:int threshold:int summarize:bool + images:bool + lists:bool + code:bool feedback-channel:string // Ensures user is authorized to access endpoint. auth.ticket.verify:root +// Applying defaults. +validators.default:x:@.arguments + images:bool:true + lists:bool:true + code:bool:true + // Prepending scheme unless given. if not diff --git a/backend/files/system/openai/magic.startup/crawling/magic.ai.crawl-site-on-thread.hl b/backend/files/system/openai/magic.startup/crawling/magic.ai.crawl-site-on-thread.hl index 4eba515511..d5b99ac95c 100644 --- a/backend/files/system/openai/magic.startup/crawling/magic.ai.crawl-site-on-thread.hl +++ b/backend/files/system/openai/magic.startup/crawling/magic.ai.crawl-site-on-thread.hl @@ -245,6 +245,9 @@ delete from ml_training_snippets where type = @type and uri like @uri;" type:x:@.arguments/*/type threshold:x:@.arguments/*/threshold feedback-channel:x:@.arguments/*/feedback-channel + images:x:@.arguments/*/images + lists:x:@.arguments/*/lists + code:x:@.arguments/*/code // Verifying we've got more snippets before applying Crawl-Delay if diff --git a/backend/files/system/openai/magic.startup/crawling/magic.ai.html.extract.hl b/backend/files/system/openai/magic.startup/crawling/magic.ai.html.extract.hl index c2097ce6ee..3f1912b8ca 100644 --- a/backend/files/system/openai/magic.startup/crawling/magic.ai.html.extract.hl +++ b/backend/files/system/openai/magic.startup/crawling/magic.ai.html.extract.hl @@ -14,6 +14,12 @@ slots.create:magic.ai.html.extract validators.mandatory:x:@.arguments/*/url validators.url:x:@.arguments/*/url + // Ensuring defaults. + validators.default:x:@.arguments + images:bool:true + lists:bool:true + code:bool:true + // Used to return meta information to caller. .meta main:int:0 @@ -36,6 +42,9 @@ slots.create:magic.ai.html.extract // Converting HTML to Markdown. html2markdown:x:@.arguments/*/html url:x:@.arguments/*/url + images:x:@.arguments/*/images + lists:x:@.arguments/*/lists + code:x:@.arguments/*/code // Checking if we have any Markdown at all, and if not we return early. if @@ -54,9 +63,9 @@ slots.create:magic.ai.html.extract // Retrieving title and description from document. set-value:x:@.title - get-value:x:@html2markdown/*/title + get-value:x:@html2markdown/*/title/[0,1] set-value:x:@.description - get-value:x:@html2markdown/*/description + get-value:x:@html2markdown/*/description/[0,1] // Adding URLs found as we transformed HTML to Markdown. add:x:@.urls diff --git a/backend/files/system/openai/magic.startup/crawling/magic.ai.url.scrape.hl b/backend/files/system/openai/magic.startup/crawling/magic.ai.url.scrape.hl index 4848d79224..c1bedd7b06 100644 --- a/backend/files/system/openai/magic.startup/crawling/magic.ai.url.scrape.hl +++ b/backend/files/system/openai/magic.startup/crawling/magic.ai.url.scrape.hl @@ -9,6 +9,9 @@ * - [type] - What type to import training snippets into. * - [summarize] - If true will summarize all training snippets that are larger than 1,000 tokens. * - [headers] - Optional collection of HTTML headers we should associate with HTTP request. + * - [images] - If true will import images + * - [lists] - If true will import lists + * - [code] - If true will import code */ slots.create:magic.ai.url.scrape @@ -17,9 +20,12 @@ slots.create:magic.ai.url.scrape validators.mandatory:x:@.arguments/*/type validators.url:x:@.arguments/*/url - // Ensuring [summarize] defaults to true. + // Ensuring defaults. validators.default:x:@.arguments summarize:bool:true + images:bool:true + lists:bool:true + code:bool:true // Signaling frontend. .msg @@ -54,6 +60,9 @@ slots.create:magic.ai.url.scrape html:x:@.html url:x:@.arguments/*/url feedback-channel:x:@.arguments/*/feedback-channel + images:x:@.arguments/*/images + lists:x:@.arguments/*/lists + code:x:@.arguments/*/code // URLs we return to caller. .urls diff --git a/backend/slots/Version.cs b/backend/slots/Version.cs index f35156d18e..28816fa16b 100644 --- a/backend/slots/Version.cs +++ b/backend/slots/Version.cs @@ -20,7 +20,7 @@ public class Version : ISlot /// Parameters passed from signaler public void Signal(ISignaler signaler, Node input) { - input.Value = "v17.3.5"; + input.Value = "v17.3.7"; } } } diff --git a/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-edit-training-snippet/machine-learning-edit-training-snippet.component.html b/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-edit-training-snippet/machine-learning-edit-training-snippet.component.html index 48f1fb05da..414e6de19c 100644 --- a/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-edit-training-snippet/machine-learning-edit-training-snippet.component.html +++ b/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-edit-training-snippet/machine-learning-edit-training-snippet.component.html @@ -180,6 +180,7 @@

diff --git a/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-import-feedback/machine-learning-import-feedback.component.ts b/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-import-feedback/machine-learning-import-feedback.component.ts index 283316e38b..434b48b5be 100644 --- a/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-import-feedback/machine-learning-import-feedback.component.ts +++ b/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-import-feedback/machine-learning-import-feedback.component.ts @@ -3,13 +3,16 @@ * Copyright (c) Thomas Hansen - For license inquiries you can contact thomas@ainiro.io. */ +// Angular and system specific imports. +import { MAT_DIALOG_DATA } from '@angular/material/dialog'; import { Component, Inject, OnDestroy, OnInit } from '@angular/core'; -import { MAT_DIALOG_DATA, MatDialogRef } from '@angular/material/dialog'; import { HttpTransportType, HubConnection, HubConnectionBuilder } from '@aspnet/signalr'; -import { BackendService } from 'src/app/services/backend.service'; + +// Application specific imports. import { ConfigService } from 'src/app/services/config.service'; -import { GeneralService } from 'src/app/services/general.service'; import { OpenAIService } from 'src/app/services/openai.service'; +import { BackendService } from 'src/app/services/backend.service'; +import { GeneralService } from 'src/app/services/general.service'; /** * Helper component to view feedback as we crawl URLs. @@ -27,7 +30,6 @@ export class MachineLearningImportFeedbackComponent implements OnInit, OnDestroy constructor( @Inject(MAT_DIALOG_DATA) public data: any, private backendService: BackendService, - private dialogRef: MatDialogRef, private generalService: GeneralService, private openAIService: OpenAIService, private configService: ConfigService) { } @@ -91,7 +93,10 @@ export class MachineLearningImportFeedbackComponent implements OnInit, OnDestroy this.data.max, this.data.threshold, this.data.summarize, - result.result).subscribe({ + result.result, + this.data.images, + this.data.lists, + this.data.code).subscribe({ next: () => { this.generalService.hideLoading(); @@ -112,7 +117,10 @@ export class MachineLearningImportFeedbackComponent implements OnInit, OnDestroy this.data.url, this.data.type, 50, - result.result).subscribe({ + result.result, + this.data.images, + this.data.lists, + this.data.code).subscribe({ next: () => { this.generalService.hideLoading(); diff --git a/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-import/machine-learning-import.component.html b/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-import/machine-learning-import.component.html index 2a8656d905..d2f1c497d4 100644 --- a/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-import/machine-learning-import.component.html +++ b/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-import/machine-learning-import.component.html @@ -120,6 +120,33 @@

Import traini
+ + Lists + + + + Code + + + + Images + + Spice up '{{data.type}}' model

+ + Lists + + + + Code + + + + Images + +

diff --git a/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-spice/machine-learning-spice.component.ts b/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-spice/machine-learning-spice.component.ts index 7c34650b5b..4f28e146d3 100644 --- a/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-spice/machine-learning-spice.component.ts +++ b/frontend/src/app/components/protected/manage/machine-learning/components/machine-learning-spice/machine-learning-spice.component.ts @@ -23,14 +23,21 @@ export class MachineLearningSpiceComponent { CommonRegEx = CommonRegEx; CommonErrorMessages = CommonErrorMessages; + lists?: boolean = true; + images?: boolean = true; + code?: boolean = true; constructor( - private generalService: GeneralService, @Inject(MAT_DIALOG_DATA) public data: any, private dialogRef: MatDialogRef) { } scrape() { - this.dialogRef.close(this.url); + this.dialogRef.close({ + url: this.url, + lists: this.lists, + images: this.images, + code: this.code, + }); } } diff --git a/frontend/src/app/components/protected/manage/machine-learning/machine-learning-models/machine-learning-models.component.ts b/frontend/src/app/components/protected/manage/machine-learning/machine-learning-models/machine-learning-models.component.ts index 2260ac2bb2..dc2c824f18 100644 --- a/frontend/src/app/components/protected/manage/machine-learning/machine-learning-models/machine-learning-models.component.ts +++ b/frontend/src/app/components/protected/manage/machine-learning/machine-learning-models/machine-learning-models.component.ts @@ -153,7 +153,11 @@ export class MachineLearningModelsComponent implements OnInit { delay?: number, max?: number, threshold?: number, - summarize?: boolean }) => { + summarize?: boolean, + images?: boolean, + code?: boolean, + lists?: boolean, + }) => { if (result?.crawl) { @@ -168,7 +172,10 @@ export class MachineLearningModelsComponent implements OnInit { max: result.max, threshold: result.threshold, summarize: result.summarize, - mode: 'site' + images: result.images ?? true, + code: result.code ?? true, + lists: result.lists ?? true, + mode: 'site', } }); } diff --git a/frontend/src/app/components/protected/manage/machine-learning/machine-learning-training-data/machine-learning-training-data.component.ts b/frontend/src/app/components/protected/manage/machine-learning/machine-learning-training-data/machine-learning-training-data.component.ts index 4671774374..6e03f195b8 100644 --- a/frontend/src/app/components/protected/manage/machine-learning/machine-learning-training-data/machine-learning-training-data.component.ts +++ b/frontend/src/app/components/protected/manage/machine-learning/machine-learning-training-data/machine-learning-training-data.component.ts @@ -121,12 +121,15 @@ export class MachineLearningTrainingDataComponent implements OnInit { width: '80vw', maxWidth: '1280px', data: { - url: result, + url: result.url, type: this.type, delay: result.delay, max: result.max, threshold: result.threshold, summarize: result.summarize, + images: result.images, + lists: result.lists, + code: result.code, mode: 'single-page', callback: () => this.getTrainingData(true) } diff --git a/frontend/src/app/services/openai.service.ts b/frontend/src/app/services/openai.service.ts index 6681238cf9..3cacd4c8c4 100644 --- a/frontend/src/app/services/openai.service.ts +++ b/frontend/src/app/services/openai.service.ts @@ -140,7 +140,10 @@ export class OpenAIService { max: number, threshold: number, summarize: boolean, - feedbackChannel: string) { + feedbackChannel: string, + images: boolean = true, + lists: boolean = true, + code: boolean = true) { return this.httpService.post('/magic/system/openai/import-url', { url, @@ -149,6 +152,9 @@ export class OpenAIService { max, threshold, summarize, + images, + lists, + code, ['feedback-channel']: feedbackChannel, }); } @@ -160,12 +166,18 @@ export class OpenAIService { url: string, type: string, threshold: number, - feedbackChannel: string) { + feedbackChannel: string, + images: boolean = true, + lists: boolean = true, + code: boolean = true) { return this.httpService.post('/magic/system/openai/import-page', { url, type, threshold, + images, + lists, + code, ['feedback-channel']: feedbackChannel, }); }