Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding more granular diff format for autoedits model training #6173

Draft
wants to merge 12 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions vscode/src/autoedits/prompt-utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,7 @@ line 64

</area_around_code_to_rewrite>


Now, continue where I left off and finish my change by rewriting "code_to_rewrite":
`
expect(prompt.toString()).toEqual(expectedPrompt)
Expand Down
69 changes: 57 additions & 12 deletions vscode/src/autoedits/prompt-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,11 @@ interface CurrentFileContext {
range: vscode.Range
}

/**
 * The two prompt fragments produced from the recent-edit context items.
 * They are interpolated at different positions of the final user prompt.
 */
interface RecentEditPromptComponents {
// Prompt text built from every recent edit except the last one; empty when
// there are fewer than two recent edits.
longTermDiff: PromptString
// Prompt text built from the last recent edit only; empty when there are no
// recent edits.
shortTermDiff: PromptString
}

// Helper function to get prompt in some format
export function getBaseUserPrompt(
docContext: DocumentContext,
Expand Down Expand Up @@ -107,10 +112,8 @@ export function getBaseUserPrompt(
getRecentlyViewedSnippetsPrompt
)

const recentEditsPrompt = getPromptForTheContextSource(
contextItemMapping.get(RetrieverIdentifier.RecentEditsRetriever) || [],
RECENT_EDITS_INSTRUCTION,
getRecentEditsPrompt
const recentEditsPromptComponents = getRecentEditsPromptComponents(
contextItemMapping.get(RetrieverIdentifier.RecentEditsRetriever) || []
)

const lintErrorsPrompt = getPromptForTheContextSource(
Expand All @@ -134,10 +137,11 @@ export function getBaseUserPrompt(
${jaccardSimilarityPrompt}
${recentViewsPrompt}
${CURRENT_FILE_INSTRUCTION}${fileWithMarkerPrompt}
${recentEditsPrompt}
${recentEditsPromptComponents.longTermDiff}
${lintErrorsPrompt}
${recentCopyPrompt}
${areaPrompt}
${recentEditsPromptComponents.shortTermDiff}
${FINAL_USER_PROMPT}
`
autoeditsLogger.logDebug('AutoEdits', 'Prompt\n', finalPrompt)
Expand Down Expand Up @@ -323,24 +327,61 @@ ${RECENT_COPY_TAG_CLOSE}
`
}

/**
 * Splits the recent-edit context items into a short-term and a long-term
 * prompt fragment.
 *
 * The items selected by getContextItemsForIdentifier are reversed in place.
 * The last item after the reversal is rendered as `shortTermDiff`; when more
 * than one item exists, all preceding items are rendered as `longTermDiff`,
 * prefixed with RECENT_EDITS_INSTRUCTION. A fragment with nothing to render
 * stays an empty prompt.
 *
 * NOTE(review): this listing is a rendered diff, so the previous signature
 * and guard appear interleaved with the new implementation.
 */
export function getRecentEditsPrompt(contextItems: AutocompleteContextSnippet[]): PromptString {
export function getRecentEditsPromptComponents(
contextItems: AutocompleteContextSnippet[]
): RecentEditPromptComponents {
const recentEdits = getContextItemsForIdentifier(
contextItems,
RetrieverIdentifier.RecentEditsRetriever
)
recentEdits.reverse()
if (recentEdits.length === 0) {
let shortTermDiff: PromptString = ps``
let longTermDiff: PromptString = ps``
// The last item after the reversal is the only input of the short-term diff.
if (recentEdits.length > 0) {
shortTermDiff = getRecentEditPrompt([recentEdits.at(-1)!])
}
// Everything before the last item goes into the long-term diff, which is the
// only fragment that carries the instruction text.
if (recentEdits.length > 1) {
const longTermDiffPrompt = getRecentEditPromptLongTermDiffComponent(recentEdits.slice(0, -1))
longTermDiff = ps`${RECENT_EDITS_INSTRUCTION}
${longTermDiffPrompt}
`
}
return {
shortTermDiff,
longTermDiff,
}
}

/**
 * Renders the given snippets as one block for the long-term diff.
 *
 * Each snippet is formatted by getContextPromptForDiffWithPath from its
 * display path and content, the results are joined with newlines, and the
 * whole is wrapped between RECENT_EDITS_TAG_OPEN and RECENT_EDITS_TAG_CLOSE.
 * An empty list yields an empty prompt.
 *
 * NOTE(review): this listing is a rendered diff, so lines of the previous
 * implementation appear interleaved with the new ones.
 */
function getRecentEditPromptLongTermDiffComponent(context: AutocompleteContextSnippet[]): PromptString {
if (context.length === 0) {
return ps``
}
const recentEditsPrompts = recentEdits.map(item =>
getContextPromptWithPath(
const prompts = context.map(item =>
getContextPromptForDiffWithPath(
PromptString.fromDisplayPath(item.uri),
PromptString.fromAutocompleteContextSnippet(item).content
)
)
const recentEditsPrompt = PromptString.join(recentEditsPrompts, ps`\n`)
return ps`${RECENT_EDITS_TAG_OPEN}
${recentEditsPrompt}
return ps`
${RECENT_EDITS_TAG_OPEN}
${PromptString.join(prompts, ps`\n`)}
${RECENT_EDITS_TAG_CLOSE}
`
}

/**
 * Renders recent-edit snippets as a single tagged prompt block.
 *
 * Every snippet contributes its display path followed by its content (see
 * getContextPromptForDiffWithPath); the entries are separated by newlines and
 * enclosed between RECENT_EDITS_TAG_OPEN and RECENT_EDITS_TAG_CLOSE.
 *
 * @param contextItems snippets to render, in output order
 * @returns the tagged block, or an empty prompt when `contextItems` is empty
 */
function getRecentEditPrompt(contextItems: AutocompleteContextSnippet[]): PromptString {
    if (contextItems.length === 0) {
        return ps``
    }

    const diffsWithPaths: PromptString[] = []
    for (const snippet of contextItems) {
        const displayPath = PromptString.fromDisplayPath(snippet.uri)
        const diffContent = PromptString.fromAutocompleteContextSnippet(snippet).content
        diffsWithPaths.push(getContextPromptForDiffWithPath(displayPath, diffContent))
    }
    const joinedDiffs = PromptString.join(diffsWithPaths, ps`\n`)

    // The template body stays at column 0: its whitespace is part of the prompt.
    return ps`
${RECENT_EDITS_TAG_OPEN}
${joinedDiffs}
${RECENT_EDITS_TAG_CLOSE}
`
}
Expand Down Expand Up @@ -455,3 +496,7 @@ function getContextItemsForIdentifier(
/**
 * Formats a context snippet as its file path, wrapped in backticks and
 * parentheses, followed by a blank line, the content, and a trailing newline.
 *
 * @param filePath path shown in the header line
 * @param content snippet text placed below the header
 */
function getContextPromptWithPath(filePath: PromptString, content: PromptString): PromptString {
    const pathHeader = ps`(\`${filePath}\`)`
    return ps`${pathHeader}\n\n${content}\n`
}

/**
 * Formats a diff snippet as its file path on the first line, immediately
 * followed by the diff content on the next line (no decoration, no trailing
 * newline).
 *
 * @param filePath path placed on the first line
 * @param content diff text placed after the path
 */
function getContextPromptForDiffWithPath(filePath: PromptString, content: PromptString): PromptString {
    const lineBreak = ps`\n`
    return ps`${filePath}${lineBreak}${content}`
}
2 changes: 1 addition & 1 deletion vscode/src/completions/context/context-data-logging.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import type { RetrievedContextResults } from './completions-context-ranker'
import { JaccardSimilarityRetriever } from './retrievers/jaccard-similarity/jaccard-similarity-retriever'
import { DiagnosticsRetriever } from './retrievers/recent-user-actions/diagnostics-retriever'
import { RecentCopyRetriever } from './retrievers/recent-user-actions/recent-copy'
import { RecentEditsRetrieverDiffStrategyIdentifier } from './retrievers/recent-user-actions/recent-edits-diff-helpers/recent-edits-diff-strategy'
import { RecentEditsRetrieverDiffStrategyIdentifier } from './retrievers/recent-user-actions/recent-edits-diff-helpers/base'
import { RecentEditsRetriever } from './retrievers/recent-user-actions/recent-edits-retriever'
import { RecentViewPortRetriever } from './retrievers/recent-user-actions/recent-view-port'
import { RetrieverIdentifier } from './utils'
Expand Down
4 changes: 2 additions & 2 deletions vscode/src/completions/context/context-strategy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import { JaccardSimilarityRetriever } from './retrievers/jaccard-similarity/jacc
import { LspLightRetriever } from './retrievers/lsp-light/lsp-light-retriever'
import { DiagnosticsRetriever } from './retrievers/recent-user-actions/diagnostics-retriever'
import { RecentCopyRetriever } from './retrievers/recent-user-actions/recent-copy'
import { RecentEditsRetrieverDiffStrategyIdentifier } from './retrievers/recent-user-actions/recent-edits-diff-helpers/recent-edits-diff-strategy'
import { RecentEditsRetrieverDiffStrategyIdentifier } from './retrievers/recent-user-actions/recent-edits-diff-helpers/base'
import { RecentEditsRetriever } from './retrievers/recent-user-actions/recent-edits-retriever'
import { RecentViewPortRetriever } from './retrievers/recent-user-actions/recent-view-port'
import { loadTscRetriever } from './retrievers/tsc/load-tsc-retriever'
Expand Down Expand Up @@ -128,7 +128,7 @@ export class DefaultContextStrategyFactory implements ContextStrategyFactory {
new RecentEditsRetriever({
maxAgeMs: 10 * 60 * 1000,
diffStrategyIdentifier:
RecentEditsRetrieverDiffStrategyIdentifier.UnifiedDiffWithLineNumbers,
RecentEditsRetrieverDiffStrategyIdentifier.AutoeditWithShortTermDiff,
}),
new DiagnosticsRetriever({
contextLines: 0,
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,64 +1,70 @@
import type * as vscode from 'vscode'
import type {
DiffCalculationInput,
DiffHunk,
RecentEditsRetrieverDiffStrategy,
TextDocumentChange,
} from './recent-edits-diff-strategy'
import { applyTextDocumentChanges, computeDiffWithLineNumbers } from './utils'
} from './base'
import { groupOverlappingDocumentChanges } from './utils'
import {
type TextDocumentChangeGroup,
getDiffHunkFromUnifiedPatch,
getUnifiedDiffHunkFromTextDocumentChange,
} from './utils'

/**
 * Diff strategy that reports the recent changes of a document as up to two
 * hunks instead of one: a "short term" hunk followed by a "long term" hunk,
 * each computed with its own number of context lines.
 *
 * NOTE(review): this listing is a rendered diff, so removed and added lines
 * of the implementation appear interleaved below.
 */
export class AutoeditWithShortTermDiffStrategy implements RecentEditsRetrieverDiffStrategy {
private shortTermDiffWindowMs = 5 * 1000 // 5 seconds
// Number of context lines requested for the long-term patch.
private longTermContextLines = 3
// Number of context lines requested for the short-term patch.
private shortTermContextLines = 0

/**
 * Computes the hunks for `input`. The returned array holds the short-term
 * hunk first and the long-term hunk second; a hunk that resolves to
 * `undefined` is dropped from the result.
 */
public getDiffHunks(input: DiffCalculationInput): DiffHunk[] {
const [shortTermChanges, longTermChanges] = this.divideChangesIntoWindows(input.changes)
const [shortTermHunks, shortTermNewContent] = this.getDiffHunksForChanges(
input.uri,
input.oldContent,
shortTermChanges,
this.shortTermContextLines
)
const [longTermHunks, _] = this.getDiffHunksForChanges(
input.uri,
shortTermNewContent,
longTermChanges,
this.longTermContextLines
)
return [shortTermHunks, longTermHunks]
const rawChanges = groupOverlappingDocumentChanges(input.changes)
const { shortTermChanges, longTermChanges } =
this.divideChangesIntoShortTermAndLongTerm(rawChanges)

// The long-term patch is computed first, starting from the original content.
const longTermPatch = getUnifiedDiffHunkFromTextDocumentChange({
uri: input.uri,
oldContent: input.oldContent,
changes: longTermChanges,
addLineNumbersForDiff: true,
contextLines: this.longTermContextLines,
})
// The short-term patch starts from the content produced by the long-term patch.
// NOTE(review): `||` also falls back to input.oldContent when newContent is an
// empty string; `??` may be what is intended here — confirm.
const shortTermPatch = getUnifiedDiffHunkFromTextDocumentChange({
uri: input.uri,
oldContent: longTermPatch?.newContent || input.oldContent,
changes: shortTermChanges,
addLineNumbersForDiff: true,
contextLines: this.shortTermContextLines,
})
return [
getDiffHunkFromUnifiedPatch(shortTermPatch),
getDiffHunkFromUnifiedPatch(longTermPatch),
].filter((hunk): hunk is DiffHunk => hunk !== undefined)
}

// Applies `changes` to `oldContent` and returns the resulting hunk together
// with the new content.
private getDiffHunksForChanges(
uri: vscode.Uri,
oldContent: string,
changes: TextDocumentChange[],
numContextLines: number
): [DiffHunk, string] {
const newContent = applyTextDocumentChanges(
oldContent,
changes.map(c => c.change)
)
const gitDiff = computeDiffWithLineNumbers(uri, oldContent, newContent, numContextLines)
const diffHunk = {
diff: gitDiff,
latestEditTimestamp: Math.max(...changes.map(c => c.timestamp)),
// Splits the change groups: the last group is short-term and all earlier groups
// are long-term. With at most one group, everything is short-term.
private divideChangesIntoShortTermAndLongTerm(changes: TextDocumentChangeGroup[]): {
shortTermChanges: TextDocumentChange[]
longTermChanges: TextDocumentChange[]
} {
if (changes.length <= 1) {
return {
shortTermChanges: this.convertTextDocumentChangeGroupToTextDocumentChange(changes),
longTermChanges: [],
}
}
return {
shortTermChanges: this.convertTextDocumentChangeGroupToTextDocumentChange(changes.slice(-1)),
longTermChanges: this.convertTextDocumentChangeGroupToTextDocumentChange(
changes.slice(0, -1)
),
}
return [diffHunk, newContent]
}

private divideChangesIntoWindows(
changes: TextDocumentChange[]
): [TextDocumentChange[], TextDocumentChange[]] {
// Divide the changes into 2 different windows, where the second window is the short term changes under 5 seconds
const now = Date.now()
const index = changes.findIndex(c => now - c.timestamp < this.shortTermDiffWindowMs)
const shortTermChanges = changes.slice(0, index)
const longTermChanges = changes.slice(index)
return [shortTermChanges, longTermChanges]
// Flattens the change groups into one list of their individual changes.
private convertTextDocumentChangeGroupToTextDocumentChange(
changeGroup: TextDocumentChangeGroup[]
): TextDocumentChange[] {
return changeGroup.flatMap(group => group.changes)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import dedent from 'dedent'
import { describe, expect, it } from 'vitest'
import * as vscode from 'vscode'
import { AutoeditWithShortTermDiffStrategy } from './auotedit-short-term-diff'
import { getTextDocumentChangesForText } from './helper'

/**
 * Removes every line that contains the "\ No newline at end of file" marker
 * from a computed diff, keeping all other lines in their original order.
 *
 * @param text diff text to clean up
 * @returns the diff text without the marker lines
 */
const processComputedDiff = (text: string): string =>
    text
        .split('\n')
        .filter(diffLine => !diffLine.includes('\\ No newline at end of file'))
        .join('\n')

// Tests for the two-hunk output of AutoeditWithShortTermDiffStrategy.
describe('AutoeditWithShortTermDiffStrategy', () => {
const strategy = new AutoeditWithShortTermDiffStrategy()

// Four edits spread over a five-line document; the expectation is one hunk for
// the last edit and one hunk for all earlier edits.
it('handles multiple changes across different lines', () => {
// NOTE(review): the <DC>/<IC> markers presumably denote deleted and inserted
// text understood by getTextDocumentChangesForText — confirm against ./helper.
const text = dedent`
<DC>let</DC><IC>const</IC> x = 5;
<DC>var</DC><IC>let</IC> y = 10;
console.log('break');
<DC>let</DC><IC>const</IC> z = 5;
console.log(<DC>x +</DC><IC>x *</IC> y);
`
const { originalText, changes } = getTextDocumentChangesForText(text)
const diffs = strategy.getDiffHunks({
uri: vscode.Uri.parse('file://test.ts'),
oldContent: originalText,
changes,
})
expect(diffs.length).toBe(2)
// First hunk: only the edit on line 5, without surrounding context lines.
expect(processComputedDiff(diffs[0].diff.toString())).toMatchInlineSnapshot(`
"5-| console.log(x + y);
5+| console.log(x * y);"
`)
// Second hunk: the edits on lines 1, 2 and 4 with context lines; line 5 still
// shows its pre-edit text here.
expect(processComputedDiff(diffs[1].diff.toString())).toMatchInlineSnapshot(`
"1-| let x = 5;
2-| var y = 10;
1+| const x = 5;
2+| let y = 10;
3 | console.log('break');
4-| let z = 5;
4+| const z = 5;
5 | console.log(x + y);"
`)
})
})
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ export interface RecentEditsRetrieverDiffStrategy {
/**
 * A single recorded content change of a text document.
 */
export interface TextDocumentChange {
// When the change was recorded.
// NOTE(review): presumably epoch milliseconds — confirm where it is assigned.
timestamp: number
// The content change event as delivered by VS Code.
change: vscode.TextDocumentContentChangeEvent
// The range in the document where the text was inserted.
insertedRange: vscode.Range
}

export interface DiffCalculationInput {
Expand All @@ -57,6 +59,14 @@ export interface DiffCalculationInput {
}

/**
 * One diff hunk returned by a recent-edits diff strategy.
 */
export interface DiffHunk {
// Document the hunk belongs to.
uri: vscode.Uri
// Timestamp of the latest edit covered by this hunk.
latestEditTimestamp: number
// The diff text, as a PromptString.
diff: PromptString
}

/**
 * Result of computing a unified patch for a set of document changes. Besides
 * the diff it carries the resulting document content.
 */
export interface UnifiedPatchResponse {
// Document the patch belongs to.
uri: vscode.Uri
// Document content that the patch results in.
newContent: string
// The diff text, as a PromptString.
diff: PromptString
// Timestamp of the latest edit covered by this patch.
latestEditTimestamp: number
}
Loading
Loading