From 207559b0a230d350ec3fd699080e4a19bc000040 Mon Sep 17 00:00:00 2001 From: Zhiming Ma Date: Thu, 13 Jul 2023 16:31:20 +0800 Subject: [PATCH] feat: Agent add postprocess for repetitive patterns. (#294) --- clients/tabby-agent/.mocha.env.js | 2 +- clients/tabby-agent/.mocharc.js | 2 +- clients/tabby-agent/.prettierrc.json | 4 +- clients/tabby-agent/openapi/tabby.json | 38 ++-------- clients/tabby-agent/package.json | 6 +- clients/tabby-agent/src/CompletionCache.ts | 2 +- clients/tabby-agent/src/StdIO.ts | 8 +- clients/tabby-agent/src/TabbyAgent.ts | 75 +++++++++++-------- clients/tabby-agent/src/cli.ts | 1 - clients/tabby-agent/src/env.ts | 3 +- clients/tabby-agent/src/logger.ts | 26 ++++--- clients/tabby-agent/src/postprocess/base.ts | 56 ++++++++++++++ .../tabby-agent/src/postprocess/dropBlank.ts | 2 +- .../src/postprocess/dropDuplicated.test.ts | 62 +++++++++++++++ .../src/postprocess/dropDuplicated.ts | 40 ++++++++++ clients/tabby-agent/src/postprocess/filter.ts | 21 ------ clients/tabby-agent/src/postprocess/index.ts | 28 +++++-- .../limitScopeByIndentation.test.ts | 10 --- .../postprocess/limitScopeByIndentation.ts | 7 +- .../removeLineEndsWithRepetition.test.ts | 53 +++++++++++++ .../removeLineEndsWithRepetition.ts | 37 +++++++++ .../src/postprocess/removeOverlapping.ts | 5 +- .../removeRepetitiveBlocks.test.ts | 54 +++++++++++++ .../src/postprocess/removeRepetitiveBlocks.ts | 55 ++++++++++++++ .../postprocess/removeRepetitiveLines.test.ts | 62 +++++++++++++++ .../src/postprocess/removeRepetitiveLines.ts | 48 ++++++++++++ .../tabby-agent/src/postprocess/testUtils.ts | 6 +- clients/tabby-agent/src/utils.ts | 8 ++ clients/vscode/package.json | 2 +- clients/vscode/src/TabbyCompletionProvider.ts | 1 + yarn.lock | 12 +++ 31 files changed, 602 insertions(+), 134 deletions(-) create mode 100644 clients/tabby-agent/src/postprocess/base.ts create mode 100644 clients/tabby-agent/src/postprocess/dropDuplicated.test.ts create mode 100644 
clients/tabby-agent/src/postprocess/dropDuplicated.ts delete mode 100644 clients/tabby-agent/src/postprocess/filter.ts create mode 100644 clients/tabby-agent/src/postprocess/removeLineEndsWithRepetition.test.ts create mode 100644 clients/tabby-agent/src/postprocess/removeLineEndsWithRepetition.ts create mode 100644 clients/tabby-agent/src/postprocess/removeRepetitiveBlocks.test.ts create mode 100644 clients/tabby-agent/src/postprocess/removeRepetitiveBlocks.ts create mode 100644 clients/tabby-agent/src/postprocess/removeRepetitiveLines.test.ts create mode 100644 clients/tabby-agent/src/postprocess/removeRepetitiveLines.ts diff --git a/clients/tabby-agent/.mocha.env.js b/clients/tabby-agent/.mocha.env.js index 3d8ee67..14f1564 100644 --- a/clients/tabby-agent/.mocha.env.js +++ b/clients/tabby-agent/.mocha.env.js @@ -1,3 +1,3 @@ -process.env.NODE_ENV = 'test'; +process.env.NODE_ENV = "test"; process.env.IS_BROWSER = false; process.env.IS_TEST = true; diff --git a/clients/tabby-agent/.mocharc.js b/clients/tabby-agent/.mocharc.js index 9c63a59..bbea715 100644 --- a/clients/tabby-agent/.mocharc.js +++ b/clients/tabby-agent/.mocharc.js @@ -1,4 +1,4 @@ module.exports = { spec: ["src/**/*.test.ts"], require: ["ts-node/register", "./.mocha.env.js"], -}; \ No newline at end of file +}; diff --git a/clients/tabby-agent/.prettierrc.json b/clients/tabby-agent/.prettierrc.json index c42ca6e..963354f 100644 --- a/clients/tabby-agent/.prettierrc.json +++ b/clients/tabby-agent/.prettierrc.json @@ -1,3 +1,3 @@ -{ - "printWidth": 120 +{ + "printWidth": 120 } diff --git a/clients/tabby-agent/openapi/tabby.json b/clients/tabby-agent/openapi/tabby.json index 33a297e..a1a83d4 100644 --- a/clients/tabby-agent/openapi/tabby.json +++ b/clients/tabby-agent/openapi/tabby.json @@ -22,9 +22,7 @@ "paths": { "/v1/completions": { "post": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "completion", "requestBody": { "content": { @@ -55,9 +53,7 @@ }, "/v1/events": { "post": { - "tags": [ 
- "v1" - ], + "tags": ["v1"], "operationId": "event", "requestBody": { "content": { @@ -81,9 +77,7 @@ }, "/v1/health": { "post": { - "tags": [ - "v1" - ], + "tags": ["v1"], "operationId": "health", "responses": { "200": { @@ -104,10 +98,7 @@ "schemas": { "Choice": { "type": "object", - "required": [ - "index", - "text" - ], + "required": ["index", "text"], "properties": { "index": { "type": "integer", @@ -156,10 +147,7 @@ }, "CompletionResponse": { "type": "object", - "required": [ - "id", - "choices" - ], + "required": ["id", "choices"], "properties": { "id": { "type": "string" @@ -174,11 +162,7 @@ }, "HealthState": { "type": "object", - "required": [ - "model", - "device", - "compute_type" - ], + "required": ["model", "device", "compute_type"], "properties": { "model": { "type": "string" @@ -193,11 +177,7 @@ }, "LogEventRequest": { "type": "object", - "required": [ - "type", - "completion_id", - "choice_index" - ], + "required": ["type", "completion_id", "choice_index"], "properties": { "type": { "type": "string", @@ -216,9 +196,7 @@ }, "Segments": { "type": "object", - "required": [ - "prefix" - ], + "required": ["prefix"], "properties": { "prefix": { "type": "string", diff --git a/clients/tabby-agent/package.json b/clients/tabby-agent/package.json index a01c9d0..82599e6 100644 --- a/clients/tabby-agent/package.json +++ b/clients/tabby-agent/package.json @@ -12,8 +12,9 @@ "dev": "tsup --watch --no-minify --no-treeshake", "prebuild": "yarn openapi-codegen", "build": "tsup", - "test:watch": "mocha --watch", - "test": "mocha" + "test:watch": "env TEST_LOG_DEBUG=1 mocha --watch", + "test": "mocha", + "lint": "prettier --write ." 
}, "devDependencies": { "@types/chai": "^4.3.5", @@ -36,6 +37,7 @@ "chokidar": "^3.5.3", "deep-equal": "^2.2.1", "deepmerge": "^4.3.1", + "fast-levenshtein": "^3.0.0", "form-data": "^4.0.0", "fs-extra": "^11.1.1", "jwt-decode": "^3.1.2", diff --git a/clients/tabby-agent/src/CompletionCache.ts b/clients/tabby-agent/src/CompletionCache.ts index 308d40c..2cdd64f 100644 --- a/clients/tabby-agent/src/CompletionCache.ts +++ b/clients/tabby-agent/src/CompletionCache.ts @@ -59,7 +59,7 @@ export class CompletionCache { private createCacheEntries( key: CompletionCacheKey, - value: CompletionCacheValue + value: CompletionCacheValue, ): { key: CompletionCacheKey; value: CompletionCacheValue }[] { const list = [{ key, value }]; if (this.options.partiallyAcceptedCacheGeneration.enabled) { diff --git a/clients/tabby-agent/src/StdIO.ts b/clients/tabby-agent/src/StdIO.ts index 092144e..aefc7fa 100644 --- a/clients/tabby-agent/src/StdIO.ts +++ b/clients/tabby-agent/src/StdIO.ts @@ -8,7 +8,7 @@ type AgentFunctionRequest = [ data: { func: T; args: Parameters; - } + }, ]; type CancellationRequest = [ @@ -16,14 +16,14 @@ type CancellationRequest = [ data: { func: "cancelRequest"; args: [id: number]; - } + }, ]; type Request = AgentFunctionRequest | CancellationRequest; type AgentFunctionResponse = [ id: number, // Matched request id - data: ReturnType + data: ReturnType, ]; type AgentEventNotification = { @@ -33,7 +33,7 @@ type AgentEventNotification = { type CancellationResponse = [ id: number, // Matched request id - data: boolean + data: boolean, ]; type Response = AgentFunctionResponse | AgentEventNotification | CancellationResponse; diff --git a/clients/tabby-agent/src/TabbyAgent.ts b/clients/tabby-agent/src/TabbyAgent.ts index 3d9de53..88696ff 100644 --- a/clients/tabby-agent/src/TabbyAgent.ts +++ b/clients/tabby-agent/src/TabbyAgent.ts @@ -17,7 +17,7 @@ import { Auth } from "./Auth"; import { AgentConfig, defaultAgentConfig, userAgentConfig } from "./AgentConfig"; import { 
CompletionCache } from "./CompletionCache"; import { DataStore } from "./dataStore"; -import { postprocess } from "./postprocess"; +import { postprocess, preCacheProcess } from "./postprocess"; import { rootLogger, allLoggers } from "./logger"; import { AnonymousUsageLogger } from "./AnonymousUsageLogger"; @@ -91,7 +91,7 @@ export class TabbyAgent extends EventEmitter implements Agent { private callApi( api: (request: Request) => CancelablePromise, - request: Request + request: Request, ): CancelablePromise { this.logger.debug({ api: api.name, request }, "API request"); const promise = api.call(this.api.v1, request); @@ -119,7 +119,7 @@ export class TabbyAgent extends EventEmitter implements Agent { }), () => { promise.cancel(); - } + }, ); } @@ -227,7 +227,7 @@ export class TabbyAgent extends EventEmitter implements Agent { }), () => { polling.cancel(); - } + }, ); } @@ -235,39 +235,50 @@ export class TabbyAgent extends EventEmitter implements Agent { if (this.status === "notInitialized") { return cancelable(Promise.reject("Agent is not initialized"), () => {}); } - if (this.completionCache.has(request)) { - this.logger.debug({ request }, "Completion cache hit"); - return new CancelablePromise((resolve) => { - resolve(this.completionCache.get(request)); - }); - } - const segments = this.createSegments(request); - if (isBlank(segments.prefix)) { - this.logger.debug("Segment prefix is blank, returning empty completion response"); - return new CancelablePromise((resolve) => { - resolve({ - id: "agent-" + uuid(), - choices: [], - }); - }); - } - const promise = this.callApi(this.api.v1.completion, { - language: request.language, - segments, - user: this.auth?.user, - }); + const cancelableList: CancelablePromise[] = []; return cancelable( - promise - .then((response) => { - this.completionCache.set(request, response); - return response; + Promise.resolve(null) + // From cache + .then((response: CompletionResponse | null) => { + if (response) return response; + if 
(this.completionCache.has(request)) { + this.logger.debug({ request }, "Completion cache hit"); + return this.completionCache.get(request); + } }) - .then((response) => { + // From api + .then((response: CompletionResponse | null) => { + if (response) return response; + const segments = this.createSegments(request); + if (isBlank(segments.prefix)) { + this.logger.debug("Segment prefix is blank, returning empty completion response"); + return { + id: "agent-" + uuid(), + choices: [], + }; + } + const apiRequest = this.callApi(this.api.v1.completion, { + language: request.language, + segments, + user: this.auth?.user, + }); + cancelableList.push(apiRequest); + return apiRequest + .then((response) => { + return preCacheProcess(request, response); + }) + .then((response) => { + this.completionCache.set(request, response); + return response; + }); + }) + // Postprocess + .then((response: CompletionResponse | null) => { return postprocess(request, response); }), () => { - promise.cancel(); - } + cancelableList.forEach((cancelable) => cancelable.cancel()); + }, ); } diff --git a/clients/tabby-agent/src/cli.ts b/clients/tabby-agent/src/cli.ts index 1e1972f..7246771 100644 --- a/clients/tabby-agent/src/cli.ts +++ b/clients/tabby-agent/src/cli.ts @@ -8,4 +8,3 @@ TabbyAgent.create().then((agent) => { stdio.bind(agent); stdio.listen(); }); - diff --git a/clients/tabby-agent/src/env.ts b/clients/tabby-agent/src/env.ts index 1b57d83..c74ca76 100644 --- a/clients/tabby-agent/src/env.ts +++ b/clients/tabby-agent/src/env.ts @@ -1,2 +1,3 @@ export const isBrowser = !!process.env.IS_BROWSER; -export const isTest = !!process.env.IS_TEST; \ No newline at end of file +export const isTest = !!process.env.IS_TEST; +export const testLogDebug = !!process.env.TEST_LOG_DEBUG; diff --git a/clients/tabby-agent/src/logger.ts b/clients/tabby-agent/src/logger.ts index e433bc4..29785d4 100644 --- a/clients/tabby-agent/src/logger.ts +++ b/clients/tabby-agent/src/logger.ts @@ -1,21 +1,25 @@ import 
pino from "pino"; -import { isBrowser } from "./env"; +import { isBrowser, isTest, testLogDebug } from "./env"; /** * Stream not available in browser, will use default console output. */ -const stream = isBrowser - ? null - : /** - * Default rotating file locate at `~/.tabby/agent/logs/`. - */ - require("rotating-file-stream").createStream("tabby-agent.log", { - path: require("path").join(require("os").homedir(), ".tabby", "agent", "logs"), - size: "10M", - interval: "1d", - }); +const stream = + isBrowser || isTest + ? null + : /** + * Default rotating file locate at `~/.tabby/agent/logs/`. + */ + require("rotating-file-stream").createStream("tabby-agent.log", { + path: require("path").join(require("os").homedir(), ".tabby", "agent", "logs"), + size: "10M", + interval: "1d", + }); export const rootLogger = !!stream ? pino(stream) : pino(); +if (isTest && testLogDebug) { + rootLogger.level = "debug"; +} export const allLoggers = [rootLogger]; rootLogger.onChild = (child) => { diff --git a/clients/tabby-agent/src/postprocess/base.ts b/clients/tabby-agent/src/postprocess/base.ts new file mode 100644 index 0000000..d0b2698 --- /dev/null +++ b/clients/tabby-agent/src/postprocess/base.ts @@ -0,0 +1,56 @@ +import { CompletionRequest, CompletionResponse } from "../Agent"; +import { splitLines } from "../utils"; +import { rootLogger } from "../logger"; + +export type PostprocessContext = { + request: CompletionRequest; // request contains full context, others are for easy access + prefix: string; + suffix: string; + prefixLines: string[]; + suffixLines: string[]; +}; +export type PostprocessFilter = (item: string) => string | null | Promise; + +export const logger = rootLogger.child({ component: "Postprocess" }); + +export function buildContext(request: CompletionRequest): PostprocessContext { + const prefix = request.text.slice(0, request.position); + const suffix = request.text.slice(request.position); + const prefixLines = splitLines(prefix); + const suffixLines = 
splitLines(suffix); + return { + request, + prefix, + suffix, + prefixLines, + suffixLines, + }; +} + +declare global { + interface Array { + distinct(identity?: (x: T) => any): Array; + } +} + +if (!Array.prototype.distinct) { + Array.prototype.distinct = function (this: T[], identity?: (x: T) => any): T[] { + return [...new Map(this.map((item) => [identity?.(item) ?? item, item])).values()]; + }; +} + +export function applyFilter(filter: PostprocessFilter): (response: CompletionResponse) => Promise { + return async (response: CompletionResponse) => { + response.choices = ( + await Promise.all( + response.choices.map(async (choice) => { + choice.text = await filter(choice.text); + return choice; + }), + ) + ) + .filter((choice) => !!choice.text) + .distinct((choice) => choice.text); + return response; + }; +} diff --git a/clients/tabby-agent/src/postprocess/dropBlank.ts b/clients/tabby-agent/src/postprocess/dropBlank.ts index d012c98..29e1a0a 100644 --- a/clients/tabby-agent/src/postprocess/dropBlank.ts +++ b/clients/tabby-agent/src/postprocess/dropBlank.ts @@ -1,4 +1,4 @@ -import { PostprocessFilter } from "./filter"; +import { PostprocessFilter } from "./base"; import { isBlank } from "../utils"; export const dropBlank: () => PostprocessFilter = () => { diff --git a/clients/tabby-agent/src/postprocess/dropDuplicated.test.ts b/clients/tabby-agent/src/postprocess/dropDuplicated.test.ts new file mode 100644 index 0000000..31fee16 --- /dev/null +++ b/clients/tabby-agent/src/postprocess/dropDuplicated.test.ts @@ -0,0 +1,62 @@ +import { expect } from "chai"; +import { documentContext, inline } from "./testUtils"; +import { dropDuplicated } from "./dropDuplicated"; + +describe("postprocess", () => { + describe("dropDuplicated", () => { + it("should drop completion duplicated with suffix", () => { + const context = { + ...documentContext` + let sum = (a, b) => { + ║return a + b; + }; + `, + language: "javascript", + }; + // completion give a `;` at end but context have 
not + const completion = inline` + ├return a + b;┤ + `; + expect(dropDuplicated(context)(completion)).to.be.null; + }); + + it("should drop completion similar to suffix", () => { + const context = { + ...documentContext` + let sum = (a, b) => { + return a + b; + ║ + }; + `, + language: "javascript", + }; + // the difference is a `\n` + const completion = inline` + ├}┤ + `; + expect(dropDuplicated(context)(completion)).to.be.null; + }); + + it("should drop completion that first 3 lines are similar to suffix", () => { + const context = { + ...documentContext` + var a, b; + // swap a and b║ + let z = a; + a = b; + b = z; + // something else + `, + language: "javascript", + }; + const completion = inline` + ├ + let c = a; + a = b; + b = c; + console.log({a, b});┤ + `; + expect(dropDuplicated(context)(completion)).to.be.null; + }); + }); +}); diff --git a/clients/tabby-agent/src/postprocess/dropDuplicated.ts b/clients/tabby-agent/src/postprocess/dropDuplicated.ts new file mode 100644 index 0000000..4363de7 --- /dev/null +++ b/clients/tabby-agent/src/postprocess/dropDuplicated.ts @@ -0,0 +1,40 @@ +import { PostprocessFilter, PostprocessContext, logger } from "./base"; +import { splitLines, isBlank, calcDistance } from "../utils"; + +export const dropDuplicated: (context: PostprocessContext) => PostprocessFilter = (context) => { + return (input) => { + // get first n (n <= 3) lines of input and suffix, ignore blank lines + const { suffixLines } = context; + const inputLines = splitLines(input); + let inputIndex = 0; + while (inputIndex < inputLines.length && isBlank(inputLines[inputIndex])) { + inputIndex++; + } + let suffixIndex = 0; + while (suffixIndex < suffixLines.length && isBlank(suffixLines[suffixIndex])) { + suffixIndex++; + } + const lineCount = Math.min(3, inputLines.length - inputIndex, suffixLines.length - suffixIndex); + if (lineCount < 1) return input; + const inputToCompare = inputLines + .slice(inputIndex, inputIndex + lineCount) + .join("") + .trim(); + 
const suffixToCompare = suffixLines + .slice(suffixIndex, suffixIndex + lineCount) + .join("") + .trim(); + // if string distance is less than threshold (threshold = 3, or 5% of string length) + // drop this completion due to duplicated + const threshold = Math.max(3, 0.05 * inputToCompare.length, 0.05 * suffixToCompare.length); + const distance = calcDistance(inputToCompare, suffixToCompare); + if (distance <= threshold) { + logger.debug( + { inputLines, suffixLines, inputToCompare, suffixToCompare, distance, threshold }, + "Drop completion due to duplicated.", + ); + return null; + } + return input; + }; +}; diff --git a/clients/tabby-agent/src/postprocess/filter.ts b/clients/tabby-agent/src/postprocess/filter.ts deleted file mode 100644 index 1d0e8d0..0000000 --- a/clients/tabby-agent/src/postprocess/filter.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { CompletionRequest, CompletionResponse } from "../Agent"; -import { rootLogger } from "../logger"; - -export type PostprocessContext = CompletionRequest; -export type PostprocessFilter = (item: string) => string | null | Promise; - -export const logger = rootLogger.child({ component: "Postprocess" }); - -export const applyFilter = (filter: PostprocessFilter) => { - return async (response: CompletionResponse) => { - response.choices = ( - await Promise.all( - response.choices.map(async (choice) => { - choice.text = await filter(choice.text); - return choice; - }) - ) - ).filter(Boolean); - return response; - }; -}; diff --git a/clients/tabby-agent/src/postprocess/index.ts b/clients/tabby-agent/src/postprocess/index.ts index ccbbe55..8e90397 100644 --- a/clients/tabby-agent/src/postprocess/index.ts +++ b/clients/tabby-agent/src/postprocess/index.ts @@ -1,15 +1,33 @@ import { CompletionRequest, CompletionResponse } from "../Agent"; -import { applyFilter } from "./filter"; +import { buildContext, applyFilter } from "./base"; +import { removeRepetitiveBlocks } from "./removeRepetitiveBlocks"; +import { 
removeRepetitiveLines } from "./removeRepetitiveLines"; +import { removeLineEndsWithRepetition } from "./removeLineEndsWithRepetition"; import { limitScopeByIndentation } from "./limitScopeByIndentation"; import { removeOverlapping } from "./removeOverlapping"; +import { dropDuplicated } from "./dropDuplicated"; import { dropBlank } from "./dropBlank"; +export async function preCacheProcess( + request: CompletionRequest, + response: CompletionResponse, +): Promise { + const context = buildContext(request); + return Promise.resolve(response) + .then(applyFilter(removeLineEndsWithRepetition(context))) + .then(applyFilter(removeOverlapping(context))) + .then(applyFilter(dropDuplicated(context))) + .then(applyFilter(dropBlank())); +} + export async function postprocess( request: CompletionRequest, - response: CompletionResponse + response: CompletionResponse, ): Promise { - return new Promise((resolve) => resolve(response)) - .then(applyFilter(limitScopeByIndentation(request))) - .then(applyFilter(removeOverlapping(request))) + const context = buildContext(request); + return Promise.resolve(response) + .then(applyFilter(removeRepetitiveBlocks(context))) + .then(applyFilter(removeRepetitiveLines(context))) + .then(applyFilter(limitScopeByIndentation(context))) .then(applyFilter(dropBlank())); } diff --git a/clients/tabby-agent/src/postprocess/limitScopeByIndentation.test.ts b/clients/tabby-agent/src/postprocess/limitScopeByIndentation.test.ts index 61aecaa..2445e36 100644 --- a/clients/tabby-agent/src/postprocess/limitScopeByIndentation.test.ts +++ b/clients/tabby-agent/src/postprocess/limitScopeByIndentation.test.ts @@ -2,15 +2,6 @@ import { expect } from "chai"; import { documentContext, inline } from "./testUtils"; import { limitScopeByIndentation } from "./limitScopeByIndentation"; -const buildContext = (doc: string) => { - return { - filepath: null, - language: "javascript", - text: doc.replace(/║/, ""), - position: doc.indexOf("║"), - }; -}; - 
describe("postprocess", () => { describe("limitScopeByIndentation", () => { it("should remove content out of current intent scope", () => { @@ -67,7 +58,6 @@ describe("postprocess", () => { expect(limitScopeByIndentation(context)(completion)).to.eq(expected); }); - it("should allow single level closing bracket", () => { const context = { ...documentContext` diff --git a/clients/tabby-agent/src/postprocess/limitScopeByIndentation.ts b/clients/tabby-agent/src/postprocess/limitScopeByIndentation.ts index e206dda..1d02ca0 100644 --- a/clients/tabby-agent/src/postprocess/limitScopeByIndentation.ts +++ b/clients/tabby-agent/src/postprocess/limitScopeByIndentation.ts @@ -1,4 +1,4 @@ -import { PostprocessFilter, PostprocessContext, logger } from "./filter"; +import { PostprocessFilter, PostprocessContext, logger } from "./base"; import { isBlank, splitLines } from "../utils"; function calcIndentLevel(line) { @@ -27,10 +27,7 @@ function isOpeningIndentBlock(lines, index) { export const limitScopeByIndentation: (context: PostprocessContext) => PostprocessFilter = (context) => { return (input) => { - const prefix = context.text.slice(0, context.position); - const suffix = context.text.slice(context.position); - const prefixLines = splitLines(prefix); - const suffixLines = splitLines(suffix); + const { prefix, suffix, prefixLines, suffixLines } = context; const inputLines = splitLines(input); const currentIndentLevel = calcIndentLevel(prefixLines[prefixLines.length - 1]); let index; diff --git a/clients/tabby-agent/src/postprocess/removeLineEndsWithRepetition.test.ts b/clients/tabby-agent/src/postprocess/removeLineEndsWithRepetition.test.ts new file mode 100644 index 0000000..5082710 --- /dev/null +++ b/clients/tabby-agent/src/postprocess/removeLineEndsWithRepetition.test.ts @@ -0,0 +1,53 @@ +import { expect } from "chai"; +import { documentContext, inline } from "./testUtils"; +import { removeLineEndsWithRepetition } from "./removeLineEndsWithRepetition"; + 
+describe("postprocess", () => { + describe("removeLineEndsWithRepetition", () => { + it("should drop one line completion ends with repetition", () => { + const context = { + ...documentContext` + let foo = ║ + `, + language: "javascript", + }; + const completion = inline` + ├foo = foo = foo = foo = foo = foo = foo =┤ + `; + expect(removeLineEndsWithRepetition(context)(completion)).to.be.null; + }); + + it("should remove last line that ends with repetition", () => { + const context = { + ...documentContext` + let largeNumber = 1000000 + let veryLargeNumber = ║ + `, + language: "javascript", + }; + const completion = inline` + ├1000000000 + let superLargeNumber = 1000000000000000000000000000000000000000000000┤ + `; + const expected = inline` + ├1000000000┤ + `; + expect(removeLineEndsWithRepetition(context)(completion)).to.eq(expected); + }); + + it("should keep repetition less than threshold", () => { + const context = { + ...documentContext` + let largeNumber = 1000000 + let veryLargeNumber = ║ + `, + language: "javascript", + }; + const completion = inline` + ├1000000000000┤ + `; + const expected = completion; + expect(removeLineEndsWithRepetition(context)(completion)).to.eq(expected); + }); + }); +}); diff --git a/clients/tabby-agent/src/postprocess/removeLineEndsWithRepetition.ts b/clients/tabby-agent/src/postprocess/removeLineEndsWithRepetition.ts new file mode 100644 index 0000000..1f5573c --- /dev/null +++ b/clients/tabby-agent/src/postprocess/removeLineEndsWithRepetition.ts @@ -0,0 +1,37 @@ +import { PostprocessFilter, PostprocessContext, logger } from "./base"; +import { splitLines, isBlank } from "../utils"; + +const repetitionTests = [ + /(.{3,}?)\1{5,}$/g, // match a 3+ characters pattern repeating 5+ times + /(.{10,}?)\1{3,}$/g, // match a 10+ characters pattern repeating 3+ times +]; + +export const removeLineEndsWithRepetition: (context: PostprocessContext) => PostprocessFilter = () => { + return (input) => { + // only test last non-blank line + 
const inputLines = splitLines(input); + let index = inputLines.length - 1; + while (index >= 0 && isBlank(inputLines[index])) { + index--; + } + if (index < 0) return input; + // if matches repetition test, remove this line + for (const test of repetitionTests) { + const match = inputLines[index].match(test); + if (match) { + logger.debug( + { + inputLines, + lineNumber: index, + match, + }, + "Remove line ends with repetition.", + ); + if (index < 1) return null; + return inputLines.slice(0, index).join("").trimEnd(); + } + } + // no repetition found + return input; + }; +}; diff --git a/clients/tabby-agent/src/postprocess/removeOverlapping.ts b/clients/tabby-agent/src/postprocess/removeOverlapping.ts index 8779c38..00c7fc0 100644 --- a/clients/tabby-agent/src/postprocess/removeOverlapping.ts +++ b/clients/tabby-agent/src/postprocess/removeOverlapping.ts @@ -1,8 +1,9 @@ -import { PostprocessFilter, PostprocessContext, logger } from "./filter"; +import { PostprocessFilter, PostprocessContext, logger } from "./base"; export const removeOverlapping: (context: PostprocessContext) => PostprocessFilter = (context) => { return (input) => { - const suffix = context.text.slice(context.position); + const request = context.request; + const suffix = request.text.slice(request.position); for (let index = Math.max(0, input.length - suffix.length); index < input.length; index++) { if (input.slice(index) === suffix.slice(0, input.length - index)) { logger.debug({ input, suffix, overlappedAt: index }, "Remove overlapped content"); diff --git a/clients/tabby-agent/src/postprocess/removeRepetitiveBlocks.test.ts b/clients/tabby-agent/src/postprocess/removeRepetitiveBlocks.test.ts new file mode 100644 index 0000000..2c6057c --- /dev/null +++ b/clients/tabby-agent/src/postprocess/removeRepetitiveBlocks.test.ts @@ -0,0 +1,54 @@ +import { expect } from "chai"; +import { documentContext, inline } from "./testUtils"; +import { removeRepetitiveBlocks } from "./removeRepetitiveBlocks"; + 
+describe("postprocess", () => { + describe("removeRepetitiveBlocks", () => { + it("should remove repetitive blocks", () => { + const context = { + ...documentContext` + function myFuncA() { + console.log("myFuncA called."); + } + + ║ + `, + language: "javascript", + }; + const completion = inline` + ├function myFuncB() { + console.log("myFuncB called."); + } + + function myFuncC() { + console.log("myFuncC called."); + } + + function myFuncD() { + console.log("myFuncD called."); + } + + function myFuncE() { + console.log("myFuncE called."); + } + + function myFuncF() { + console.log("myFuncF called."); + } + + function myFuncG() { + console.log("myFuncG called."); + } + + function myFuncH() { + console.log("myFuncH ┤ + `; + const expected = inline` + ├function myFuncB() { + console.log("myFuncB called."); + }┤ + `; + expect(removeRepetitiveBlocks(context)(completion)).to.eq(expected); + }); + }); +}); diff --git a/clients/tabby-agent/src/postprocess/removeRepetitiveBlocks.ts b/clients/tabby-agent/src/postprocess/removeRepetitiveBlocks.ts new file mode 100644 index 0000000..44413d0 --- /dev/null +++ b/clients/tabby-agent/src/postprocess/removeRepetitiveBlocks.ts @@ -0,0 +1,55 @@ +import { PostprocessFilter, PostprocessContext, logger } from "./base"; +import { isBlank, calcDistance } from "../utils"; + +function blockSplitter(language) { + // Have not implemented this for each language for now + // Return a blank line matcher should work for most cases + return /\n(\s*)\n/g; +} + +// FIXME: refactor this because it is very similar to `removeRepetitiveLines` +export const removeRepetitiveBlocks: (context: PostprocessContext) => PostprocessFilter = (context) => { + return (input) => { + const inputBlocks = input.split(blockSplitter(context.request.language)); + let repetitionCount = 0; + const repetitionThreshold = 2; + // skip last block, it maybe cut + let index = inputBlocks.length - 2; + while (index >= 1) { + if (isBlank(inputBlocks[index])) { + index--; + 
continue; + } + let prev = index - 1; + while (prev >= 0 && isBlank(inputBlocks[prev])) { + prev--; + } + if (prev < 0) break; + // if distance between current and previous block is less than threshold (threshold = 3, or 10% of string length) + const currentBlock = inputBlocks[index].trim(); + const previousBlock = inputBlocks[prev].trim(); + const threshold = Math.max(3, 0.1 * currentBlock.length, 0.1 * previousBlock.length); + const distance = calcDistance(currentBlock, previousBlock); + if (distance <= threshold) { + repetitionCount++; + index--; + } else { + break; + } + } + if (repetitionCount >= repetitionThreshold) { + logger.debug( + { + inputBlocks, + repetitionCount, + }, + "Remove repetitive blocks.", + ); + return inputBlocks + .slice(0, index + 1) + .join("") + .trimEnd(); + } + return input; + }; +}; diff --git a/clients/tabby-agent/src/postprocess/removeRepetitiveLines.test.ts b/clients/tabby-agent/src/postprocess/removeRepetitiveLines.test.ts new file mode 100644 index 0000000..70c55d9 --- /dev/null +++ b/clients/tabby-agent/src/postprocess/removeRepetitiveLines.test.ts @@ -0,0 +1,62 @@ +import { expect } from "chai"; +import { documentContext, inline } from "./testUtils"; +import { removeRepetitiveLines } from "./removeRepetitiveLines"; + +describe("postprocess", () => { + describe("removeRepetitiveLines", () => { + it("should remove repetitive lines", () => { + const context = { + ...documentContext` + function hello() { + console.log("hello"); + } + hello(); + hello(); + ║ + `, + language: "javascript", + }; + const completion = inline` + ├hello(); + hello(); + hello(); + hello(); + hello(); + hello(); + hello(); + hello(); + hello(); + hello();┤ + `; + const expected = inline` + ├hello();┤ + `; + expect(removeRepetitiveLines(context)(completion)).to.eq(expected); + }); + + it("should remove repetitive lines with patterns", () => { + const context = { + ...documentContext` + const a = 1; + ║ + `, + language: "javascript", + }; + const completion 
= inline` + ├const b = 1; + const c = 1; + const d = 1; + const e = 1; + const f = 1; + const g = 1; + const h = 1; + const i = 1; + const j = 1; + const k =┤`; + const expected = inline` + ├const b = 1;┤ + `; + expect(removeRepetitiveLines(context)(completion)).to.eq(expected); + }); + }); +}); diff --git a/clients/tabby-agent/src/postprocess/removeRepetitiveLines.ts b/clients/tabby-agent/src/postprocess/removeRepetitiveLines.ts new file mode 100644 index 0000000..c28abbd --- /dev/null +++ b/clients/tabby-agent/src/postprocess/removeRepetitiveLines.ts @@ -0,0 +1,48 @@ +import { PostprocessFilter, PostprocessContext, logger } from "./base"; +import { splitLines, isBlank, calcDistance } from "../utils"; + +export const removeRepetitiveLines: (context: PostprocessContext) => PostprocessFilter = () => { + return (input) => { + const inputLines = splitLines(input); + let repetitionCount = 0; + const repetitionThreshold = 5; + // skip last line, it could be a not completed line + let index = inputLines.length - 2; + while (index >= 1) { + if (isBlank(inputLines[index])) { + index--; + continue; + } + let prev = index - 1; + while (prev >= 0 && isBlank(inputLines[prev])) { + prev--; + } + if (prev < 0) break; + // if distance between current and previous line is less than threshold (threshold = 3, or 10% of string length) + const currentLine = inputLines[index].trim(); + const previousLine = inputLines[prev].trim(); + const threshold = Math.max(3, 0.1 * currentLine.length, 0.1 * previousLine.length); + const distance = calcDistance(currentLine, previousLine); + if (distance <= threshold) { + repetitionCount++; + index = prev; + } else { + break; + } + } + if (repetitionCount >= repetitionThreshold) { + logger.debug( + { + inputLines, + repetitionCount, + }, + "Remove repetitive lines.", + ); + return inputLines + .slice(0, index + 1) + .join("") + .trimEnd(); + } + return input; + }; +}; diff --git a/clients/tabby-agent/src/postprocess/testUtils.ts 
b/clients/tabby-agent/src/postprocess/testUtils.ts index a83f456..9eb151a 100644 --- a/clients/tabby-agent/src/postprocess/testUtils.ts +++ b/clients/tabby-agent/src/postprocess/testUtils.ts @@ -1,18 +1,18 @@ import dedent from "dedent"; -import type { PostprocessContext } from "./filter"; +import { buildContext, PostprocessContext } from "./base"; // `║` is the cursor position export function documentContext(strings): PostprocessContext { const doc = dedent(strings); - return { + return buildContext({ filepath: null, language: null, text: doc.replace(/║/, ""), position: doc.indexOf("║"), maxPrefixLines: 20, maxSuffixLines: 20, - }; + }); } // `├` start of the inline completion to insert diff --git a/clients/tabby-agent/src/utils.ts b/clients/tabby-agent/src/utils.ts index a25c853..9b1cca0 100644 --- a/clients/tabby-agent/src/utils.ts +++ b/clients/tabby-agent/src/utils.ts @@ -10,6 +10,14 @@ export function isBlank(input: string) { return input.trim().length === 0; } +// Character-level Levenshtein distance is a poor fit here, because differing variable names alone can produce a large distance. +// For example, the distance between `const fooFooFoo = 1;` and `const barBarBar = 1;` is 9, although a distance of 1 would better reflect how similar the two lines are. +// It may be better to compute the distance over words instead of characters. 
+import * as levenshtein from "fast-levenshtein"; +export function calcDistance(a: string, b: string) { + return levenshtein.get(a, b); +} + import { CancelablePromise } from "./generated"; export function cancelable(promise: Promise, cancel: () => void): CancelablePromise { return new CancelablePromise((resolve, reject, onCancel) => { diff --git a/clients/vscode/package.json b/clients/vscode/package.json index 59c9c91..5698902 100644 --- a/clients/vscode/package.json +++ b/clients/vscode/package.json @@ -131,7 +131,7 @@ }, "scripts": { "build": "tsup --minify --treeshake smallest", - "watch": "tsup --sourcemap --watch ./ --watch ../tabby-agent/dist", + "watch": "tsup --sourcemap --watch ./ --ignore-watch ./dist --watch ../tabby-agent/dist", "dev": "code --extensionDevelopmentPath=$PWD --disable-extensions && yarn watch", "dev:browser": "vscode-test-web --extensionDevelopmentPath=$PWD --browserType=chromium --port=3000 && yarn watch", "lint": "eslint . --fix", diff --git a/clients/vscode/src/TabbyCompletionProvider.ts b/clients/vscode/src/TabbyCompletionProvider.ts index 894692a..d91d4ac 100644 --- a/clients/vscode/src/TabbyCompletionProvider.ts +++ b/clients/vscode/src/TabbyCompletionProvider.ts @@ -107,6 +107,7 @@ export class TabbyCompletionProvider implements InlineCompletionItemProvider { return ")]}".indexOf(suffix) > -1; } + // FIXME: move replace range calculation to tabby-agent private calculateReplaceRange(document: TextDocument, position: Position): Range { const hasSuffixParen = this.hasSuffixParen(document, position); if (hasSuffixParen) { diff --git a/yarn.lock b/yarn.lock index e7f334e..3466a13 100644 --- a/yarn.lock +++ b/yarn.lock @@ -1646,11 +1646,23 @@ fast-levenshtein@^2.0.6: resolved "https://registry.yarnpkg.com/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz#3d8a5c66883a16a30ca8643e851f19baa7797917" integrity sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw== +fast-levenshtein@^3.0.0: + version 
"3.0.0" + resolved "https://registry.yarnpkg.com/fast-levenshtein/-/fast-levenshtein-3.0.0.tgz#37b899ae47e1090e40e3fd2318e4d5f0142ca912" + integrity sha512-hKKNajm46uNmTlhHSyZkmToAc56uZJwYq7yrciZjqOxnlfQwERDQJmHPUp7m1m9wx8vgOe8IaCKZ5Kv2k1DdCQ== + dependencies: + fastest-levenshtein "^1.0.7" + fast-redact@^3.1.1: version "3.2.0" resolved "https://registry.yarnpkg.com/fast-redact/-/fast-redact-3.2.0.tgz#b1e2d39bc731376d28bde844454fa23e26919987" integrity sha512-zaTadChr+NekyzallAMXATXLOR8MNx3zqpZ0MUF2aGf4EathnG0f32VLODNlY8IuGY3HoRO2L6/6fSzNsLaHIw== +fastest-levenshtein@^1.0.7: + version "1.0.16" + resolved "https://registry.yarnpkg.com/fastest-levenshtein/-/fastest-levenshtein-1.0.16.tgz#210e61b6ff181de91ea9b3d1b84fdedd47e034e5" + integrity sha512-eRnCtTTtGZFpQCwhJiUOuxPQWRXVKYDn0b2PeHfXL6/Zi53SLAzAHfVhVWK2AryC/WH05kGfxhFIPvTF0SXQzg== + fastq@^1.6.0: version "1.15.0" resolved "https://registry.yarnpkg.com/fastq/-/fastq-1.15.0.tgz#d04d07c6a2a68fe4599fea8d2e103a937fae6b3a"