feat(agent): monitor completion response time. (#359)
* feat(agent): monitor completion response time.
* fix(agent): improve help message for running large model on cpu.
* fix: notification spacing.
release-0.0
parent
2026b4dd0e
commit
732d83feef
|
|
@ -37,17 +37,9 @@
|
|||
"responses": {
|
||||
"200": {
|
||||
"description": "Success",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/CompletionResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
"content": { "application/json": { "schema": { "$ref": "#/components/schemas/CompletionResponse" } } }
|
||||
},
|
||||
"400": {
|
||||
"description": "Bad Request"
|
||||
}
|
||||
"400": { "description": "Bad Request" }
|
||||
}
|
||||
}
|
||||
},
|
||||
|
|
@ -56,23 +48,10 @@
|
|||
"tags": ["v1"],
|
||||
"operationId": "event",
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/LogEventRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"content": { "application/json": { "schema": { "$ref": "#/components/schemas/LogEventRequest" } } },
|
||||
"required": true
|
||||
},
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "Success"
|
||||
},
|
||||
"400": {
|
||||
"description": "Bad Request"
|
||||
}
|
||||
}
|
||||
"responses": { "200": { "description": "Success" }, "400": { "description": "Bad Request" } }
|
||||
}
|
||||
},
|
||||
"/v1/health": {
|
||||
|
|
@ -82,13 +61,7 @@
|
|||
"responses": {
|
||||
"200": {
|
||||
"description": "Success",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/HealthState"
|
||||
}
|
||||
}
|
||||
}
|
||||
"content": { "application/json": { "schema": { "$ref": "#/components/schemas/HealthState" } } }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -100,108 +73,62 @@
|
|||
"type": "object",
|
||||
"required": ["index", "text"],
|
||||
"properties": {
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"format": "int32",
|
||||
"minimum": 0
|
||||
},
|
||||
"text": {
|
||||
"type": "string"
|
||||
}
|
||||
"index": { "type": "integer", "format": "int32", "minimum": 0.0 },
|
||||
"text": { "type": "string" }
|
||||
}
|
||||
},
|
||||
"CompletionRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"prompt": {
|
||||
"type": "string",
|
||||
"example": "def fib(n):",
|
||||
"nullable": true
|
||||
},
|
||||
"prompt": { "type": "string", "example": "def fib(n):", "nullable": true },
|
||||
"language": {
|
||||
"type": "string",
|
||||
"description": "Language identifier, full list is maintained at\nhttps://code.visualstudio.com/docs/languages/identifiers",
|
||||
"example": "python",
|
||||
"nullable": true
|
||||
},
|
||||
"segments": {
|
||||
"allOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/Segments"
|
||||
}
|
||||
],
|
||||
"nullable": true
|
||||
},
|
||||
"user": {
|
||||
"type": "string",
|
||||
"nullable": true
|
||||
}
|
||||
"segments": { "allOf": [{ "$ref": "#/components/schemas/Segments" }], "nullable": true },
|
||||
"user": { "type": "string", "nullable": true }
|
||||
},
|
||||
"example": {
|
||||
"language": "python",
|
||||
"segments": {
|
||||
"prefix": "def fib(n):\n ",
|
||||
"suffix": "\n return fib(n - 1) + fib(n - 2)"
|
||||
}
|
||||
"segments": { "prefix": "def fib(n):\n ", "suffix": "\n return fib(n - 1) + fib(n - 2)" }
|
||||
}
|
||||
},
|
||||
"CompletionResponse": {
|
||||
"type": "object",
|
||||
"required": ["id", "choices"],
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"choices": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/Choice"
|
||||
}
|
||||
}
|
||||
"id": { "type": "string" },
|
||||
"choices": { "type": "array", "items": { "$ref": "#/components/schemas/Choice" } }
|
||||
}
|
||||
},
|
||||
"HealthState": {
|
||||
"type": "object",
|
||||
"required": ["model", "device", "compute_type"],
|
||||
"required": ["model", "device", "compute_type", "arch", "cpu_info", "cpu_count"],
|
||||
"properties": {
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"device": {
|
||||
"type": "string"
|
||||
},
|
||||
"compute_type": {
|
||||
"type": "string"
|
||||
}
|
||||
"model": { "type": "string" },
|
||||
"device": { "type": "string" },
|
||||
"compute_type": { "type": "string" },
|
||||
"arch": { "type": "string" },
|
||||
"cpu_info": { "type": "string" },
|
||||
"cpu_count": { "type": "integer", "minimum": 0.0 }
|
||||
}
|
||||
},
|
||||
"LogEventRequest": {
|
||||
"type": "object",
|
||||
"required": ["type", "completion_id", "choice_index"],
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"description": "Event type, should be `view` or `select`.",
|
||||
"example": "view"
|
||||
},
|
||||
"completion_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"choice_index": {
|
||||
"type": "integer",
|
||||
"format": "int32",
|
||||
"minimum": 0
|
||||
}
|
||||
"type": { "type": "string", "description": "Event type, should be `view` or `select`.", "example": "view" },
|
||||
"completion_id": { "type": "string" },
|
||||
"choice_index": { "type": "integer", "format": "int32", "minimum": 0.0 }
|
||||
}
|
||||
},
|
||||
"Segments": {
|
||||
"type": "object",
|
||||
"required": ["prefix"],
|
||||
"properties": {
|
||||
"prefix": {
|
||||
"type": "string",
|
||||
"description": "Content that appears before the cursor in the editor window."
|
||||
},
|
||||
"prefix": { "type": "string", "description": "Content that appears before the cursor in the editor window." },
|
||||
"suffix": {
|
||||
"type": "string",
|
||||
"description": "Content that appears after the cursor in the editor window.",
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import {
|
|||
CancelablePromise,
|
||||
LogEventRequest as ApiLogEventRequest,
|
||||
CompletionResponse as ApiCompletionResponse,
|
||||
HealthState,
|
||||
} from "./generated";
|
||||
|
||||
import { AgentConfig, PartialAgentConfig } from "./AgentConfig";
|
||||
|
|
@ -11,11 +12,14 @@ export type AgentInitOptions = Partial<{
|
|||
client: string;
|
||||
}>;
|
||||
|
||||
export type ServerHealthState = HealthState;
|
||||
|
||||
export type CompletionRequest = {
|
||||
filepath: string;
|
||||
language: string;
|
||||
text: string;
|
||||
position: number;
|
||||
manually?: boolean;
|
||||
maxPrefixLines?: number;
|
||||
maxSuffixLines?: number;
|
||||
};
|
||||
|
|
@ -24,16 +28,31 @@ export type CompletionResponse = ApiCompletionResponse;
|
|||
|
||||
export type LogEventRequest = ApiLogEventRequest;
|
||||
|
||||
/**
 * Shape shared by the completion-related issues: the issue name plus the
 * completion response statistics reported together with it.
 */
type CompletionResponseIssue<Name extends string> = {
  name: Name;
  completionResponseStats: Record<string, number>;
};

/** Issue named `slowCompletionResponseTime`. */
export type SlowCompletionResponseTimeIssue = CompletionResponseIssue<"slowCompletionResponseTime">;

/** Issue named `highCompletionTimeoutRate`. */
export type HighCompletionTimeoutRateIssue = CompletionResponseIssue<"highCompletionTimeoutRate">;

/** Any issue the agent can report, discriminated by its `name` field. */
export type AgentIssue = SlowCompletionResponseTimeIssue | HighCompletionTimeoutRateIssue;
|
||||
|
||||
/**
|
||||
* `notInitialized`: When the agent is not initialized.
|
||||
* `ready`: When the agent get a valid response from the server, and is ready to use.
|
||||
* `disconnected`: When the agent failed to connect to the server.
|
||||
* `unauthorized`: When the server is set to a Tabby Cloud endpoint that requires auth,
|
||||
* Represents the status of the agent.
|
||||
* @enum
|
||||
* @property {string} notInitialized - When the agent is not initialized.
|
||||
* @property {string} ready - When the agent gets a valid response from the server.
|
||||
* @property {string} disconnected - When the agent fails to connect to the server.
|
||||
* @property {string} unauthorized - When the server is set to a Tabby Cloud endpoint that requires auth,
|
||||
* and no `Authorization` request header is provided in the agent config,
|
||||
* and user has not completed the auth flow or the auth token is expired.
|
||||
* and the user has not completed the auth flow or the auth token is expired.
|
||||
* See also `requestAuthUrl` and `waitForAuthToken`.
|
||||
* @property {string} issuesExist - When the agent gets a valid response from the server, but still
|
||||
* has some non-blocking issues, e.g. the average completion response time is too slow,
|
||||
* or the timeout rate is too high.
|
||||
*/
|
||||
export type AgentStatus = "notInitialized" | "ready" | "disconnected" | "unauthorized";
|
||||
export type AgentStatus = "notInitialized" | "ready" | "disconnected" | "unauthorized" | "issuesExist";
|
||||
|
||||
export interface AgentFunction {
|
||||
/**
|
||||
|
|
@ -70,6 +89,16 @@ export interface AgentFunction {
|
|||
*/
|
||||
getStatus(): AgentStatus;
|
||||
|
||||
/**
|
||||
* @returns the current issues if AgentStatus is `issuesExist`, otherwise returns empty array
|
||||
*/
|
||||
getIssues(): AgentIssue[];
|
||||
|
||||
/**
|
||||
* @returns server info returned from latest server health check, returns null if not available
|
||||
*/
|
||||
getServerHealthState(): ServerHealthState | null;
|
||||
|
||||
/**
|
||||
* Request auth url for Tabby Cloud endpoint. Only return value when the `AgentStatus` is `unauthorized`.
|
||||
* Otherwise, return null. See also `AgentStatus`.
|
||||
|
|
@ -117,9 +146,13 @@ export type AuthRequiredEvent = {
|
|||
event: "authRequired";
|
||||
server: AgentConfig["server"];
|
||||
};
|
||||
/**
 * Event payload for the `newIssue` event; `issue` carries the issue together with its details.
 */
export type NewIssueEvent = {
  event: "newIssue";
  issue: AgentIssue;
};
|
||||
|
||||
export type AgentEvent = StatusChangedEvent | ConfigUpdatedEvent | AuthRequiredEvent;
|
||||
export const agentEventNames: AgentEvent["event"][] = ["statusChanged", "configUpdated", "authRequired"];
|
||||
export type AgentEvent = StatusChangedEvent | ConfigUpdatedEvent | AuthRequiredEvent | NewIssueEvent;
|
||||
export const agentEventNames: AgentEvent["event"][] = ["statusChanged", "configUpdated", "authRequired", "newIssue"];
|
||||
|
||||
export interface AgentEventEmitter {
|
||||
on<T extends AgentEvent>(eventName: T["event"], callback: (event: T) => void): this;
|
||||
|
|
|
|||
|
|
@ -4,8 +4,13 @@ export type AgentConfig = {
|
|||
server: {
|
||||
endpoint: string;
|
||||
requestHeaders: Record<string, string>;
|
||||
requestTimeout: number;
|
||||
};
|
||||
completion: {
|
||||
timeout: {
|
||||
auto: number;
|
||||
manually: number;
|
||||
};
|
||||
maxPrefixLines: number;
|
||||
maxSuffixLines: number;
|
||||
};
|
||||
|
|
@ -31,8 +36,13 @@ export const defaultAgentConfig: AgentConfig = {
|
|||
server: {
|
||||
endpoint: "http://localhost:8080",
|
||||
requestHeaders: {},
|
||||
requestTimeout: 30000, // 30s
|
||||
},
|
||||
completion: {
|
||||
timeout: {
|
||||
auto: 5000, // 5s
|
||||
manually: 30000, // 30s
|
||||
},
|
||||
maxPrefixLines: 20,
|
||||
maxSuffixLines: 20,
|
||||
},
|
||||
|
|
|
|||
|
|
@ -0,0 +1,109 @@
|
|||
import { EventEmitter } from "events";
|
||||
import { rootLogger } from "./logger";
|
||||
|
||||
/**
 * One recorded API call outcome that feeds the response statistics.
 */
export type ResponseStatsEntry = {
  /** Name of the API call the entry was recorded for. */
  name: string;
  /** Status recorded for the call; the completion strategy counts `200` as a successful response. */
  status: number;
  /** Measured response time; the completion strategy compares it against millisecond thresholds. */
  responseTime: number;
  /** Error attached to the call, if any; the completion strategy reads `isTimeoutError` from it. */
  error?: any;
};
|
||||
|
||||
/**
 * Configuration consumed by `ResponseStats`.
 */
export type ResponseStatsStrategy = {
  /** Maximum number of entries kept; the oldest entry is dropped once this is exceeded. */
  maxSize: number;
  /** Named aggregate functions, each computing one number from the kept entries. */
  stats: Record<string, (entries: ResponseStatsEntry[]) => number>;
  /** Named predicates evaluated, in array order, against the kept entries and the computed stats. */
  checks: Array<{
    name: string;
    check: (entries: ResponseStatsEntry[], stats: Record<string, number>) => boolean;
  }>;
};
|
||||
|
||||
/**
 * Statistics strategy for completion requests.
 *
 * Keeps a window of the latest 50 entries, derives four numbers from it
 * (`total`, `responses`, `timeouts`, `averageResponseTime`) and defines three
 * checks that are meant to be evaluated in order.
 */
export const completionResponseTimeStatsStrategy = {
  maxSize: 50,
  stats: {
    // Number of entries currently in the window.
    total: (entries: ResponseStatsEntry[]) => entries.length,
    // Number of entries that completed with status 200.
    responses: (entries: ResponseStatsEntry[]) => entries.filter((entry) => entry.status === 200).length,
    // Number of entries whose error is flagged as a timeout.
    timeouts: (entries: ResponseStatsEntry[]) => entries.filter((entry) => entry.error?.isTimeoutError).length,
    // Mean response time of the status-200 entries only. The sum is divided by
    // the number of successful responses (not by the window size), so timeouts
    // and failures do not drag the average down. Yields 0 when there is no
    // successful response instead of dividing by zero.
    averageResponseTime: (entries: ResponseStatsEntry[]) => {
      const responses = entries.filter((entry) => entry.status === 200);
      if (responses.length === 0) {
        return 0;
      }
      return responses.reduce((acc, entry) => acc + entry.responseTime, 0) / responses.length;
    },
  },
  checks: [
    // check in order and emit the first event that matches
    // if all the last 5 entries have status 200 and response time less than 3s
    {
      name: "healthy",
      check: (entries: ResponseStatsEntry[]) => {
        const recentEntries = entries.slice(-5);
        return recentEntries.every((entry) => entry.status === 200 && entry.responseTime < 3000);
      },
    },
    // if TimeoutError percentage is more than 50%, at least 3 requests
    {
      name: "highTimeoutRate",
      check: (entries: ResponseStatsEntry[], stats: Record<string, number>) => {
        if (stats.total < 3) {
          return false;
        }
        return stats.timeouts / stats.total > 0.5;
      },
    },
    // if average response time is more than 4s, at least 5 successful responses
    {
      name: "slowResponseTime",
      check: (entries: ResponseStatsEntry[], stats: Record<string, number>) => {
        if (stats.responses < 5) {
          return false;
        }
        return stats.averageResponseTime > 4000;
      },
    },
  ],
};
|
||||
|
||||
/**
 * Keeps a bounded window of response entries and evaluates a strategy's
 * checks against it.
 *
 * Every `push` recomputes the strategy's stats and emits one event, named
 * after the first check (in strategy order) whose condition is met, with the
 * computed stats as the event argument.
 */
export class ResponseStats extends EventEmitter {
  private readonly logger = rootLogger.child({ component: "ResponseStats" });
  private strategy: ResponseStatsStrategy = {
    maxSize: 0,
    stats: {},
    checks: [],
  };

  private entries: ResponseStatsEntry[] = [];

  constructor(strategy: ResponseStatsStrategy) {
    super();
    this.strategy = strategy;
  }

  /**
   * Records an entry, drops the oldest one when the window exceeds
   * `maxSize`, and emits the event of the first matching check, if any.
   *
   * Only the first match is emitted: the checks are ordered by priority, and
   * emitting several at once would make listeners that treat them as
   * mutually exclusive toggle state on every push.
   */
  push(entry: ResponseStatsEntry): void {
    this.entries.push(entry);
    if (this.entries.length > this.strategy.maxSize) {
      this.entries.shift();
    }
    const stats = this.stats();
    const matched = this.firstMatchingCheck(stats);
    if (matched !== null) {
      this.logger.debug({ check: matched, stats }, "Check condition met");
      this.emit(matched, stats);
    }
  }

  /**
   * @returns every stat defined by the strategy, computed over the current entries
   */
  stats(): Record<string, number> {
    const result: Record<string, number> = {};
    for (const [name, stats] of Object.entries(this.strategy.stats)) {
      result[name] = stats(this.entries);
    }
    return result;
  }

  /**
   * @returns the name of the first check whose condition is currently met, or null if none is
   */
  check(): string | null {
    return this.firstMatchingCheck(this.stats());
  }

  /** Evaluates the checks in strategy order against already-computed stats. */
  private firstMatchingCheck(stats: Record<string, number>): string | null {
    const matched = this.strategy.checks.find((check) => check.check(this.entries, stats));
    return matched ? matched.name : null;
  }
}
|
||||
|
|
@ -8,8 +8,10 @@ import { cancelable, splitLines, isBlank } from "./utils";
|
|||
import {
|
||||
Agent,
|
||||
AgentStatus,
|
||||
AgentIssue,
|
||||
AgentEvent,
|
||||
AgentInitOptions,
|
||||
ServerHealthState,
|
||||
CompletionRequest,
|
||||
CompletionResponse,
|
||||
LogEventRequest,
|
||||
|
|
@ -21,6 +23,7 @@ import { DataStore } from "./dataStore";
|
|||
import { postprocess, preCacheProcess } from "./postprocess";
|
||||
import { rootLogger, allLoggers } from "./logger";
|
||||
import { AnonymousUsageLogger } from "./AnonymousUsageLogger";
|
||||
import { ResponseStats, completionResponseTimeStatsStrategy } from "./ResponseStats";
|
||||
|
||||
/**
|
||||
* Different from AgentInitOptions or AgentConfig, this may contain non-serializable objects,
|
||||
|
|
@ -37,12 +40,15 @@ export class TabbyAgent extends EventEmitter implements Agent {
|
|||
private userConfig: PartialAgentConfig = {}; // config from `~/.tabby/agent/config.toml`
|
||||
private clientConfig: PartialAgentConfig = {}; // config from `initialize` and `updateConfig` method
|
||||
private status: AgentStatus = "notInitialized";
|
||||
private issues: AgentIssue["name"][] = [];
|
||||
private serverHealthState: ServerHealthState | null = null;
|
||||
private api: TabbyApi;
|
||||
private auth: Auth;
|
||||
private dataStore: DataStore | null = null;
|
||||
private completionCache: CompletionCache = new CompletionCache();
|
||||
static readonly tryConnectInterval = 1000 * 30; // 30s
|
||||
private tryingConnectTimer: ReturnType<typeof setInterval> | null = null;
|
||||
private completionResponseStats: ResponseStats = new ResponseStats(completionResponseTimeStatsStrategy);
|
||||
|
||||
private constructor() {
|
||||
super();
|
||||
|
|
@ -53,6 +59,23 @@ export class TabbyAgent extends EventEmitter implements Agent {
|
|||
await this.healthCheck();
|
||||
}
|
||||
}, TabbyAgent.tryConnectInterval);
|
||||
|
||||
this.completionResponseStats.on("healthy", () => {
|
||||
this.popIssue("slowCompletionResponseTime");
|
||||
this.popIssue("highCompletionTimeoutRate");
|
||||
});
|
||||
this.completionResponseStats.on("highTimeoutRate", () => {
|
||||
if (this.status === "ready" || this.status === "issuesExist") {
|
||||
this.popIssue("slowCompletionResponseTime");
|
||||
this.pushIssue("highCompletionTimeoutRate");
|
||||
}
|
||||
});
|
||||
this.completionResponseStats.on("slowResponseTime", () => {
|
||||
if (this.status === "ready" || this.status === "issuesExist") {
|
||||
this.popIssue("highCompletionTimeoutRate");
|
||||
this.pushIssue("slowCompletionResponseTime");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
static async create(options?: TabbyAgentOptions): Promise<TabbyAgent> {
|
||||
|
|
@ -101,6 +124,38 @@ export class TabbyAgent extends EventEmitter implements Agent {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Expands an issue name into the full issue object by attaching the current
 * completion response stats.
 */
private issueWithDetails(issue: AgentIssue["name"]): AgentIssue {
  const completionResponseStats = this.completionResponseStats.stats();
  switch (issue) {
    case "highCompletionTimeoutRate":
      return { name: "highCompletionTimeoutRate", completionResponseStats };
    case "slowCompletionResponseTime":
      return { name: "slowCompletionResponseTime", completionResponseStats };
  }
}
|
||||
|
||||
/**
 * Registers an issue that is not tracked yet: switches the status to
 * `issuesExist` and emits a `newIssue` event carrying the issue details.
 * Does nothing when the issue is already tracked.
 */
private pushIssue(issue: AgentIssue["name"]) {
  if (this.issues.includes(issue)) {
    return;
  }
  this.issues.push(issue);
  this.changeStatus("issuesExist");
  const event: AgentEvent = { event: "newIssue", issue: this.issueWithDetails(issue) };
  this.logger.debug({ event }, "New issue");
  super.emit("newIssue", event);
}
|
||||
|
||||
/**
 * Stops tracking an issue. When no issue is left and the status is still
 * `issuesExist`, the status goes back to `ready`.
 */
private popIssue(issue: AgentIssue["name"]) {
  const remaining = this.issues.filter((name) => name !== issue);
  this.issues = remaining;
  if (remaining.length > 0 || this.status !== "issuesExist") {
    return;
  }
  this.changeStatus("ready");
}
|
||||
|
||||
private emitAuthRequired() {
|
||||
const event: AgentEvent = { event: "authRequired", server: this.config.server };
|
||||
super.emit("authRequired", event);
|
||||
|
|
@ -109,44 +164,99 @@ export class TabbyAgent extends EventEmitter implements Agent {
|
|||
private callApi<Request, Response>(
|
||||
api: (request: Request) => CancelablePromise<Response>,
|
||||
request: Request,
|
||||
options: { timeout?: number } = { timeout: this.config.server.requestTimeout },
|
||||
): CancelablePromise<Response> {
|
||||
this.logger.debug({ api: api.name, request }, "API request");
|
||||
const promise = api.call(this.api.v1, request);
|
||||
return cancelable(
|
||||
promise
|
||||
return new CancelablePromise((resolve, reject, onCancel) => {
|
||||
const requestId = uuid();
|
||||
this.logger.debug({ requestId, api: api.name, request }, "API request");
|
||||
let timeout: ReturnType<typeof setTimeout> | null = null;
|
||||
let timeoutCancelled = false;
|
||||
const apiRequest = api.call(this.api.v1, request);
|
||||
const requestStartedAt = performance.now();
|
||||
apiRequest
|
||||
.then((response: Response) => {
|
||||
this.logger.debug({ api: api.name, response }, "API response");
|
||||
this.changeStatus("ready");
|
||||
return response;
|
||||
this.logger.debug({ requestId, api: api.name, response }, "API response");
|
||||
if (this.status !== "issuesExist") {
|
||||
this.changeStatus("ready");
|
||||
}
|
||||
if (api.name === "completion") {
|
||||
this.completionResponseStats.push({
|
||||
name: api.name,
|
||||
status: 200,
|
||||
responseTime: performance.now() - requestStartedAt,
|
||||
});
|
||||
}
|
||||
if (timeout) {
|
||||
clearTimeout(timeout);
|
||||
}
|
||||
resolve(response);
|
||||
})
|
||||
.catch((error) => {
|
||||
if (!!error.isCancelled) {
|
||||
this.logger.debug({ api: api.name, error }, "API request canceled");
|
||||
if (
|
||||
(!!error.isCancelled && timeoutCancelled) ||
|
||||
(!error.isCancelled && error.code === "ECONNABORTED") ||
|
||||
(error.name === "ApiError" && [408, 499].indexOf(error.status) !== -1)
|
||||
) {
|
||||
error.isTimeoutError = true;
|
||||
this.logger.debug({ requestId, api: api.name, error }, "API request timeout");
|
||||
} else if (!!error.isCancelled) {
|
||||
this.logger.debug({ requestId, api: api.name, error }, "API request cancelled");
|
||||
} else if (
|
||||
error.name === "ApiError" &&
|
||||
[401, 403, 405].indexOf(error.status) !== -1 &&
|
||||
new URL(this.config.server.endpoint).hostname.endsWith("app.tabbyml.com") &&
|
||||
this.config.server.requestHeaders["Authorization"] === undefined
|
||||
) {
|
||||
this.logger.debug({ api: api.name, error }, "API unauthorized");
|
||||
this.logger.debug({ requestId, api: api.name, error }, "API unauthorized");
|
||||
this.changeStatus("unauthorized");
|
||||
} else if (error.name === "ApiError") {
|
||||
this.logger.error({ api: api.name, error }, "API error");
|
||||
this.logger.error({ requestId, api: api.name, error }, "API error");
|
||||
this.changeStatus("disconnected");
|
||||
} else {
|
||||
this.logger.error({ api: api.name, error }, "API request failed with unknown error");
|
||||
this.logger.error({ requestId, api: api.name, error }, "API request failed with unknown error");
|
||||
this.changeStatus("disconnected");
|
||||
}
|
||||
throw error;
|
||||
}),
|
||||
() => {
|
||||
promise.cancel();
|
||||
},
|
||||
);
|
||||
// don't record cancelled request in stats
|
||||
if (api.name === "completion" && (error.isTimeoutError || !error.isCancelled)) {
|
||||
this.completionResponseStats.push({
|
||||
name: api.name,
|
||||
status: error.status,
|
||||
responseTime: performance.now() - requestStartedAt,
|
||||
error,
|
||||
});
|
||||
}
|
||||
if (timeout) {
|
||||
clearTimeout(timeout);
|
||||
}
|
||||
reject(error);
|
||||
});
|
||||
// It seems that openapi-typescript-codegen does not provide timeout options passing to axios,
|
||||
// Just use setTimeout to cancel the request manually.
|
||||
if (options.timeout && options.timeout > 0) {
|
||||
timeout = setTimeout(
|
||||
() => {
|
||||
this.logger.debug({ api: api.name, timeout: options.timeout }, "Cancel API request due to timeout");
|
||||
timeoutCancelled = true;
|
||||
apiRequest.cancel();
|
||||
},
|
||||
Math.min(options.timeout, 0x7fffffff),
|
||||
);
|
||||
}
|
||||
onCancel(() => {
|
||||
if (timeout) {
|
||||
clearTimeout(timeout);
|
||||
}
|
||||
apiRequest.cancel();
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
private healthCheck(): Promise<any> {
|
||||
return this.callApi(this.api.v1.health, {}).catch(() => {});
|
||||
return this.callApi(this.api.v1.health, {})
|
||||
.then((healthState) => {
|
||||
this.serverHealthState = healthState;
|
||||
})
|
||||
.catch(() => {});
|
||||
}
|
||||
|
||||
private createSegments(request: CompletionRequest): { prefix: string; suffix: string } {
|
||||
|
|
@ -198,6 +308,10 @@ export class TabbyAgent extends EventEmitter implements Agent {
|
|||
}
|
||||
const prevStatus = this.status;
|
||||
await this.applyConfig();
|
||||
// If server config changed, clear server health state
|
||||
if (key.startsWith("server")) {
|
||||
this.serverHealthState = null;
|
||||
}
|
||||
// If status unchanged, `authRequired` will not be emitted when `applyConfig`,
|
||||
// so we need to emit it manually.
|
||||
if (key.startsWith("server") && prevStatus === "unauthorized" && this.status === "unauthorized") {
|
||||
|
|
@ -222,6 +336,14 @@ export class TabbyAgent extends EventEmitter implements Agent {
|
|||
return this.status;
|
||||
}
|
||||
|
||||
/**
 * @returns the tracked issues, each expanded with its details, in tracking order
 */
public getIssues(): AgentIssue[] {
  const detailed: AgentIssue[] = [];
  for (const name of this.issues) {
    detailed.push(this.issueWithDetails(name));
  }
  return detailed;
}
|
||||
|
||||
/**
 * @returns the stored server health state, or null when none is stored
 */
public getServerHealthState(): ServerHealthState | null {
  return this.serverHealthState;
}
|
||||
|
||||
public requestAuthUrl(): CancelablePromise<{ authUrl: string; code: string } | null> {
|
||||
if (this.status === "notInitialized") {
|
||||
return cancelable(Promise.reject("Agent is not initialized"), () => {});
|
||||
|
|
@ -283,11 +405,17 @@ export class TabbyAgent extends EventEmitter implements Agent {
|
|||
choices: [],
|
||||
};
|
||||
}
|
||||
const apiRequest = this.callApi(this.api.v1.completion, {
|
||||
language: request.language,
|
||||
segments,
|
||||
user: this.auth?.user,
|
||||
});
|
||||
const apiRequest = this.callApi(
|
||||
this.api.v1.completion,
|
||||
{
|
||||
language: request.language,
|
||||
segments,
|
||||
user: this.auth?.user,
|
||||
},
|
||||
{
|
||||
timeout: request.manually ? this.config.completion.timeout.manually : this.config.completion.timeout.auto,
|
||||
},
|
||||
);
|
||||
cancelableList.push(apiRequest);
|
||||
return apiRequest
|
||||
.then((response) => {
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import {
|
|||
InlineCompletionItem,
|
||||
InlineCompletionItemProvider,
|
||||
InlineCompletionList,
|
||||
InlineCompletionTriggerKind,
|
||||
Position,
|
||||
ProviderResult,
|
||||
Range,
|
||||
|
|
@ -65,6 +66,7 @@ export class TabbyCompletionProvider implements InlineCompletionItemProvider {
|
|||
language: document.languageId, // https://code.visualstudio.com/docs/languages/identifiers
|
||||
text: document.getText(),
|
||||
position: document.offsetAt(position),
|
||||
manually: context.triggerKind === InlineCompletionTriggerKind.Invoke,
|
||||
maxPrefixLines: this.maxPrefixLines,
|
||||
maxSuffixLines: this.maxSuffixLines,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -174,6 +174,16 @@ const statusBarItemClicked: Command = {
|
|||
case "unauthorized":
|
||||
notifications.showInformationStartAuth();
|
||||
break;
|
||||
case "issuesExist":
|
||||
switch (agent().getIssues()[0]?.name) {
|
||||
case "slowCompletionResponseTime":
|
||||
notifications.showInformationWhenSlowCompletionResponseTime();
|
||||
break;
|
||||
case "highCompletionTimeoutRate":
|
||||
notifications.showInformationWhenHighCompletionTimeoutRate();
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case "disabled":
|
||||
const enabled = workspace.getConfiguration("tabby").get("codeCompletion", true);
|
||||
const inlineSuggestEnabled = workspace.getConfiguration("editor").get("inlineSuggest.enabled", true);
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import { commands, window, workspace, ConfigurationTarget } from "vscode";
|
||||
import { commands, window, workspace, env, ConfigurationTarget, Uri } from "vscode";
|
||||
import { agent } from "./agent";
|
||||
|
||||
function showInformationWhenLoading() {
|
||||
window.showInformationMessage("Tabby is initializing.", "Settings").then((selection) => {
|
||||
|
|
@ -109,6 +110,113 @@ function showInformationWhenInlineSuggestDisabled() {
|
|||
});
|
||||
}
|
||||
|
||||
/**
 * Builds the help text listing possible causes of slow or timed-out
 * completion requests.
 *
 * When the server health state reports device `cpu` and a model name ending
 * in a size such as `7B`, the text leads with a CPU-specific hint and then
 * lists the other causes; otherwise the oversized model is listed as the
 * last possible cause.
 */
function getHelpMessageForCompletionResponseTimeIssue() {
  const serverHealthState = agent().getServerHealthState();
  const model = serverHealthState?.model;
  const isLargeModelOnCpu = serverHealthState?.device === "cpu" && Boolean(model?.match(/[0-9\.]+B$/));

  const networkCause = " - A poor network connection. Please check your network and proxy settings.\n";
  const overloadCause = " - Server overload. Please contact your Tabby server administrator for assistance.\n";

  if (isLargeModelOnCpu) {
    return (
      `Your Tabby server is running model ${model} on CPU. ` +
      "This model is too large to run on CPU, please try a smaller model or switch to GPU. " +
      "You can find supported model list by search TabbyML on HuggingFace. \n" +
      "\n" +
      "Other possible causes of this issue are: \n" +
      networkCause +
      overloadCause
    );
  }

  return (
    "Possible causes of this issue are: \n" +
    networkCause +
    overloadCause +
    ` - The running model ${model ?? ""} is too large to run on your Tabby server. ` +
    "Please try a smaller model. You can find supported model list by search TabbyML on HuggingFace.\n"
  );
}
|
||||
|
||||
/**
 * Warns that completion requests are slow.
 *
 * Without `modal`, shows a short warning offering "Detail" (re-opens this
 * warning as a modal) and "Settings" (runs `tabby.openSettings`). With
 * `modal`, shows a modal warning whose detail combines the average response
 * time, when available, with the help message, and offers a link to the
 * supported models.
 *
 * @param modal whether to show the detailed modal variant
 */
function showInformationWhenSlowCompletionResponseTime(modal: boolean = false) {
  if (!modal) {
    window
      .showWarningMessage("Completion requests appear to take too much time.", "Detail", "Settings")
      .then((selection) => {
        if (selection === "Detail") {
          showInformationWhenSlowCompletionResponseTime(true);
        } else if (selection === "Settings") {
          commands.executeCommand("tabby.openSettings");
        }
      });
    return;
  }

  const slowIssue = agent()
    .getIssues()
    .find((issue) => issue.name === "slowCompletionResponseTime");
  const stats = slowIssue?.completionResponseStats;
  let statsMessage = "";
  if (stats && stats["responses"] && stats["averageResponseTime"]) {
    const averageResponseTime = Number(stats["averageResponseTime"]).toFixed(0);
    statsMessage = `The average response time of recent ${stats["responses"]} completion requests is ${averageResponseTime}ms.\n\n`;
  }
  const detail = statsMessage + getHelpMessageForCompletionResponseTimeIssue();
  window
    .showWarningMessage("Completion requests appear to take too much time.", { modal: true, detail }, "Supported Models")
    .then((selection) => {
      if (selection === "Supported Models") {
        env.openExternal(Uri.parse("https://huggingface.co/models?search=tabbyml"));
      }
    });
}
|
||||
|
||||
/**
 * Warns that most completion requests timed out.
 *
 * Without `modal`, shows a short warning offering "Detail" (re-opens this
 * warning as a modal) and "Settings" (runs `tabby.openSettings`). With
 * `modal`, shows a modal warning whose detail combines the timeout counts,
 * when available, with the help message, and offers a link to the supported
 * models.
 *
 * @param modal whether to show the detailed modal variant
 */
function showInformationWhenHighCompletionTimeoutRate(modal: boolean = false) {
  if (!modal) {
    window.showWarningMessage("Most completion requests timed out.", "Detail", "Settings").then((selection) => {
      if (selection === "Detail") {
        showInformationWhenHighCompletionTimeoutRate(true);
      } else if (selection === "Settings") {
        commands.executeCommand("tabby.openSettings");
      }
    });
    return;
  }

  const timeoutIssue = agent()
    .getIssues()
    .find((issue) => issue.name === "highCompletionTimeoutRate");
  const stats = timeoutIssue?.completionResponseStats;
  let statsMessage = "";
  if (stats && stats["total"] && stats["timeouts"]) {
    statsMessage = `${stats["timeouts"]} of ${stats["total"]} completion requests timed out.\n\n`;
  }
  const detail = statsMessage + getHelpMessageForCompletionResponseTimeIssue();
  window
    .showWarningMessage("Most completion requests timed out.", { modal: true, detail }, "Supported Models")
    .then((selection) => {
      if (selection === "Supported Models") {
        env.openExternal(Uri.parse("https://huggingface.co/models?search=tabbyml"));
      }
    });
}
|
||||
|
||||
export const notifications = {
|
||||
showInformationWhenLoading,
|
||||
showInformationWhenDisabled,
|
||||
|
|
@ -119,4 +227,6 @@ export const notifications = {
|
|||
showInformationWhenStartAuthButAlreadyAuthorized,
|
||||
showInformationWhenAuthFailed,
|
||||
showInformationWhenInlineSuggestDisabled,
|
||||
showInformationWhenSlowCompletionResponseTime,
|
||||
showInformationWhenHighCompletionTimeoutRate,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ const iconLoading = "$(loading~spin)";
|
|||
const iconReady = "$(check)";
|
||||
const iconDisconnected = "$(plug)";
|
||||
const iconUnauthorized = "$(key)";
|
||||
const iconIssueExist = "$(warning)";
|
||||
const iconDisabled = "$(x)";
|
||||
const colorNormal = new ThemeColor("statusBar.foreground");
|
||||
const colorWarning = new ThemeColor("statusBarItem.warningForeground");
|
||||
|
|
@ -20,15 +21,33 @@ const fsm = createMachine({
|
|||
initial: "loading",
|
||||
states: {
|
||||
loading: {
|
||||
on: { ready: "ready", disconnected: "disconnected", unauthorized: "unauthorized", disabled: "disabled" },
|
||||
on: {
|
||||
ready: "ready",
|
||||
disconnected: "disconnected",
|
||||
unauthorized: "unauthorized",
|
||||
issuesExist: "issuesExist",
|
||||
disabled: "disabled",
|
||||
},
|
||||
entry: () => toLoading(),
|
||||
},
|
||||
ready: {
|
||||
on: { disconnected: "disconnected", unauthorized: "unauthorized", disabled: "disabled" },
|
||||
on: {
|
||||
loading: "loading",
|
||||
disconnected: "disconnected",
|
||||
unauthorized: "unauthorized",
|
||||
issuesExist: "issuesExist",
|
||||
disabled: "disabled",
|
||||
},
|
||||
entry: () => toReady(),
|
||||
},
|
||||
disconnected: {
|
||||
on: { ready: "ready", unauthorized: "unauthorized", disabled: "disabled" },
|
||||
on: {
|
||||
loading: "loading",
|
||||
ready: "ready",
|
||||
unauthorized: "unauthorized",
|
||||
issuesExist: "issuesExist",
|
||||
disabled: "disabled",
|
||||
},
|
||||
entry: () => toDisconnected(),
|
||||
},
|
||||
unauthorized: {
|
||||
|
|
@ -36,17 +55,39 @@ const fsm = createMachine({
|
|||
ready: "ready",
|
||||
disconnected: "disconnected",
|
||||
disabled: "disabled",
|
||||
issuesExist: "issuesExist",
|
||||
authStart: "unauthorizedAndAuthInProgress",
|
||||
},
|
||||
entry: () => toUnauthorized(),
|
||||
},
|
||||
unauthorizedAndAuthInProgress: {
|
||||
// if auth succeeds, we will get `ready` before `authEnd` event
|
||||
on: { ready: "ready", disconnected: "disconnected", disabled: "disabled", authEnd: "unauthorized" },
|
||||
on: {
|
||||
ready: "ready",
|
||||
disconnected: "disconnected",
|
||||
issuesExist: "issuesExist",
|
||||
disabled: "disabled",
|
||||
authEnd: "unauthorized", // if auth succeeds, we will get `ready` before `authEnd` event
|
||||
},
|
||||
entry: () => toUnauthorizedAndAuthInProgress(),
|
||||
},
|
||||
issuesExist: {
|
||||
on: {
|
||||
loading: "loading",
|
||||
ready: "ready",
|
||||
disconnected: "disconnected",
|
||||
unauthorized: "unauthorized",
|
||||
disabled: "disabled",
|
||||
},
|
||||
entry: () => toIssuesExist(),
|
||||
},
|
||||
disabled: {
|
||||
on: { loading: "loading", ready: "ready", disconnected: "disconnected", unauthorized: "unauthorized" },
|
||||
on: {
|
||||
loading: "loading",
|
||||
ready: "ready",
|
||||
disconnected: "disconnected",
|
||||
unauthorized: "unauthorized",
|
||||
issuesExist: "issuesExist",
|
||||
},
|
||||
entry: () => toDisabled(),
|
||||
},
|
||||
},
|
||||
|
|
@ -93,6 +134,24 @@ function toUnauthorizedAndAuthInProgress() {
|
|||
item.command = undefined;
|
||||
}
|
||||
|
||||
/**
 * Puts the status bar item into the "issues exist" presentation: warning
 * colors, the warning icon in front of the label, a tooltip describing the
 * first issue reported by the agent (empty for unknown or missing issues),
 * and a click command that forwards "issuesExist" to
 * `tabby.statusBarItemClicked`.
 */
function toIssuesExist() {
  item.color = colorWarning;
  item.backgroundColor = backgroundColorWarning;
  item.text = `${iconIssueExist} ${label}`;

  // Only the first issue in the agent's list determines the tooltip.
  const firstIssueName = agent().getIssues()[0]?.name;
  if (firstIssueName === "slowCompletionResponseTime") {
    item.tooltip = "Completion requests appear to take too much time.";
  } else if (firstIssueName === "highCompletionTimeoutRate") {
    item.tooltip = "Most completion requests timed out.";
  } else {
    item.tooltip = "";
  }

  item.command = { title: "", command: "tabby.statusBarItemClicked", arguments: ["issuesExist"] };
}
|
||||
|
||||
function toDisabled() {
|
||||
item.color = colorWarning;
|
||||
item.backgroundColor = backgroundColorWarning;
|
||||
|
|
@ -115,6 +174,7 @@ function updateStatusBarItem() {
|
|||
case "ready":
|
||||
case "disconnected":
|
||||
case "unauthorized":
|
||||
case "issuesExist":
|
||||
fsmService.send(status);
|
||||
break;
|
||||
}
|
||||
|
|
@ -143,6 +203,14 @@ export const tabbyStatusBarItem = () => {
|
|||
});
|
||||
});
|
||||
|
||||
agent().on("newIssue", (event) => {
|
||||
if (event.issue.name === "slowCompletionResponseTime") {
|
||||
notifications.showInformationWhenSlowCompletionResponseTime();
|
||||
} else if (event.issue.name === "highCompletionTimeoutRate") {
|
||||
notifications.showInformationWhenHighCompletionTimeoutRate();
|
||||
}
|
||||
});
|
||||
|
||||
item.show();
|
||||
return item;
|
||||
};
|
||||
|
|
|
|||
Loading…
Reference in New Issue