feat(agent): monitor completion response time. (#359)

* feat(agent): monitor completion response time. * fix(agent): improve help message for running large model on cpu. * fix: notification spacing.
2023-08-17 22:28:41 +08:00 · 2023-08-17 22:28:41 +08:00 · 732d83feef
parent 2026b4dd0e
commit 732d83feef
9 changed files with 533 additions and 136 deletions
--- a/clients/tabby-agent/openapi/tabby.json
+++ b/clients/tabby-agent/openapi/tabby.json
@ -37,17 +37,9 @@
        "responses": {
          "200": {
            "description": "Success",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/CompletionResponse"
-                }
-              }
-            }
+            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CompletionResponse" } } }
          },
-          "400": {
-            "description": "Bad Request"
-          }
+          "400": { "description": "Bad Request" }
        }
      }
    },
@ -56,23 +48,10 @@
        "tags": ["v1"],
        "operationId": "event",
        "requestBody": {
-          "content": {
-            "application/json": {
-              "schema": {
-                "$ref": "#/components/schemas/LogEventRequest"
-              }
-            }
-          },
+          "content": { "application/json": { "schema": { "$ref": "#/components/schemas/LogEventRequest" } } },
          "required": true
        },
-        "responses": {
-          "200": {
-            "description": "Success"
-          },
-          "400": {
-            "description": "Bad Request"
-          }
-        }
+        "responses": { "200": { "description": "Success" }, "400": { "description": "Bad Request" } }
      }
    },
    "/v1/health": {
@ -82,13 +61,7 @@
        "responses": {
          "200": {
            "description": "Success",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/HealthState"
-                }
-              }
-            }
+            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HealthState" } } }
          }
        }
      }
@ -100,108 +73,62 @@
        "type": "object",
        "required": ["index", "text"],
        "properties": {
-          "index": {
-            "type": "integer",
-            "format": "int32",
-            "minimum": 0
-          },
-          "text": {
-            "type": "string"
-          }
+          "index": { "type": "integer", "format": "int32", "minimum": 0.0 },
+          "text": { "type": "string" }
        }
      },
      "CompletionRequest": {
        "type": "object",
        "properties": {
-          "prompt": {
-            "type": "string",
-            "example": "def fib(n):",
-            "nullable": true
-          },
+          "prompt": { "type": "string", "example": "def fib(n):", "nullable": true },
          "language": {
            "type": "string",
            "description": "Language identifier, full list is maintained at\nhttps://code.visualstudio.com/docs/languages/identifiers",
            "example": "python",
            "nullable": true
          },
-          "segments": {
-            "allOf": [
-              {
-                "$ref": "#/components/schemas/Segments"
-              }
-            ],
-            "nullable": true
-          },
-          "user": {
-            "type": "string",
-            "nullable": true
-          }
+          "segments": { "allOf": [{ "$ref": "#/components/schemas/Segments" }], "nullable": true },
+          "user": { "type": "string", "nullable": true }
        },
        "example": {
          "language": "python",
-          "segments": {
-            "prefix": "def fib(n):\n    ",
-            "suffix": "\n        return fib(n - 1) + fib(n - 2)"
-          }
+          "segments": { "prefix": "def fib(n):\n    ", "suffix": "\n        return fib(n - 1) + fib(n - 2)" }
        }
      },
      "CompletionResponse": {
        "type": "object",
        "required": ["id", "choices"],
        "properties": {
-          "id": {
-            "type": "string"
-          },
-          "choices": {
-            "type": "array",
-            "items": {
-              "$ref": "#/components/schemas/Choice"
-            }
-          }
+          "id": { "type": "string" },
+          "choices": { "type": "array", "items": { "$ref": "#/components/schemas/Choice" } }
        }
      },
      "HealthState": {
        "type": "object",
-        "required": ["model", "device", "compute_type"],
+        "required": ["model", "device", "compute_type", "arch", "cpu_info", "cpu_count"],
        "properties": {
-          "model": {
-            "type": "string"
-          },
-          "device": {
-            "type": "string"
-          },
-          "compute_type": {
-            "type": "string"
-          }
+          "model": { "type": "string" },
+          "device": { "type": "string" },
+          "compute_type": { "type": "string" },
+          "arch": { "type": "string" },
+          "cpu_info": { "type": "string" },
+          "cpu_count": { "type": "integer", "minimum": 0.0 }
        }
      },
      "LogEventRequest": {
        "type": "object",
        "required": ["type", "completion_id", "choice_index"],
        "properties": {
-          "type": {
-            "type": "string",
-            "description": "Event type, should be `view` or `select`.",
-            "example": "view"
-          },
-          "completion_id": {
-            "type": "string"
-          },
-          "choice_index": {
-            "type": "integer",
-            "format": "int32",
-            "minimum": 0
-          }
+          "type": { "type": "string", "description": "Event type, should be `view` or `select`.", "example": "view" },
+          "completion_id": { "type": "string" },
+          "choice_index": { "type": "integer", "format": "int32", "minimum": 0.0 }
        }
      },
      "Segments": {
        "type": "object",
        "required": ["prefix"],
        "properties": {
-          "prefix": {
-            "type": "string",
-            "description": "Content that appears before the cursor in the editor window."
-          },
+          "prefix": { "type": "string", "description": "Content that appears before the cursor in the editor window." },
          "suffix": {
            "type": "string",
            "description": "Content that appears after the cursor in the editor window.",
--- a/clients/tabby-agent/src/Agent.ts
+++ b/clients/tabby-agent/src/Agent.ts
@ -2,6 +2,7 @@ import {
  CancelablePromise,
  LogEventRequest as ApiLogEventRequest,
  CompletionResponse as ApiCompletionResponse,
+  HealthState,
 } from "./generated";

 import { AgentConfig, PartialAgentConfig } from "./AgentConfig";
@ -11,11 +12,14 @@ export type AgentInitOptions = Partial<{
  client: string;
 }>;

+export type ServerHealthState = HealthState;
+
 export type CompletionRequest = {
  filepath: string;
  language: string;
  text: string;
  position: number;
+  manually?: boolean;
  maxPrefixLines?: number;
  maxSuffixLines?: number;
 };
@ -24,16 +28,31 @@ export type CompletionResponse = ApiCompletionResponse;

 export type LogEventRequest = ApiLogEventRequest;

+/**
+ * Issue reported when completion responses are too slow.
+ * `completionResponseStats` carries the named statistics of recent completion requests.
+ */
+export type SlowCompletionResponseTimeIssue = {
+  name: "slowCompletionResponseTime";
+  completionResponseStats: Record<string, number>;
+};
+/**
+ * Issue reported when too many completion requests time out.
+ * `completionResponseStats` carries the named statistics of recent completion requests.
+ */
+export type HighCompletionTimeoutRateIssue = {
+  name: "highCompletionTimeoutRate";
+  completionResponseStats: Record<string, number>;
+};
+/** Any issue the agent can report; the `name` field discriminates the variants. */
+export type AgentIssue = SlowCompletionResponseTimeIssue | HighCompletionTimeoutRateIssue;
+
 /**
- * `notInitialized`: When the agent is not initialized.
- * `ready`: When the agent get a valid response from the server, and is ready to use.
- * `disconnected`: When the agent failed to connect to the server.
- * `unauthorized`: When the server is set to a Tabby Cloud endpoint that requires auth,
+ * Represents the status of the agent.
+ * @enum
+ * @property {string} notInitialized - When the agent is not initialized.
+ * @property {string} ready - When the agent gets a valid response from the server.
+ * @property {string} disconnected - When the agent fails to connect to the server.
+ * @property {string} unauthorized - When the server is set to a Tabby Cloud endpoint that requires auth,
 *   and no `Authorization` request header is provided in the agent config,
- *   and user has not completed the auth flow or the auth token is expired.
+ *   and the user has not completed the auth flow or the auth token is expired.
 *   See also `requestAuthUrl` and `waitForAuthToken`.
+ * @property {string} issuesExist - When the agent gets a valid response from the server, but still
+ *   has some non-blocking issues, e.g. the average completion response time is too slow,
+ *   or the timeout rate is too high.
 */
-export type AgentStatus = "notInitialized" | "ready" | "disconnected" | "unauthorized";
+export type AgentStatus = "notInitialized" | "ready" | "disconnected" | "unauthorized" | "issuesExist";

 export interface AgentFunction {
  /**
@ -70,6 +89,16 @@ export interface AgentFunction {
   */
  getStatus(): AgentStatus;

+  /**
+   * @returns the current issues if AgentStatus is `issuesExist`, otherwise returns empty array
+   */
+  getIssues(): AgentIssue[];
+
+  /**
+   * @returns server info returned from latest server health check, returns null if not available
+   */
+  getServerHealthState(): ServerHealthState | null;
+
  /**
   * Request auth url for Tabby Cloud endpoint. Only return value when the `AgentStatus` is `unauthorized`.
   * Otherwise, return null. See also `AgentStatus`.
@ -117,9 +146,13 @@ export type AuthRequiredEvent = {
  event: "authRequired";
  server: AgentConfig["server"];
 };
+export type NewIssueEvent = {
+  event: "newIssue";
+  issue: AgentIssue;
+};

-export type AgentEvent = StatusChangedEvent | ConfigUpdatedEvent | AuthRequiredEvent;
-export const agentEventNames: AgentEvent["event"][] = ["statusChanged", "configUpdated", "authRequired"];
+export type AgentEvent = StatusChangedEvent | ConfigUpdatedEvent | AuthRequiredEvent | NewIssueEvent;
+export const agentEventNames: AgentEvent["event"][] = ["statusChanged", "configUpdated", "authRequired", "newIssue"];

 export interface AgentEventEmitter {
  on<T extends AgentEvent>(eventName: T["event"], callback: (event: T) => void): this;
--- a/clients/tabby-agent/src/AgentConfig.ts
+++ b/clients/tabby-agent/src/AgentConfig.ts
@ -4,8 +4,13 @@ export type AgentConfig = {
  server: {
    endpoint: string;
    requestHeaders: Record<string, string>;
+    requestTimeout: number;
  };
  completion: {
+    timeout: {
+      auto: number;
+      manually: number;
+    };
    maxPrefixLines: number;
    maxSuffixLines: number;
  };
@ -31,8 +36,13 @@ export const defaultAgentConfig: AgentConfig = {
  server: {
    endpoint: "http://localhost:8080",
    requestHeaders: {},
+    requestTimeout: 30000, // 30s
  },
  completion: {
+    timeout: {
+      auto: 5000, // 5s
+      manually: 30000, // 30s
+    },
    maxPrefixLines: 20,
    maxSuffixLines: 20,
  },
--- a/clients/tabby-agent/src/ResponseStats.ts
+++ b/clients/tabby-agent/src/ResponseStats.ts
@ -0,0 +1,109 @@
+import { EventEmitter } from "events";
+import { rootLogger } from "./logger";
+
+/**
+ * One recorded API request outcome.
+ */
+export type ResponseStatsEntry = {
+  // identifier of the API that was called
+  name: string;
+  // status of the outcome; the stats in this file treat 200 as a successful response
+  status: number;
+  // elapsed time of the request; the thresholds in this file (3000, 4000) treat it as milliseconds
+  responseTime: number;
+  // error attached to a failed request, if any; `error.isTimeoutError` is read to count timeouts
+  error?: any;
+};
+
+/**
+ * Describes how a `ResponseStats` instance stores entries and evaluates them.
+ */
+export type ResponseStatsStrategy = {
+  // maximum number of entries kept; the oldest entry is dropped once this is exceeded
+  maxSize: number;
+  // named statistics, each computed from the stored entries
+  stats: Record<string, (entries: ResponseStatsEntry[]) => number>;
+  // named conditions evaluated against the stored entries and the computed statistics;
+  // when a condition holds, its `name` is used as the emitted event name
+  checks: {
+    name: string;
+    check: (entries: ResponseStatsEntry[], stats: Record<string, number>) => boolean;
+  }[];
+};
+
+/**
+ * Strategy for monitoring completion requests: keeps the latest 50 entries, derives
+ * request counters plus the average response time, and defines the ordered health checks.
+ */
+export const completionResponseTimeStatsStrategy = {
+  maxSize: 50,
+  stats: {
+    // number of recorded requests
+    total: (entries: ResponseStatsEntry[]) => entries.length,
+    // number of requests that finished with status 200
+    responses: (entries: ResponseStatsEntry[]) => entries.filter((entry) => entry.status === 200).length,
+    // number of requests whose error is flagged as a timeout
+    timeouts: (entries: ResponseStatsEntry[]) => entries.filter((entry) => entry.error?.isTimeoutError).length,
+    // Mean response time of the successful (status 200) requests, or 0 when there are none.
+    // Only successful requests contribute to the sum, so only they may be counted in the
+    // divisor; dividing by the total would let failures and timeouts hide slow responses.
+    averageResponseTime: (entries: ResponseStatsEntry[]) => {
+      const succeeded = entries.filter((entry) => entry.status === 200);
+      if (succeeded.length === 0) {
+        return 0;
+      }
+      return succeeded.reduce((acc, entry) => acc + entry.responseTime, 0) / succeeded.length;
+    },
+  },
+  checks: [
+    // checks are listed in priority order; the first one that matches is the one reported
+    // if all the last 5 entries have status 200 and response time less than 3s
+    {
+      name: "healthy",
+      check: (entries: ResponseStatsEntry[], stats: Record<string, number>) => {
+        const recentEntries = entries.slice(-5);
+        return recentEntries.every((entry) => entry.status === 200 && entry.responseTime < 3000);
+      },
+    },
+    // if TimeoutError percentage is more than 50%, at least 3 requests
+    {
+      name: "highTimeoutRate",
+      check: (entries: ResponseStatsEntry[], stats: Record<string, number>) => {
+        if (stats.total < 3) {
+          return false;
+        }
+        return stats.timeouts / stats.total > 0.5;
+      },
+    },
+    // if average response time is more than 4s, at least 5 successful responses
+    {
+      name: "slowResponseTime",
+      check: (entries: ResponseStatsEntry[], stats: Record<string, number>) => {
+        if (stats.responses < 5) {
+          return false;
+        }
+        return stats.averageResponseTime > 4000;
+      },
+    },
+  ],
+};
+
+/**
+ * Keeps a bounded window of response entries and evaluates the strategy's checks on it.
+ *
+ * After every `push`, the checks are evaluated in the order the strategy lists them and
+ * an event named after the first matching check is emitted with the current statistics.
+ */
+export class ResponseStats extends EventEmitter {
+  private readonly logger = rootLogger.child({ component: "ResponseStats" });
+  private readonly strategy: ResponseStatsStrategy;
+
+  private entries: ResponseStatsEntry[] = [];
+
+  /**
+   * @param strategy window size, statistics and checks to apply to the recorded entries
+   */
+  constructor(strategy: ResponseStatsStrategy) {
+    super();
+    this.strategy = strategy;
+  }
+
+  /**
+   * Records an entry, drops the oldest one when the window is full, and emits the event
+   * of the first matching check (if any) with the freshly computed statistics.
+   */
+  push(entry: ResponseStatsEntry): void {
+    this.entries.push(entry);
+    if (this.entries.length > this.strategy.maxSize) {
+      this.entries.shift();
+    }
+    const stats = this.stats();
+    // Only the first matching check is reported: emitting every match would let a
+    // lower-priority check immediately undo what a higher-priority one just signalled.
+    const matched = this.firstMatchingCheck(stats);
+    if (matched !== null) {
+      this.logger.debug({ check: matched, stats }, "Check condition met");
+      this.emit(matched, stats);
+    }
+  }
+
+  /**
+   * @returns every statistic of the strategy, computed from the current entries
+   */
+  stats(): Record<string, number> {
+    const result: Record<string, number> = {};
+    for (const [name, stats] of Object.entries(this.strategy.stats)) {
+      result[name] = stats(this.entries);
+    }
+    return result;
+  }
+
+  /**
+   * @returns the name of the first check that currently matches, or null if none does
+   */
+  check(): string | null {
+    return this.firstMatchingCheck(this.stats());
+  }
+
+  // Evaluates the checks in strategy order against the current entries.
+  private firstMatchingCheck(stats: Record<string, number>): string | null {
+    for (const check of this.strategy.checks) {
+      if (check.check(this.entries, stats)) {
+        return check.name;
+      }
+    }
+    return null;
+  }
+}
--- a/clients/tabby-agent/src/TabbyAgent.ts
+++ b/clients/tabby-agent/src/TabbyAgent.ts
@ -8,8 +8,10 @@ import { cancelable, splitLines, isBlank } from "./utils";
 import {
  Agent,
  AgentStatus,
+  AgentIssue,
  AgentEvent,
  AgentInitOptions,
+  ServerHealthState,
  CompletionRequest,
  CompletionResponse,
  LogEventRequest,
@ -21,6 +23,7 @@ import { DataStore } from "./dataStore";
 import { postprocess, preCacheProcess } from "./postprocess";
 import { rootLogger, allLoggers } from "./logger";
 import { AnonymousUsageLogger } from "./AnonymousUsageLogger";
+import { ResponseStats, completionResponseTimeStatsStrategy } from "./ResponseStats";

 /**
 * Different from AgentInitOptions or AgentConfig, this may contain non-serializable objects,
@ -37,12 +40,15 @@ export class TabbyAgent extends EventEmitter implements Agent {
  private userConfig: PartialAgentConfig = {}; // config from `~/.tabby/agent/config.toml`
  private clientConfig: PartialAgentConfig = {}; // config from `initialize` and `updateConfig` method
  private status: AgentStatus = "notInitialized";
+  private issues: AgentIssue["name"][] = [];
+  private serverHealthState: ServerHealthState | null = null;
  private api: TabbyApi;
  private auth: Auth;
  private dataStore: DataStore | null = null;
  private completionCache: CompletionCache = new CompletionCache();
  static readonly tryConnectInterval = 1000 * 30; // 30s
  private tryingConnectTimer: ReturnType<typeof setInterval> | null = null;
+  private completionResponseStats: ResponseStats = new ResponseStats(completionResponseTimeStatsStrategy);

  private constructor() {
    super();
@ -53,6 +59,23 @@ export class TabbyAgent extends EventEmitter implements Agent {
        await this.healthCheck();
      }
    }, TabbyAgent.tryConnectInterval);
+
+    this.completionResponseStats.on("healthy", () => {
+      this.popIssue("slowCompletionResponseTime");
+      this.popIssue("highCompletionTimeoutRate");
+    });
+    this.completionResponseStats.on("highTimeoutRate", () => {
+      if (this.status === "ready" || this.status === "issuesExist") {
+        this.popIssue("slowCompletionResponseTime");
+        this.pushIssue("highCompletionTimeoutRate");
+      }
+    });
+    this.completionResponseStats.on("slowResponseTime", () => {
+      if (this.status === "ready" || this.status === "issuesExist") {
+        this.popIssue("highCompletionTimeoutRate");
+        this.pushIssue("slowCompletionResponseTime");
+      }
+    });
  }

  static async create(options?: TabbyAgentOptions): Promise<TabbyAgent> {
@ -101,6 +124,38 @@ export class TabbyAgent extends EventEmitter implements Agent {
    }
  }

+  /**
+   * Expands an issue name into the full issue object by attaching the current
+   * completion response statistics.
+   */
+  private issueWithDetails(issue: AgentIssue["name"]): AgentIssue {
+    const completionResponseStats = this.completionResponseStats.stats();
+    switch (issue) {
+      case "highCompletionTimeoutRate":
+        return { name: "highCompletionTimeoutRate", completionResponseStats };
+      case "slowCompletionResponseTime":
+        return { name: "slowCompletionResponseTime", completionResponseStats };
+    }
+  }
+
+  /**
+   * Registers an issue that is not yet tracked: switches the status to `issuesExist`
+   * and emits a `newIssue` event. Does nothing when the issue is already tracked.
+   */
+  private pushIssue(issue: AgentIssue["name"]) {
+    const alreadyTracked = this.issues.indexOf(issue) !== -1;
+    if (alreadyTracked) {
+      return;
+    }
+    this.issues.push(issue);
+    this.changeStatus("issuesExist");
+    const event: AgentEvent = { event: "newIssue", issue: this.issueWithDetails(issue) };
+    this.logger.debug({ event }, "New issue");
+    super.emit("newIssue", event);
+  }
+
+  /**
+   * Stops tracking an issue; once no issue is left while the status is `issuesExist`,
+   * the status goes back to `ready`.
+   */
+  private popIssue(issue: AgentIssue["name"]) {
+    const remaining = this.issues.filter((name) => name !== issue);
+    this.issues = remaining;
+    if (remaining.length > 0) {
+      return;
+    }
+    if (this.status === "issuesExist") {
+      this.changeStatus("ready");
+    }
+  }
+
  private emitAuthRequired() {
    const event: AgentEvent = { event: "authRequired", server: this.config.server };
    super.emit("authRequired", event);
@ -109,44 +164,99 @@ export class TabbyAgent extends EventEmitter implements Agent {
  private callApi<Request, Response>(
    api: (request: Request) => CancelablePromise<Response>,
    request: Request,
+    options: { timeout?: number } = { timeout: this.config.server.requestTimeout },
  ): CancelablePromise<Response> {
-    this.logger.debug({ api: api.name, request }, "API request");
-    const promise = api.call(this.api.v1, request);
-    return cancelable(
-      promise
+    return new CancelablePromise((resolve, reject, onCancel) => {
+      const requestId = uuid();
+      this.logger.debug({ requestId, api: api.name, request }, "API request");
+      let timeout: ReturnType<typeof setTimeout> | null = null;
+      let timeoutCancelled = false;
+      const apiRequest = api.call(this.api.v1, request);
+      const requestStartedAt = performance.now();
+      apiRequest
        .then((response: Response) => {
-          this.logger.debug({ api: api.name, response }, "API response");
-          this.changeStatus("ready");
-          return response;
+          this.logger.debug({ requestId, api: api.name, response }, "API response");
+          if (this.status !== "issuesExist") {
+            this.changeStatus("ready");
+          }
+          if (api.name === "completion") {
+            this.completionResponseStats.push({
+              name: api.name,
+              status: 200,
+              responseTime: performance.now() - requestStartedAt,
+            });
+          }
+          if (timeout) {
+            clearTimeout(timeout);
+          }
+          resolve(response);
        })
        .catch((error) => {
-          if (!!error.isCancelled) {
-            this.logger.debug({ api: api.name, error }, "API request canceled");
+          if (
+            (!!error.isCancelled && timeoutCancelled) ||
+            (!error.isCancelled && error.code === "ECONNABORTED") ||
+            (error.name === "ApiError" && [408, 499].indexOf(error.status) !== -1)
+          ) {
+            error.isTimeoutError = true;
+            this.logger.debug({ requestId, api: api.name, error }, "API request timeout");
+          } else if (!!error.isCancelled) {
+            this.logger.debug({ requestId, api: api.name, error }, "API request cancelled");
          } else if (
            error.name === "ApiError" &&
            [401, 403, 405].indexOf(error.status) !== -1 &&
            new URL(this.config.server.endpoint).hostname.endsWith("app.tabbyml.com") &&
            this.config.server.requestHeaders["Authorization"] === undefined
          ) {
-            this.logger.debug({ api: api.name, error }, "API unauthorized");
+            this.logger.debug({ requestId, api: api.name, error }, "API unauthorized");
            this.changeStatus("unauthorized");
          } else if (error.name === "ApiError") {
-            this.logger.error({ api: api.name, error }, "API error");
+            this.logger.error({ requestId, api: api.name, error }, "API error");
            this.changeStatus("disconnected");
          } else {
-            this.logger.error({ api: api.name, error }, "API request failed with unknown error");
+            this.logger.error({ requestId, api: api.name, error }, "API request failed with unknown error");
            this.changeStatus("disconnected");
          }
-          throw error;
-        }),
-      () => {
-        promise.cancel();
-      },
-    );
+          // don't record cancelled request in stats
+          if (api.name === "completion" && (error.isTimeoutError || !error.isCancelled)) {
+            this.completionResponseStats.push({
+              name: api.name,
+              status: error.status,
+              responseTime: performance.now() - requestStartedAt,
+              error,
+            });
+          }
+          if (timeout) {
+            clearTimeout(timeout);
+          }
+          reject(error);
+        });
+      // It seems that openapi-typescript-codegen does not provide timeout options passing to axios,
+      // Just use setTimeout to cancel the request manually.
+      if (options.timeout && options.timeout > 0) {
+        timeout = setTimeout(
+          () => {
+            this.logger.debug({ api: api.name, timeout: options.timeout }, "Cancel API request due to timeout");
+            timeoutCancelled = true;
+            apiRequest.cancel();
+          },
+          Math.min(options.timeout, 0x7fffffff),
+        );
+      }
+      onCancel(() => {
+        if (timeout) {
+          clearTimeout(timeout);
+        }
+        apiRequest.cancel();
+      });
+    });
  }

  private healthCheck(): Promise<any> {
-    return this.callApi(this.api.v1.health, {}).catch(() => {});
+    return this.callApi(this.api.v1.health, {})
+      .then((healthState) => {
+        this.serverHealthState = healthState;
+      })
+      .catch(() => {});
  }

  private createSegments(request: CompletionRequest): { prefix: string; suffix: string } {
@ -198,6 +308,10 @@ export class TabbyAgent extends EventEmitter implements Agent {
      }
      const prevStatus = this.status;
      await this.applyConfig();
+      // If server config changed, clear server health state
+      if (key.startsWith("server")) {
+        this.serverHealthState = null;
+      }
      // If status unchanged, `authRequired` will not be emitted when `applyConfig`,
      // so we need to emit it manually.
      if (key.startsWith("server") && prevStatus === "unauthorized" && this.status === "unauthorized") {
@ -222,6 +336,14 @@ export class TabbyAgent extends EventEmitter implements Agent {
    return this.status;
  }

+  /** Returns every tracked issue together with the current completion response statistics. */
+  public getIssues(): AgentIssue[] {
+    const detailed: AgentIssue[] = [];
+    for (const name of this.issues) {
+      detailed.push(this.issueWithDetails(name));
+    }
+    return detailed;
+  }
+
+  /** Returns the stored server health state, or null when none is currently stored. */
+  public getServerHealthState(): ServerHealthState | null {
+    return this.serverHealthState;
+  }
+
  public requestAuthUrl(): CancelablePromise<{ authUrl: string; code: string } | null> {
    if (this.status === "notInitialized") {
      return cancelable(Promise.reject("Agent is not initialized"), () => {});
@ -283,11 +405,17 @@ export class TabbyAgent extends EventEmitter implements Agent {
              choices: [],
            };
          }
-          const apiRequest = this.callApi(this.api.v1.completion, {
-            language: request.language,
-            segments,
-            user: this.auth?.user,
-          });
+          const apiRequest = this.callApi(
+            this.api.v1.completion,
+            {
+              language: request.language,
+              segments,
+              user: this.auth?.user,
+            },
+            {
+              timeout: request.manually ? this.config.completion.timeout.manually : this.config.completion.timeout.auto,
+            },
+          );
          cancelableList.push(apiRequest);
          return apiRequest
            .then((response) => {
--- a/clients/vscode/src/TabbyCompletionProvider.ts
+++ b/clients/vscode/src/TabbyCompletionProvider.ts
@ -4,6 +4,7 @@ import {
  InlineCompletionItem,
  InlineCompletionItemProvider,
  InlineCompletionList,
+  InlineCompletionTriggerKind,
  Position,
  ProviderResult,
  Range,
@ -65,6 +66,7 @@ export class TabbyCompletionProvider implements InlineCompletionItemProvider {
      language: document.languageId,  // https://code.visualstudio.com/docs/languages/identifiers
      text: document.getText(),
      position: document.offsetAt(position),
+      manually: context.triggerKind === InlineCompletionTriggerKind.Invoke,
      maxPrefixLines: this.maxPrefixLines,
      maxSuffixLines: this.maxSuffixLines,
    };
--- a/clients/vscode/src/commands.ts
+++ b/clients/vscode/src/commands.ts
@ -174,6 +174,16 @@ const statusBarItemClicked: Command = {
      case "unauthorized":
        notifications.showInformationStartAuth();
        break;
+      case "issuesExist":
+        switch (agent().getIssues()[0]?.name) {
+          case "slowCompletionResponseTime":
+            notifications.showInformationWhenSlowCompletionResponseTime();
+            break;
+          case "highCompletionTimeoutRate":
+            notifications.showInformationWhenHighCompletionTimeoutRate();
+            break;
+        }
+        break;
      case "disabled":
        const enabled = workspace.getConfiguration("tabby").get("codeCompletion", true);
        const inlineSuggestEnabled = workspace.getConfiguration("editor").get("inlineSuggest.enabled", true);
--- a/clients/vscode/src/notifications.ts
+++ b/clients/vscode/src/notifications.ts
@ -1,4 +1,5 @@
-import { commands, window, workspace, ConfigurationTarget } from "vscode";
+import { commands, window, workspace, env, ConfigurationTarget, Uri } from "vscode";
+import { agent } from "./agent";

 function showInformationWhenLoading() {
  window.showInformationMessage("Tabby is initializing.", "Settings").then((selection) => {
@ -109,6 +110,113 @@ function showInformationWhenInlineSuggestDisabled() {
    });
 }

+/**
+ * Builds the help text shown with completion response time issues.
+ *
+ * When the server reports device "cpu" and a model name ending in a size such as "7B",
+ * the text leads with a hint about running a large model on CPU; otherwise the model
+ * size is listed as one of the possible causes.
+ */
+function getHelpMessageForCompletionResponseTimeIssue() {
+  const serverHealthState = agent().getServerHealthState();
+  const isLargeModelOnCpu = Boolean(
+    serverHealthState?.device === "cpu" && serverHealthState?.model?.match(/[0-9\.]+B$/),
+  );
+  const parts: string[] = [];
+  if (isLargeModelOnCpu) {
+    parts.push(
+      `Your Tabby server is running model ${serverHealthState?.model} on CPU. `,
+      "This model is too large to run on CPU, please try a smaller model or switch to GPU. ",
+      "You can find supported model list by search TabbyML on HuggingFace. \n",
+      "\n",
+      "Other possible causes of this issue are: \n",
+    );
+  } else {
+    parts.push("Possible causes of this issue are: \n");
+  }
+  parts.push(
+    " - A poor network connection. Please check your network and proxy settings.\n",
+    " - Server overload. Please contact your Tabby server administrator for assistance.\n",
+  );
+  if (!isLargeModelOnCpu) {
+    parts.push(
+      ` - The running model ${serverHealthState?.model ?? ""} is too large to run on your Tabby server. `,
+      "Please try a smaller model. You can find supported model list by search TabbyML on HuggingFace.\n",
+    );
+  }
+  return parts.join("");
+}
+
+/**
+ * Warns that completion requests are slow.
+ *
+ * @param modal when false, shows a brief warning offering "Detail" and "Settings";
+ *   when true, shows a modal dialog with the recorded statistics and the help text.
+ */
+function showInformationWhenSlowCompletionResponseTime(modal: boolean = false) {
+  if (!modal) {
+    window
+      .showWarningMessage("Completion requests appear to take too much time.", "Detail", "Settings")
+      .then((selection) => {
+        if (selection === "Detail") {
+          showInformationWhenSlowCompletionResponseTime(true);
+        } else if (selection === "Settings") {
+          commands.executeCommand("tabby.openSettings");
+        }
+      });
+    return;
+  }
+  const slowIssue = agent()
+    .getIssues()
+    .find((issue) => issue.name === "slowCompletionResponseTime");
+  const stats = slowIssue?.completionResponseStats;
+  let statsMessage = "";
+  if (stats && stats["responses"] && stats["averageResponseTime"]) {
+    const averageTime = Number(stats["averageResponseTime"]).toFixed(0);
+    statsMessage = `The average response time of recent ${stats["responses"]} completion requests is ${averageTime}ms.\n\n`;
+  }
+  const detail = statsMessage + getHelpMessageForCompletionResponseTimeIssue();
+  window
+    .showWarningMessage("Completion requests appear to take too much time.", { modal: true, detail }, "Supported Models")
+    .then((selection) => {
+      if (selection === "Supported Models") {
+        env.openExternal(Uri.parse("https://huggingface.co/models?search=tabbyml"));
+      }
+    });
+}
+
+/**
+ * Warns that most completion requests timed out.
+ *
+ * @param modal when false, shows a brief warning offering "Detail" and "Settings";
+ *   when true, shows a modal dialog with the recorded statistics and the help text.
+ */
+function showInformationWhenHighCompletionTimeoutRate(modal: boolean = false) {
+  if (!modal) {
+    window.showWarningMessage("Most completion requests timed out.", "Detail", "Settings").then((selection) => {
+      if (selection === "Detail") {
+        showInformationWhenHighCompletionTimeoutRate(true);
+      } else if (selection === "Settings") {
+        commands.executeCommand("tabby.openSettings");
+      }
+    });
+    return;
+  }
+  const timeoutIssue = agent()
+    .getIssues()
+    .find((issue) => issue.name === "highCompletionTimeoutRate");
+  const stats = timeoutIssue?.completionResponseStats;
+  let statsMessage = "";
+  if (stats && stats["total"] && stats["timeouts"]) {
+    statsMessage = `${stats["timeouts"]} of ${stats["total"]} completion requests timed out.\n\n`;
+  }
+  const detail = statsMessage + getHelpMessageForCompletionResponseTimeIssue();
+  window
+    .showWarningMessage("Most completion requests timed out.", { modal: true, detail }, "Supported Models")
+    .then((selection) => {
+      if (selection === "Supported Models") {
+        env.openExternal(Uri.parse("https://huggingface.co/models?search=tabbyml"));
+      }
+    });
+}
+
 export const notifications = {
  showInformationWhenLoading,
  showInformationWhenDisabled,
@ -119,4 +227,6 @@ export const notifications = {
  showInformationWhenStartAuthButAlreadyAuthorized,
  showInformationWhenAuthFailed,
  showInformationWhenInlineSuggestDisabled,
+  showInformationWhenSlowCompletionResponseTime,
+  showInformationWhenHighCompletionTimeoutRate,
 };
--- a/clients/vscode/src/statusBarItem.ts
+++ b/clients/vscode/src/statusBarItem.ts
@ -8,6 +8,7 @@ const iconLoading = "$(loading~spin)";
 const iconReady = "$(check)";
 const iconDisconnected = "$(plug)";
 const iconUnauthorized = "$(key)";
+const iconIssueExist = "$(warning)";
 const iconDisabled = "$(x)";
 const colorNormal = new ThemeColor("statusBar.foreground");
 const colorWarning = new ThemeColor("statusBarItem.warningForeground");
@ -20,15 +21,33 @@ const fsm = createMachine({
  initial: "loading",
  states: {
    loading: {
-      on: { ready: "ready", disconnected: "disconnected", unauthorized: "unauthorized", disabled: "disabled" },
+      on: {
+        ready: "ready",
+        disconnected: "disconnected",
+        unauthorized: "unauthorized",
+        issuesExist: "issuesExist",
+        disabled: "disabled",
+      },
      entry: () => toLoading(),
    },
    ready: {
-      on: { disconnected: "disconnected", unauthorized: "unauthorized", disabled: "disabled" },
+      on: {
+        loading: "loading",
+        disconnected: "disconnected",
+        unauthorized: "unauthorized",
+        issuesExist: "issuesExist",
+        disabled: "disabled",
+      },
      entry: () => toReady(),
    },
    disconnected: {
-      on: { ready: "ready", unauthorized: "unauthorized", disabled: "disabled" },
+      on: {
+        loading: "loading",
+        ready: "ready",
+        unauthorized: "unauthorized",
+        issuesExist: "issuesExist",
+        disabled: "disabled",
+      },
      entry: () => toDisconnected(),
    },
    unauthorized: {
@ -36,17 +55,39 @@ const fsm = createMachine({
        ready: "ready",
        disconnected: "disconnected",
        disabled: "disabled",
+        issuesExist: "issuesExist",
        authStart: "unauthorizedAndAuthInProgress",
      },
      entry: () => toUnauthorized(),
    },
    unauthorizedAndAuthInProgress: {
-      // if auth succeeds, we will get `ready` before `authEnd` event
-      on: { ready: "ready", disconnected: "disconnected", disabled: "disabled", authEnd: "unauthorized" },
+      on: {
+        ready: "ready",
+        disconnected: "disconnected",
+        issuesExist: "issuesExist",
+        disabled: "disabled",
+        authEnd: "unauthorized", // if auth succeeds, we will get `ready` before `authEnd` event
+      },
      entry: () => toUnauthorizedAndAuthInProgress(),
    },
+    issuesExist: {
+      on: {
+        loading: "loading",
+        ready: "ready",
+        disconnected: "disconnected",
+        unauthorized: "unauthorized",
+        disabled: "disabled",
+      },
+      entry: () => toIssuesExist(),
+    },
    disabled: {
-      on: { loading: "loading", ready: "ready", disconnected: "disconnected", unauthorized: "unauthorized" },
+      on: {
+        loading: "loading",
+        ready: "ready",
+        disconnected: "disconnected",
+        unauthorized: "unauthorized",
+        issuesExist: "issuesExist",
+      },
      entry: () => toDisabled(),
    },
  },
@ -93,6 +134,24 @@ function toUnauthorizedAndAuthInProgress() {
  item.command = undefined;
 }

+/**
+ * Entry action for the `issuesExist` state: gives the status bar item its
+ * warning colours and icon, sets a tooltip describing the first issue the
+ * agent reports (empty when there is none or its name is not recognised),
+ * and routes clicks to `tabby.statusBarItemClicked` with "issuesExist".
+ */
+function toIssuesExist() {
+  item.color = colorWarning;
+  item.backgroundColor = backgroundColorWarning;
+  item.text = `${iconIssueExist} ${label}`;
+
+  // Only the first reported issue determines the tooltip.
+  const firstIssueName = agent().getIssues()[0]?.name;
+  let tooltip = "";
+  if (firstIssueName === "slowCompletionResponseTime") {
+    tooltip = "Completion requests appear to take too much time.";
+  } else if (firstIssueName === "highCompletionTimeoutRate") {
+    tooltip = "Most completion requests timed out.";
+  }
+  item.tooltip = tooltip;
+
+  item.command = { title: "", command: "tabby.statusBarItemClicked", arguments: ["issuesExist"] };
+}
+
 function toDisabled() {
  item.color = colorWarning;
  item.backgroundColor = backgroundColorWarning;
@ -115,6 +174,7 @@ function updateStatusBarItem() {
      case "ready":
      case "disconnected":
      case "unauthorized":
+      case "issuesExist":
        fsmService.send(status);
        break;
    }
@ -143,6 +203,14 @@ export const tabbyStatusBarItem = () => {
    });
  });

+  agent().on("newIssue", (event) => {
+    if (event.issue.name === "slowCompletionResponseTime") {
+      notifications.showInformationWhenSlowCompletionResponseTime();
+    } else if (event.issue.name === "highCompletionTimeoutRate") {
+      notifications.showInformationWhenHighCompletionTimeoutRate();
+    }
+  });
+
  item.show();
  return item;
 };