From 732d83feefbec8432021071f84183b433c727f93 Mon Sep 17 00:00:00 2001
From: Zhiming Ma <codes.icy@gmail.com>
Date: Thu, 17 Aug 2023 22:28:41 +0800
Subject: [PATCH] feat(agent): monitor completion response time. (#359)

* feat(agent): monitor completion response time.

* fix(agent): improve help message for running large model on cpu.

* fix: notification spacing.
---
 clients/tabby-agent/openapi/tabby.json        | 121 +++---------
 clients/tabby-agent/src/Agent.ts              |  49 ++++-
 clients/tabby-agent/src/AgentConfig.ts        |  10 +
 clients/tabby-agent/src/ResponseStats.ts      | 109 +++++++++++
 clients/tabby-agent/src/TabbyAgent.ts         | 176 +++++++++++++++---
 clients/vscode/src/TabbyCompletionProvider.ts |   2 +
 clients/vscode/src/commands.ts                |  10 +
 clients/vscode/src/notifications.ts           | 112 ++++++++++-
 clients/vscode/src/statusBarItem.ts           |  80 +++++++-
 9 files changed, 533 insertions(+), 136 deletions(-)
 create mode 100644 clients/tabby-agent/src/ResponseStats.ts

diff --git a/clients/tabby-agent/openapi/tabby.json b/clients/tabby-agent/openapi/tabby.json
index a1a83d4..86fbb29 100644
--- a/clients/tabby-agent/openapi/tabby.json
+++ b/clients/tabby-agent/openapi/tabby.json
@@ -37,17 +37,9 @@
         "responses": {
           "200": {
             "description": "Success",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/CompletionResponse"
-                }
-              }
-            }
+            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/CompletionResponse" } } }
           },
-          "400": {
-            "description": "Bad Request"
-          }
+          "400": { "description": "Bad Request" }
         }
       }
     },
@@ -56,23 +48,10 @@
         "tags": ["v1"],
         "operationId": "event",
         "requestBody": {
-          "content": {
-            "application/json": {
-              "schema": {
-                "$ref": "#/components/schemas/LogEventRequest"
-              }
-            }
-          },
+          "content": { "application/json": { "schema": { "$ref": "#/components/schemas/LogEventRequest" } } },
           "required": true
         },
-        "responses": {
-          "200": {
-            "description": "Success"
-          },
-          "400": {
-            "description": "Bad Request"
-          }
-        }
+        "responses": { "200": { "description": "Success" }, "400": { "description": "Bad Request" } }
       }
     },
     "/v1/health": {
@@ -82,13 +61,7 @@
         "responses": {
           "200": {
             "description": "Success",
-            "content": {
-              "application/json": {
-                "schema": {
-                  "$ref": "#/components/schemas/HealthState"
-                }
-              }
-            }
+            "content": { "application/json": { "schema": { "$ref": "#/components/schemas/HealthState" } } }
           }
         }
       }
@@ -100,108 +73,62 @@
         "type": "object",
         "required": ["index", "text"],
         "properties": {
-          "index": {
-            "type": "integer",
-            "format": "int32",
-            "minimum": 0
-          },
-          "text": {
-            "type": "string"
-          }
+          "index": { "type": "integer", "format": "int32", "minimum": 0.0 },
+          "text": { "type": "string" }
         }
       },
       "CompletionRequest": {
         "type": "object",
         "properties": {
-          "prompt": {
-            "type": "string",
-            "example": "def fib(n):",
-            "nullable": true
-          },
+          "prompt": { "type": "string", "example": "def fib(n):", "nullable": true },
           "language": {
             "type": "string",
             "description": "Language identifier, full list is maintained at\nhttps://code.visualstudio.com/docs/languages/identifiers",
             "example": "python",
             "nullable": true
           },
-          "segments": {
-            "allOf": [
-              {
-                "$ref": "#/components/schemas/Segments"
-              }
-            ],
-            "nullable": true
-          },
-          "user": {
-            "type": "string",
-            "nullable": true
-          }
+          "segments": { "allOf": [{ "$ref": "#/components/schemas/Segments" }], "nullable": true },
+          "user": { "type": "string", "nullable": true }
         },
         "example": {
           "language": "python",
-          "segments": {
-            "prefix": "def fib(n):\n    ",
-            "suffix": "\n        return fib(n - 1) + fib(n - 2)"
-          }
+          "segments": { "prefix": "def fib(n):\n    ", "suffix": "\n        return fib(n - 1) + fib(n - 2)" }
         }
       },
       "CompletionResponse": {
         "type": "object",
         "required": ["id", "choices"],
         "properties": {
-          "id": {
-            "type": "string"
-          },
-          "choices": {
-            "type": "array",
-            "items": {
-              "$ref": "#/components/schemas/Choice"
-            }
-          }
+          "id": { "type": "string" },
+          "choices": { "type": "array", "items": { "$ref": "#/components/schemas/Choice" } }
         }
       },
       "HealthState": {
         "type": "object",
-        "required": ["model", "device", "compute_type"],
+        "required": ["model", "device", "compute_type", "arch", "cpu_info", "cpu_count"],
         "properties": {
-          "model": {
-            "type": "string"
-          },
-          "device": {
-            "type": "string"
-          },
-          "compute_type": {
-            "type": "string"
-          }
+          "model": { "type": "string" },
+          "device": { "type": "string" },
+          "compute_type": { "type": "string" },
+          "arch": { "type": "string" },
+          "cpu_info": { "type": "string" },
+          "cpu_count": { "type": "integer", "minimum": 0.0 }
         }
       },
       "LogEventRequest": {
         "type": "object",
         "required": ["type", "completion_id", "choice_index"],
         "properties": {
-          "type": {
-            "type": "string",
-            "description": "Event type, should be `view` or `select`.",
-            "example": "view"
-          },
-          "completion_id": {
-            "type": "string"
-          },
-          "choice_index": {
-            "type": "integer",
-            "format": "int32",
-            "minimum": 0
-          }
+          "type": { "type": "string", "description": "Event type, should be `view` or `select`.", "example": "view" },
+          "completion_id": { "type": "string" },
+          "choice_index": { "type": "integer", "format": "int32", "minimum": 0.0 }
         }
       },
       "Segments": {
         "type": "object",
         "required": ["prefix"],
         "properties": {
-          "prefix": {
-            "type": "string",
-            "description": "Content that appears before the cursor in the editor window."
-          },
+          "prefix": { "type": "string", "description": "Content that appears before the cursor in the editor window." },
           "suffix": {
             "type": "string",
             "description": "Content that appears after the cursor in the editor window.",
diff --git a/clients/tabby-agent/src/Agent.ts b/clients/tabby-agent/src/Agent.ts
index 3a13ab7..ed3deb8 100644
--- a/clients/tabby-agent/src/Agent.ts
+++ b/clients/tabby-agent/src/Agent.ts
@@ -2,6 +2,7 @@ import {
   CancelablePromise,
   LogEventRequest as ApiLogEventRequest,
   CompletionResponse as ApiCompletionResponse,
+  HealthState,
 } from "./generated";
 
 import { AgentConfig, PartialAgentConfig } from "./AgentConfig";
@@ -11,11 +12,14 @@ export type AgentInitOptions = Partial<{
   client: string;
 }>;
 
+export type ServerHealthState = HealthState;
+
 export type CompletionRequest = {
   filepath: string;
   language: string;
   text: string;
   position: number;
+  manually?: boolean;
   maxPrefixLines?: number;
   maxSuffixLines?: number;
 };
@@ -24,16 +28,31 @@ export type CompletionResponse = ApiCompletionResponse;
 
 export type LogEventRequest = ApiLogEventRequest;
 
+export type SlowCompletionResponseTimeIssue = {
+  name: "slowCompletionResponseTime";
+  completionResponseStats: Record<string, number>;
+};
+export type HighCompletionTimeoutRateIssue = {
+  name: "highCompletionTimeoutRate";
+  completionResponseStats: Record<string, number>;
+};
+export type AgentIssue = SlowCompletionResponseTimeIssue | HighCompletionTimeoutRateIssue;
+
 /**
- * `notInitialized`: When the agent is not initialized.
- * `ready`: When the agent get a valid response from the server, and is ready to use.
- * `disconnected`: When the agent failed to connect to the server.
- * `unauthorized`: When the server is set to a Tabby Cloud endpoint that requires auth,
+ * Represents the status of the agent.
+ * @enum
+ * @property {string} notInitialized - When the agent is not initialized.
+ * @property {string} ready - When the agent gets a valid response from the server.
+ * @property {string} disconnected - When the agent fails to connect to the server.
+ * @property {string} unauthorized - When the server is set to a Tabby Cloud endpoint that requires auth,
  *   and no `Authorization` request header is provided in the agent config,
- *   and user has not completed the auth flow or the auth token is expired.
+ *   and the user has not completed the auth flow or the auth token is expired.
  *   See also `requestAuthUrl` and `waitForAuthToken`.
+ * @property {string} issuesExist - When the agent gets a valid response from the server, but still
+ *   has some non-blocking issues, e.g. the average completion response time is too slow,
+ *   or the timeout rate is too high.
  */
-export type AgentStatus = "notInitialized" | "ready" | "disconnected" | "unauthorized";
+export type AgentStatus = "notInitialized" | "ready" | "disconnected" | "unauthorized" | "issuesExist";
 
 export interface AgentFunction {
   /**
@@ -70,6 +89,16 @@ export interface AgentFunction {
    */
   getStatus(): AgentStatus;
 
+  /**
+   * @returns the current issues if AgentStatus is `issuesExist`, otherwise returns empty array
+   */
+  getIssues(): AgentIssue[];
+
+  /**
+   * @returns server info returned from latest server health check, returns null if not available
+   */
+  getServerHealthState(): ServerHealthState | null;
+
   /**
    * Request auth url for Tabby Cloud endpoint. Only return value when the `AgentStatus` is `unauthorized`.
    * Otherwise, return null. See also `AgentStatus`.
@@ -117,9 +146,13 @@ export type AuthRequiredEvent = {
   event: "authRequired";
   server: AgentConfig["server"];
 };
+export type NewIssueEvent = {
+  event: "newIssue";
+  issue: AgentIssue;
+};
 
-export type AgentEvent = StatusChangedEvent | ConfigUpdatedEvent | AuthRequiredEvent;
-export const agentEventNames: AgentEvent["event"][] = ["statusChanged", "configUpdated", "authRequired"];
+export type AgentEvent = StatusChangedEvent | ConfigUpdatedEvent | AuthRequiredEvent | NewIssueEvent;
+export const agentEventNames: AgentEvent["event"][] = ["statusChanged", "configUpdated", "authRequired", "newIssue"];
 
 export interface AgentEventEmitter {
   on<T extends AgentEvent>(eventName: T["event"], callback: (event: T) => void): this;
diff --git a/clients/tabby-agent/src/AgentConfig.ts b/clients/tabby-agent/src/AgentConfig.ts
index 70d8d85..67404ca 100644
--- a/clients/tabby-agent/src/AgentConfig.ts
+++ b/clients/tabby-agent/src/AgentConfig.ts
@@ -4,8 +4,13 @@ export type AgentConfig = {
   server: {
     endpoint: string;
     requestHeaders: Record<string, string>;
+    requestTimeout: number;
   };
   completion: {
+    timeout: {
+      auto: number;
+      manually: number;
+    };
     maxPrefixLines: number;
     maxSuffixLines: number;
   };
@@ -31,8 +36,13 @@ export const defaultAgentConfig: AgentConfig = {
   server: {
     endpoint: "http://localhost:8080",
     requestHeaders: {},
+    requestTimeout: 30000, // 30s
   },
   completion: {
+    timeout: {
+      auto: 5000, // 5s
+      manually: 30000, // 30s
+    },
     maxPrefixLines: 20,
     maxSuffixLines: 20,
   },
diff --git a/clients/tabby-agent/src/ResponseStats.ts b/clients/tabby-agent/src/ResponseStats.ts
new file mode 100644
index 0000000..f4ebb73
--- /dev/null
+++ b/clients/tabby-agent/src/ResponseStats.ts
@@ -0,0 +1,109 @@
+import { EventEmitter } from "events";
+import { rootLogger } from "./logger";
+
+export type ResponseStatsEntry = {
+  name: string;
+  status: number;
+  responseTime: number;
+  error?: any;
+};
+
+export type ResponseStatsStrategy = {
+  maxSize: number;
+  stats: Record<string, (entries: ResponseStatsEntry[]) => number>;
+  checks: {
+    name: string;
+    check: (entries: ResponseStatsEntry[], stats: Record<string, number>) => boolean;
+  }[];
+};
+
+/** Stats strategy for completion requests: a window of the 50 most recent entries plus checks in priority order. */
+export const completionResponseTimeStatsStrategy: ResponseStatsStrategy = {
+  maxSize: 50,
+  stats: {
+    total: (entries: ResponseStatsEntry[]) => entries.length,
+    responses: (entries: ResponseStatsEntry[]) => entries.filter((entry) => entry.status === 200).length,
+    timeouts: (entries: ResponseStatsEntry[]) => entries.filter((entry) => entry.error?.isTimeoutError).length,
+    // Mean response time of successful (status 200) entries only; 0 when there are none.
+    averageResponseTime: (entries: ResponseStatsEntry[]) => {
+      const succeeded = entries.filter((entry) => entry.status === 200);
+      return succeeded.length > 0 ? succeeded.reduce((acc, e) => acc + e.responseTime, 0) / succeeded.length : 0;
+    },
+  },
+  checks: [
+    // healthy: every one of the last 5 entries has status 200 and a response time under 3s
+    {
+      name: "healthy",
+      check: (entries: ResponseStatsEntry[]) =>
+        entries.slice(-5).every((entry) => entry.status === 200 && entry.responseTime < 3000),
+    },
+    // highTimeoutRate: more than 50% of the entries are timeouts, with at least 3 entries recorded
+    {
+      name: "highTimeoutRate",
+      check: (entries: ResponseStatsEntry[], stats) => {
+        if (stats.total < 3) {
+          return false;
+        }
+        return stats.timeouts / stats.total > 0.5;
+      },
+    },
+    // slowResponseTime: average response time above 4s, with at least 5 successful responses
+    {
+      name: "slowResponseTime",
+      check: (entries: ResponseStatsEntry[], stats) => {
+        if (stats.responses < 5) {
+          return false;
+        }
+        return stats.averageResponseTime > 4000;
+      },
+    },
+  ],
+};
+
+/**
+ * Rolling window of response entries, bounded by `strategy.maxSize`.
+ * Every `push` recomputes the stats and emits an event named after the first
+ * strategy check that matches, with the stats as the event payload.
+ */
+export class ResponseStats extends EventEmitter {
+  private readonly logger = rootLogger.child({ component: "ResponseStats" });
+  private strategy: ResponseStatsStrategy;
+  private entries: ResponseStatsEntry[] = [];
+
+  constructor(strategy: ResponseStatsStrategy) {
+    super();
+    this.strategy = strategy;
+  }
+
+  /** Records an entry, evicting the oldest one when the window is full, then emits the first matching check. */
+  push(entry: ResponseStatsEntry): void {
+    this.entries.push(entry);
+    if (this.entries.length > this.strategy.maxSize) {
+      this.entries.shift();
+    }
+    const stats = this.stats();
+    for (const check of this.strategy.checks) {
+      if (check.check(this.entries, stats)) {
+        this.logger.debug({ check: check.name, stats }, "Check condition met");
+        this.emit(check.name, stats);
+        // Checks are in priority order: stop here so a later match cannot undo this one.
+        break;
+      }
+    }
+  }
+
+  /** Evaluates every stat function of the strategy against the current entries. */
+  stats(): Record<string, number> {
+    const result: Record<string, number> = {};
+    for (const [name, stats] of Object.entries(this.strategy.stats)) {
+      result[name] = stats(this.entries);
+    }
+    return result;
+  }
+
+  /** Returns the name of the first check that matches the current entries, or null if none does. */
+  check(): string | null {
+    const stats = this.stats();
+    return this.strategy.checks.find((check) => check.check(this.entries, stats))?.name ?? null;
+  }
+}
diff --git a/clients/tabby-agent/src/TabbyAgent.ts b/clients/tabby-agent/src/TabbyAgent.ts
index c354acb..d43982e 100644
--- a/clients/tabby-agent/src/TabbyAgent.ts
+++ b/clients/tabby-agent/src/TabbyAgent.ts
@@ -8,8 +8,10 @@ import { cancelable, splitLines, isBlank } from "./utils";
 import {
   Agent,
   AgentStatus,
+  AgentIssue,
   AgentEvent,
   AgentInitOptions,
+  ServerHealthState,
   CompletionRequest,
   CompletionResponse,
   LogEventRequest,
@@ -21,6 +23,7 @@ import { DataStore } from "./dataStore";
 import { postprocess, preCacheProcess } from "./postprocess";
 import { rootLogger, allLoggers } from "./logger";
 import { AnonymousUsageLogger } from "./AnonymousUsageLogger";
+import { ResponseStats, completionResponseTimeStatsStrategy } from "./ResponseStats";
 
 /**
  * Different from AgentInitOptions or AgentConfig, this may contain non-serializable objects,
@@ -37,12 +40,15 @@ export class TabbyAgent extends EventEmitter implements Agent {
   private userConfig: PartialAgentConfig = {}; // config from `~/.tabby/agent/config.toml`
   private clientConfig: PartialAgentConfig = {}; // config from `initialize` and `updateConfig` method
   private status: AgentStatus = "notInitialized";
+  private issues: AgentIssue["name"][] = [];
+  private serverHealthState: ServerHealthState | null = null;
   private api: TabbyApi;
   private auth: Auth;
   private dataStore: DataStore | null = null;
   private completionCache: CompletionCache = new CompletionCache();
   static readonly tryConnectInterval = 1000 * 30; // 30s
   private tryingConnectTimer: ReturnType<typeof setInterval> | null = null;
+  private completionResponseStats: ResponseStats = new ResponseStats(completionResponseTimeStatsStrategy);
 
   private constructor() {
     super();
@@ -53,6 +59,23 @@ export class TabbyAgent extends EventEmitter implements Agent {
         await this.healthCheck();
       }
     }, TabbyAgent.tryConnectInterval);
+
+    this.completionResponseStats.on("healthy", () => {
+      this.popIssue("slowCompletionResponseTime");
+      this.popIssue("highCompletionTimeoutRate");
+    });
+    this.completionResponseStats.on("highTimeoutRate", () => {
+      if (this.status === "ready" || this.status === "issuesExist") {
+        this.popIssue("slowCompletionResponseTime");
+        this.pushIssue("highCompletionTimeoutRate");
+      }
+    });
+    this.completionResponseStats.on("slowResponseTime", () => {
+      if (this.status === "ready" || this.status === "issuesExist") {
+        this.popIssue("highCompletionTimeoutRate");
+        this.pushIssue("slowCompletionResponseTime");
+      }
+    });
   }
 
   static async create(options?: TabbyAgentOptions): Promise<TabbyAgent> {
@@ -101,6 +124,38 @@ export class TabbyAgent extends EventEmitter implements Agent {
     }
   }
 
+  /**
+   * Expands an issue name into the full issue object, attaching a snapshot
+   * of the current completion response stats.
+   */
+  private issueWithDetails(issue: AgentIssue["name"]): AgentIssue {
+    const completionResponseStats = this.completionResponseStats.stats();
+    // One case per name so each literal resolves to its own member of the AgentIssue union.
+    switch (issue) {
+      case "highCompletionTimeoutRate":
+        return { name: "highCompletionTimeoutRate", completionResponseStats };
+      case "slowCompletionResponseTime":
+        return { name: "slowCompletionResponseTime", completionResponseStats };
+    }
+  }
+
+  private pushIssue(issue: AgentIssue["name"]) {
+    // An issue that is already tracked is not announced again.
+    if (this.issues.includes(issue)) return;
+    this.issues.push(issue);
+    this.changeStatus("issuesExist");
+    const newIssueEvent: AgentEvent = { event: "newIssue", issue: this.issueWithDetails(issue) };
+    this.logger.debug({ event: newIssueEvent }, "New issue");
+    super.emit("newIssue", newIssueEvent);
+  }
+
+  private popIssue(issue: AgentIssue["name"]) {
+    // Forget the issue; an `issuesExist` agent goes back to `ready` once none remain.
+    this.issues = this.issues.filter((tracked) => tracked !== issue);
+    const noneLeft = this.issues.length === 0;
+    if (noneLeft && this.status === "issuesExist") this.changeStatus("ready");
+  }
+
   private emitAuthRequired() {
     const event: AgentEvent = { event: "authRequired", server: this.config.server };
     super.emit("authRequired", event);
@@ -109,44 +164,99 @@ export class TabbyAgent extends EventEmitter implements Agent {
   private callApi<Request, Response>(
     api: (request: Request) => CancelablePromise<Response>,
     request: Request,
+    options: { timeout?: number } = { timeout: this.config.server.requestTimeout },
   ): CancelablePromise<Response> {
-    this.logger.debug({ api: api.name, request }, "API request");
-    const promise = api.call(this.api.v1, request);
-    return cancelable(
-      promise
+    return new CancelablePromise((resolve, reject, onCancel) => {
+      const requestId = uuid();
+      this.logger.debug({ requestId, api: api.name, request }, "API request");
+      let timeout: ReturnType<typeof setTimeout> | null = null;
+      let timeoutCancelled = false;
+      const apiRequest = api.call(this.api.v1, request);
+      const requestStartedAt = performance.now();
+      apiRequest
         .then((response: Response) => {
-          this.logger.debug({ api: api.name, response }, "API response");
-          this.changeStatus("ready");
-          return response;
+          this.logger.debug({ requestId, api: api.name, response }, "API response");
+          if (this.status !== "issuesExist") {
+            this.changeStatus("ready");
+          }
+          if (api.name === "completion") {
+            this.completionResponseStats.push({
+              name: api.name,
+              status: 200,
+              responseTime: performance.now() - requestStartedAt,
+            });
+          }
+          if (timeout) {
+            clearTimeout(timeout);
+          }
+          resolve(response);
         })
         .catch((error) => {
-          if (!!error.isCancelled) {
-            this.logger.debug({ api: api.name, error }, "API request canceled");
+          if (
+            (!!error.isCancelled && timeoutCancelled) ||
+            (!error.isCancelled && error.code === "ECONNABORTED") ||
+            (error.name === "ApiError" && [408, 499].indexOf(error.status) !== -1)
+          ) {
+            error.isTimeoutError = true;
+            this.logger.debug({ requestId, api: api.name, error }, "API request timeout");
+          } else if (!!error.isCancelled) {
+            this.logger.debug({ requestId, api: api.name, error }, "API request cancelled");
           } else if (
             error.name === "ApiError" &&
             [401, 403, 405].indexOf(error.status) !== -1 &&
             new URL(this.config.server.endpoint).hostname.endsWith("app.tabbyml.com") &&
             this.config.server.requestHeaders["Authorization"] === undefined
           ) {
-            this.logger.debug({ api: api.name, error }, "API unauthorized");
+            this.logger.debug({ requestId, api: api.name, error }, "API unauthorized");
             this.changeStatus("unauthorized");
           } else if (error.name === "ApiError") {
-            this.logger.error({ api: api.name, error }, "API error");
+            this.logger.error({ requestId, api: api.name, error }, "API error");
             this.changeStatus("disconnected");
           } else {
-            this.logger.error({ api: api.name, error }, "API request failed with unknown error");
+            this.logger.error({ requestId, api: api.name, error }, "API request failed with unknown error");
             this.changeStatus("disconnected");
           }
-          throw error;
-        }),
-      () => {
-        promise.cancel();
-      },
-    );
+          // Record failures and timeouts in the stats, but skip cancelled requests that are not timeouts.
+          if (api.name === "completion" && (error.isTimeoutError || !error.isCancelled)) {
+            this.completionResponseStats.push({
+              name: api.name,
+              status: error.status,
+              responseTime: performance.now() - requestStartedAt,
+              error,
+            });
+          }
+          if (timeout) {
+            clearTimeout(timeout);
+          }
+          reject(error);
+        });
+      // openapi-typescript-codegen does not seem to offer a way to pass a timeout option to axios,
+      // so enforce the timeout here with setTimeout and cancel the request manually.
+      if (options.timeout && options.timeout > 0) {
+        timeout = setTimeout(
+          () => {
+            this.logger.debug({ api: api.name, timeout: options.timeout }, "Cancel API request due to timeout");
+            timeoutCancelled = true;
+            apiRequest.cancel();
+          },
+          Math.min(options.timeout, 0x7fffffff),
+        );
+      }
+      onCancel(() => {
+        if (timeout) {
+          clearTimeout(timeout);
+        }
+        apiRequest.cancel();
+      });
+    });
   }
 
   private healthCheck(): Promise<any> {
-    return this.callApi(this.api.v1.health, {}).catch(() => {});
+    return this.callApi(this.api.v1.health, {})
+      .then((healthState) => {
+        this.serverHealthState = healthState;
+      })
+      .catch(() => {});
   }
 
   private createSegments(request: CompletionRequest): { prefix: string; suffix: string } {
@@ -198,6 +308,10 @@ export class TabbyAgent extends EventEmitter implements Agent {
       }
       const prevStatus = this.status;
       await this.applyConfig();
+      // If server config changed, clear server health state
+      if (key.startsWith("server")) {
+        this.serverHealthState = null;
+      }
       // If status unchanged, `authRequired` will not be emitted when `applyConfig`,
       // so we need to emit it manually.
       if (key.startsWith("server") && prevStatus === "unauthorized" && this.status === "unauthorized") {
@@ -222,6 +336,14 @@ export class TabbyAgent extends EventEmitter implements Agent {
     return this.status;
   }
 
+  public getIssues(): AgentIssue[] {
+    return this.issues.map((issue) => this.issueWithDetails(issue));
+  }
+
+  public getServerHealthState(): ServerHealthState | null {
+    return this.serverHealthState;
+  }
+
   public requestAuthUrl(): CancelablePromise<{ authUrl: string; code: string } | null> {
     if (this.status === "notInitialized") {
       return cancelable(Promise.reject("Agent is not initialized"), () => {});
@@ -283,11 +405,17 @@ export class TabbyAgent extends EventEmitter implements Agent {
               choices: [],
             };
           }
-          const apiRequest = this.callApi(this.api.v1.completion, {
-            language: request.language,
-            segments,
-            user: this.auth?.user,
-          });
+          const apiRequest = this.callApi(
+            this.api.v1.completion,
+            {
+              language: request.language,
+              segments,
+              user: this.auth?.user,
+            },
+            {
+              timeout: request.manually ? this.config.completion.timeout.manually : this.config.completion.timeout.auto,
+            },
+          );
           cancelableList.push(apiRequest);
           return apiRequest
             .then((response) => {
diff --git a/clients/vscode/src/TabbyCompletionProvider.ts b/clients/vscode/src/TabbyCompletionProvider.ts
index 30bd8d6..852941a 100644
--- a/clients/vscode/src/TabbyCompletionProvider.ts
+++ b/clients/vscode/src/TabbyCompletionProvider.ts
@@ -4,6 +4,7 @@ import {
   InlineCompletionItem,
   InlineCompletionItemProvider,
   InlineCompletionList,
+  InlineCompletionTriggerKind,
   Position,
   ProviderResult,
   Range,
@@ -65,6 +66,7 @@ export class TabbyCompletionProvider implements InlineCompletionItemProvider {
       language: document.languageId,  // https://code.visualstudio.com/docs/languages/identifiers
       text: document.getText(),
       position: document.offsetAt(position),
+      manually: context.triggerKind === InlineCompletionTriggerKind.Invoke,
       maxPrefixLines: this.maxPrefixLines,
       maxSuffixLines: this.maxSuffixLines,
     };
diff --git a/clients/vscode/src/commands.ts b/clients/vscode/src/commands.ts
index 4b38d04..c8f2560 100644
--- a/clients/vscode/src/commands.ts
+++ b/clients/vscode/src/commands.ts
@@ -174,6 +174,16 @@ const statusBarItemClicked: Command = {
       case "unauthorized":
         notifications.showInformationStartAuth();
         break;
+      case "issuesExist":
+        switch (agent().getIssues()[0]?.name) {
+          case "slowCompletionResponseTime":
+            notifications.showInformationWhenSlowCompletionResponseTime();
+            break;
+          case "highCompletionTimeoutRate":
+            notifications.showInformationWhenHighCompletionTimeoutRate();
+            break;
+        }
+        break;
       case "disabled":
         const enabled = workspace.getConfiguration("tabby").get("codeCompletion", true);
         const inlineSuggestEnabled = workspace.getConfiguration("editor").get("inlineSuggest.enabled", true);
diff --git a/clients/vscode/src/notifications.ts b/clients/vscode/src/notifications.ts
index 3af5e4c..1111221 100644
--- a/clients/vscode/src/notifications.ts
+++ b/clients/vscode/src/notifications.ts
@@ -1,4 +1,5 @@
-import { commands, window, workspace, ConfigurationTarget } from "vscode";
+import { commands, window, workspace, env, ConfigurationTarget, Uri } from "vscode";
+import { agent } from "./agent";
 
 function showInformationWhenLoading() {
   window.showInformationMessage("Tabby is initializing.", "Settings").then((selection) => {
@@ -109,6 +110,113 @@ function showInformationWhenInlineSuggestDisabled() {
     });
 }
 
+/** Builds the help text listing likely causes of slow or timed-out completion requests. */
+function getHelpMessageForCompletionResponseTimeIssue() {
+  const serverHealthState = agent().getServerHealthState();
+  const model = serverHealthState?.model;
+  const largeModelOnCpu = serverHealthState?.device === "cpu" && !!model?.match(/[0-9\.]+B$/);
+  let message = "";
+  if (largeModelOnCpu) {
+    message +=
+      `Your Tabby server is running model ${model} on CPU. ` +
+      "This model is too large to run on CPU, please try a smaller model or switch to GPU. " +
+      "You can find supported model list by search TabbyML on HuggingFace. \n\n";
+    message += "Other possible causes of this issue are: \n";
+  } else {
+    message += "Possible causes of this issue are: \n";
+  }
+  message += " - A poor network connection. Please check your network and proxy settings.\n";
+  message += " - Server overload. Please contact your Tabby server administrator for assistance.\n";
+  if (!largeModelOnCpu) {
+    // Only name the model when the server reported one, so the sentence has no gap.
+    message += ` - The running model${model ? ` ${model}` : ""} is too large to run on your Tabby server. `;
+    message += "Please try a smaller model. You can find supported model list by search TabbyML on HuggingFace.\n";
+  }
+  return message;
+}
+
+/**
+ * Warns that completion requests are slow. The plain notification offers
+ * "Detail" and "Settings"; the modal variant adds the recorded stats and
+ * troubleshooting hints.
+ * @param modal whether to show the detailed modal dialog instead of the notification
+ */
+function showInformationWhenSlowCompletionResponseTime(modal: boolean = false) {
+  const title = "Completion requests appear to take too much time.";
+  if (!modal) {
+    // Non-modal notification: lets the user open the detailed modal or the settings.
+    window.showWarningMessage(title, "Detail", "Settings").then((selection) => {
+      if (selection === "Detail") {
+        showInformationWhenSlowCompletionResponseTime(true);
+      } else if (selection === "Settings") {
+        commands.executeCommand("tabby.openSettings");
+      }
+    });
+    return;
+  }
+
+  // Modal variant: look up the stats recorded for this issue, if it is still active.
+  const slowIssue = agent()
+    .getIssues()
+    .find((issue) => issue.name === "slowCompletionResponseTime");
+  const stats = slowIssue?.completionResponseStats;
+  const responses = stats?.["responses"];
+  const averageResponseTime = stats?.["averageResponseTime"];
+
+  // The stats sentence is omitted when either figure is unavailable or zero.
+  let statsMessage = "";
+  if (responses && averageResponseTime) {
+    const averageMs = Number(averageResponseTime).toFixed(0);
+    statsMessage = `The average response time of recent ${responses} completion requests is ${averageMs}ms.\n\n`;
+  }
+  const detail = statsMessage + getHelpMessageForCompletionResponseTimeIssue();
+  window.showWarningMessage(title, { modal: true, detail }, "Supported Models").then((selection) => {
+    if (selection === "Supported Models") {
+      // Opens the TabbyML model search on HuggingFace.
+      env.openExternal(Uri.parse("https://huggingface.co/models?search=tabbyml"));
+    }
+  });
+}
+
+/**
+ * Warns that most completion requests timed out. The plain notification offers
+ * "Detail" and "Settings"; the modal variant adds the recorded stats and
+ * troubleshooting hints.
+ * @param modal whether to show the detailed modal dialog instead of the notification
+ */
+function showInformationWhenHighCompletionTimeoutRate(modal: boolean = false) {
+  const title = "Most completion requests timed out.";
+  if (!modal) {
+    // Non-modal notification: lets the user open the detailed modal or the settings.
+    window.showWarningMessage(title, "Detail", "Settings").then((selection) => {
+      if (selection === "Detail") {
+        showInformationWhenHighCompletionTimeoutRate(true);
+      } else if (selection === "Settings") {
+        commands.executeCommand("tabby.openSettings");
+      }
+    });
+    return;
+  }
+  // Modal variant: look up the stats recorded for this issue, if it is still active.
+  const timeoutIssue = agent()
+    .getIssues()
+    .find((issue) => issue.name === "highCompletionTimeoutRate");
+  const stats = timeoutIssue?.completionResponseStats;
+  const total = stats?.["total"];
+  const timeouts = stats?.["timeouts"];
+  // The stats sentence is omitted when either figure is unavailable or zero.
+  let statsMessage = "";
+  if (total && timeouts) {
+    statsMessage = `${timeouts} of ${total} completion requests timed out.\n\n`;
+  }
+  const detail = statsMessage + getHelpMessageForCompletionResponseTimeIssue();
+  window.showWarningMessage(title, { modal: true, detail }, "Supported Models").then((selection) => {
+    if (selection === "Supported Models") {
+      env.openExternal(Uri.parse("https://huggingface.co/models?search=tabbyml"));
+    }
+  });
+}
+
 export const notifications = {
   showInformationWhenLoading,
   showInformationWhenDisabled,
@@ -119,4 +227,6 @@ export const notifications = {
   showInformationWhenStartAuthButAlreadyAuthorized,
   showInformationWhenAuthFailed,
   showInformationWhenInlineSuggestDisabled,
+  showInformationWhenSlowCompletionResponseTime,
+  showInformationWhenHighCompletionTimeoutRate,
 };
diff --git a/clients/vscode/src/statusBarItem.ts b/clients/vscode/src/statusBarItem.ts
index 6d8c389..0418cea 100644
--- a/clients/vscode/src/statusBarItem.ts
+++ b/clients/vscode/src/statusBarItem.ts
@@ -8,6 +8,7 @@ const iconLoading = "$(loading~spin)";
 const iconReady = "$(check)";
 const iconDisconnected = "$(plug)";
 const iconUnauthorized = "$(key)";
+const iconIssueExist = "$(warning)";
 const iconDisabled = "$(x)";
 const colorNormal = new ThemeColor("statusBar.foreground");
 const colorWarning = new ThemeColor("statusBarItem.warningForeground");
@@ -20,15 +21,33 @@ const fsm = createMachine({
   initial: "loading",
   states: {
     loading: {
-      on: { ready: "ready", disconnected: "disconnected", unauthorized: "unauthorized", disabled: "disabled" },
+      on: {
+        ready: "ready",
+        disconnected: "disconnected",
+        unauthorized: "unauthorized",
+        issuesExist: "issuesExist",
+        disabled: "disabled",
+      },
       entry: () => toLoading(),
     },
     ready: {
-      on: { disconnected: "disconnected", unauthorized: "unauthorized", disabled: "disabled" },
+      on: {
+        loading: "loading",
+        disconnected: "disconnected",
+        unauthorized: "unauthorized",
+        issuesExist: "issuesExist",
+        disabled: "disabled",
+      },
       entry: () => toReady(),
     },
     disconnected: {
-      on: { ready: "ready", unauthorized: "unauthorized", disabled: "disabled" },
+      on: {
+        loading: "loading",
+        ready: "ready",
+        unauthorized: "unauthorized",
+        issuesExist: "issuesExist",
+        disabled: "disabled",
+      },
       entry: () => toDisconnected(),
     },
     unauthorized: {
@@ -36,17 +55,39 @@ const fsm = createMachine({
         ready: "ready",
         disconnected: "disconnected",
         disabled: "disabled",
+        issuesExist: "issuesExist",
         authStart: "unauthorizedAndAuthInProgress",
       },
       entry: () => toUnauthorized(),
     },
     unauthorizedAndAuthInProgress: {
-      // if auth succeeds, we will get `ready` before `authEnd` event
-      on: { ready: "ready", disconnected: "disconnected", disabled: "disabled", authEnd: "unauthorized" },
+      on: {
+        ready: "ready",
+        disconnected: "disconnected",
+        issuesExist: "issuesExist",
+        disabled: "disabled",
+        authEnd: "unauthorized", // if auth succeeds, we will get `ready` before `authEnd` event
+      },
       entry: () => toUnauthorizedAndAuthInProgress(),
     },
+    issuesExist: {
+      on: {
+        loading: "loading",
+        ready: "ready",
+        disconnected: "disconnected",
+        unauthorized: "unauthorized",
+        disabled: "disabled",
+      },
+      entry: () => toIssuesExist(),
+    },
     disabled: {
-      on: { loading: "loading", ready: "ready", disconnected: "disconnected", unauthorized: "unauthorized" },
+      on: {
+        loading: "loading",
+        ready: "ready",
+        disconnected: "disconnected",
+        unauthorized: "unauthorized",
+        issuesExist: "issuesExist",
+      },
       entry: () => toDisabled(),
     },
   },
@@ -93,6 +134,24 @@ function toUnauthorizedAndAuthInProgress() {
   item.command = undefined;
 }
 
+// Switches the status bar item to its warning look and describes the first issue the agent reports.
+function toIssuesExist() {
+  item.color = colorWarning;
+  item.backgroundColor = backgroundColorWarning;
+  item.text = `${iconIssueExist} ${label}`;
+  // Only the first reported issue drives the tooltip; any other name leaves it empty.
+  const firstIssueName = agent().getIssues()[0]?.name;
+  if (firstIssueName === "slowCompletionResponseTime") {
+    item.tooltip = "Completion requests appear to take too much time.";
+  } else if (firstIssueName === "highCompletionTimeoutRate") {
+    item.tooltip = "Most completion requests timed out.";
+  } else {
+    item.tooltip = "";
+  }
+  // The item's command is `tabby.statusBarItemClicked`, passed this state's name as its argument.
+  item.command = { title: "", command: "tabby.statusBarItemClicked", arguments: ["issuesExist"] };
+}
+
 function toDisabled() {
   item.color = colorWarning;
   item.backgroundColor = backgroundColorWarning;
@@ -115,6 +174,7 @@ function updateStatusBarItem() {
       case "ready":
       case "disconnected":
       case "unauthorized":
+      case "issuesExist":
         fsmService.send(status);
         break;
     }
@@ -143,6 +203,14 @@ export const tabbyStatusBarItem = () => {
     });
   });
 
+  agent().on("newIssue", (event) => {
+    if (event.issue.name === "slowCompletionResponseTime") {
+      notifications.showInformationWhenSlowCompletionResponseTime();
+    } else if (event.issue.name === "highCompletionTimeoutRate") {
+      notifications.showInformationWhenHighCompletionTimeoutRate();
+    }
+  });
+
   item.show();
   return item;
 };