From d5d58fbbecf295f057a3a508c2f8cfd1bb76ca0b Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Mon, 27 Mar 2023 11:46:18 +0800 Subject: [PATCH] Improve documentations. (#20) * Improve help message of model preload * Update development/scripts/triton.sh * Improve documents * Update deployment.md * Update deployment.md --- README.md | 2 +- deployment/README.md | 13 ------------- development/scripts/triton.sh | 4 ++++ docs/deployment.md | 23 +++++++++++++++++++++++ tabby/tools/model_preload.py | 7 ++++++- 5 files changed, 34 insertions(+), 15 deletions(-) delete mode 100644 deployment/README.md create mode 100644 docs/deployment.md diff --git a/README.md b/README.md index 576166a..5cc7deb 100644 --- a/README.md +++ b/README.md @@ -10,4 +10,4 @@ An opensource / on-prem alternative to GitHub Copilot ## Deployment -See [deployment](./deployment/README.md) +See [deployment](./docs/deployment.md) diff --git a/deployment/README.md b/deployment/README.md deleted file mode 100644 index d3dbf52..0000000 --- a/deployment/README.md +++ /dev/null @@ -1,13 +0,0 @@ -## Prerequisites -You need install following dependencies -* docker `>= 17.06` -* An NVIDIA GPU with enough VRAM to run the model you want. -* [NVIDIA Docker Driver](https://docs.nvidia.com/datacenter/tesla/tesla-installation-notes/index.html) - -## Setup - -`docker-compose up` - -Open Admin Panel [http://localhost:8501](http://localhost:8501) - -![image](https://user-images.githubusercontent.com/388154/227792390-ec19e9b9-ebbb-4a94-99ca-8a142ffb5e46.png) diff --git a/development/scripts/triton.sh b/development/scripts/triton.sh index c86f3eb..bb0b05c 100755 --- a/development/scripts/triton.sh +++ b/development/scripts/triton.sh @@ -1,6 +1,9 @@ #!/bin/bash set -e +if [ -d "$MODEL_NAME" ]; then +MODEL_DIR="$MODEL_NAME" +else # Get model dir. MODEL_DIR=$(python3 <= 17.06` +* An NVIDIA GPU with enough VRAM to run the model you want. 
+* [NVIDIA Docker Driver](https://docs.nvidia.com/datacenter/tesla/tesla-installation-notes/index.html) + +## Setup Tabby Server with `docker-compose`. + +1. Go to the [`deployment`](../deployment) directory +2. Execute `docker-compose up`. + +## Tabby Client + +There are several ways to talk to the Tabby Server. + +### Tabby Admin Panel [http://localhost:8501](http://localhost:8501) + +![image](https://user-images.githubusercontent.com/388154/227792390-ec19e9b9-ebbb-4a94-99ca-8a142ffb5e46.png) + +### OpenAPI [http://localhost:5000](http://localhost:5000) + +![image](https://user-images.githubusercontent.com/388154/227835790-29e21eb5-6e9c-45ab-aa0f-c4c7ce399ad7.png) diff --git a/tabby/tools/model_preload.py b/tabby/tools/model_preload.py index f7bf7b4..89127e1 100644 --- a/tabby/tools/model_preload.py +++ b/tabby/tools/model_preload.py @@ -9,7 +9,12 @@ class Arguments: repo_id: str = field( metadata={"help": "Huggingface model repository id, e.g TabbyML/NeoX-160M"} ) - prefer_local_files: bool = True + prefer_local_files: bool = field( + metadata={ + "help": "Whether to prefer loading local files (skip remote version check if local files are valid)." + }, + default=True, + ) def parse_args():