fix(docs): add installation guide on modal (#608)

* fix(docs): add installation guide on modal

* remove modal-deploy
r0.4
Meng Zhang 2023-10-21 22:47:48 -07:00 committed by GitHub
parent c03be1596d
commit 912f8aab3d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 137 additions and 11 deletions

View File

@ -1,11 +1,4 @@
""" from modal import Image, Stub, asgi_app, gpu
modal serve app.py
"""
from pathlib import Path
import modal
from modal import Image, Mount, Secret, Stub, asgi_app, gpu, method
IMAGE_NAME = "tabbyml/tabby:0.3.1" IMAGE_NAME = "tabbyml/tabby:0.3.1"
MODEL_ID = "TabbyML/StarCoder-1B" MODEL_ID = "TabbyML/StarCoder-1B"
@ -27,7 +20,7 @@ def download_model():
image = ( image = (
Image.from_registry( Image.from_registry(
"tabbyml/tabby:0.3.1", IMAGE_NAME,
add_python="3.11", add_python="3.11",
) )
.dockerfile_commands("ENTRYPOINT []") .dockerfile_commands("ENTRYPOINT []")
@ -65,7 +58,7 @@ def app():
) )
# Poll until webserver at 127.0.0.1:8000 accepts connections before running inputs. # Poll until webserver at 127.0.0.1:8000 accepts connections before running inputs.
def webserver_ready(): def tabby_ready():
try: try:
socket.create_connection(("127.0.0.1", 8000), timeout=1).close() socket.create_connection(("127.0.0.1", 8000), timeout=1).close()
return True return True
@ -77,7 +70,7 @@ def app():
raise RuntimeError(f"launcher exited unexpectedly with code {retcode}") raise RuntimeError(f"launcher exited unexpectedly with code {retcode}")
return False return False
while not webserver_ready(): while not tabby_ready():
time.sleep(1.0) time.sleep(1.0)
print("Tabby server ready!") print("Tabby server ready!")

View File

@ -0,0 +1,124 @@
# Modal
Modal is a serverless GPU provider. By leveraging Modal, your Tabby instance will run on demand. When there are no requests to the Tabby server for a certain amount of time, Modal will schedule the container to sleep, thereby saving GPU costs.
## Setup
First we import the components we need from `modal`.
```python
from modal import Image, Mount, Secret, Stub, asgi_app, gpu, method
```
Next, we set the base docker image version and the model to serve, taking care to specify the GPU configuration required to fit the model into VRAM.
```python
# Base Tabby docker image to build from, the model to serve, and the GPU
# type required to fit that model into VRAM.
IMAGE_NAME = "tabbyml/tabby:0.3.1"
MODEL_ID = "TabbyML/StarCoder-1B"
GPU_CONFIG = gpu.T4()
```
## Define the container image
We want to create a Modal image which has the Tabby model cache pre-populated. The benefit of this is that the container no longer has to re-download the model - instead, it will take advantage of Modal's internal filesystem for faster cold starts.
### Download the weights
```python
def download_model():
    """Download the Tabby model weights at image-build time.

    Runs the `tabby download` CLI so the model is baked into the image,
    giving containers a pre-populated cache and faster cold starts.
    """
    import subprocess

    subprocess.run(
        [
            "/opt/tabby/bin/tabby",
            "download",
            "--model",
            MODEL_ID,
        ],
        # Fail the image build loudly if the download does not succeed;
        # otherwise the image would be built without the model cached.
        check=True,
    )
```
### Image definition
We'll start from an image provided by Tabby, and override the default ENTRYPOINT so that Modal can run its own, which enables seamless serverless deployments.
Next we run the download step to pre-populate the image with our model weights.
Finally, we install the `asgi-proxy-lib` package to interface with Modal's ASGI webserver over localhost.
```python
# Serving image: the official Tabby release with Python 3.11 added on top.
image = (
    Image.from_registry(
        "tabbyml/tabby:0.3.1",
        add_python="3.11",
    )
    # Clear the image's default ENTRYPOINT so Modal can run its own runtime.
    .dockerfile_commands("ENTRYPOINT []")
    # Bake the model weights into the image for faster cold starts.
    .run_function(download_model)
    # Proxy library that bridges Modal's ASGI endpoint to the local server.
    .pip_install("asgi-proxy-lib")
)
```
### The app function
The endpoint function is represented with Modal's `@stub.function`. Here, we:
1. Launch the Tabby process and wait for it to be ready to accept requests.
2. Create an ASGI proxy to tunnel requests from the Modal web endpoint to the local Tabby server.
3. Specify that each container is allowed to handle up to 10 requests simultaneously.
4. Keep idle containers for 2 minutes before spinning them down.
```python
# NOTE(review): `stub` must be a modal.Stub defined at module level; its
# definition is not shown in this snippet — confirm against the full app.py.
@stub.function(
    gpu=GPU_CONFIG,
    # Each container may handle up to 10 requests simultaneously.
    allow_concurrent_inputs=10,
    # Keep idle containers for 2 minutes before spinning them down.
    container_idle_timeout=120,
    timeout=360,
)
@asgi_app()
def app():
    """Launch a local Tabby server and expose it as a Modal ASGI web endpoint.

    Starts `tabby serve` as a subprocess, blocks until it accepts TCP
    connections on port 8000 (or raises if the subprocess dies first), then
    returns an ASGI proxy that forwards incoming requests to it.
    """
    import socket
    import subprocess
    import time
    from asgi_proxy import asgi_proxy
    # Launch the Tabby server in the background on port 8000, using the GPU.
    launcher = subprocess.Popen(
        [
            "/opt/tabby/bin/tabby",
            "serve",
            "--model",
            MODEL_ID,
            "--port",
            "8000",
            "--device",
            "cuda",
        ]
    )
    # Poll until webserver at 127.0.0.1:8000 accepts connections before running inputs.
    def tabby_ready():
        try:
            socket.create_connection(("127.0.0.1", 8000), timeout=1).close()
            return True
        except (socket.timeout, ConnectionRefusedError):
            # Check if launcher webserving process has exited.
            # If so, a connection can never be made.
            retcode = launcher.poll()
            if retcode is not None:
                raise RuntimeError(f"launcher exited unexpectedly with code {retcode}")
            return False
    while not tabby_ready():
        time.sleep(1.0)
    print("Tabby server ready!")
    # Tunnel all traffic from the Modal web endpoint to the local Tabby server.
    return asgi_proxy("http://localhost:8000")
```
### Serve the app
Once we run this app with `modal serve app.py`, it will output the URL of the web endpoint, in the form of `https://<USERNAME>--tabby-server-starcoder-1b-app-dev.modal.run`. This URL can be used as the Tabby server URL in Tabby editor extensions!
See [app.py](https://github.com/TabbyML/tabby/tree/main/website/docs/modal/app.py) for a complete example.
## Feedback and support
If you have improvement suggestions or need specific support, please join the [Tabby Slack community](https://join.slack.com/t/tabbycommunity/shared_invite/zt-1xeiddizp-bciR2RtFTaJ37RBxr8VxpA) or reach out on [Tabby's GitHub repository](https://github.com/TabbyML/tabby).

View File

@ -25,6 +25,7 @@
"postcss": "^8.4.24", "postcss": "^8.4.24",
"posthog-docusaurus": "^2.0.0", "posthog-docusaurus": "^2.0.0",
"prism-react-renderer": "^1.3.5", "prism-react-renderer": "^1.3.5",
"raw-loader": "^4.0.2",
"react": "^17.0.2", "react": "^17.0.2",
"react-dom": "^17.0.2", "react-dom": "^17.0.2",
"tailwindcss": "^3.3.2", "tailwindcss": "^3.3.2",

View File

@ -6893,6 +6893,14 @@ raw-body@2.5.1:
iconv-lite "0.4.24" iconv-lite "0.4.24"
unpipe "1.0.0" unpipe "1.0.0"
raw-loader@^4.0.2:
version "4.0.2"
resolved "https://registry.yarnpkg.com/raw-loader/-/raw-loader-4.0.2.tgz#1aac6b7d1ad1501e66efdac1522c73e59a584eb6"
integrity sha512-ZnScIV3ag9A4wPX/ZayxL/jZH+euYb6FcUinPcgiQW0+UBtEv0O6Q3lGd3cqJ+GHH+rksEv3Pj99oxJ3u3VIKA==
dependencies:
loader-utils "^2.0.0"
schema-utils "^3.0.0"
rc@1.2.8, rc@^1.2.8: rc@1.2.8, rc@^1.2.8:
version "1.2.8" version "1.2.8"
resolved "https://registry.yarnpkg.com/rc/-/rc-1.2.8.tgz#cd924bf5200a075b83c188cd6b9e211b7fc0d3ed" resolved "https://registry.yarnpkg.com/rc/-/rc-1.2.8.tgz#cd924bf5200a075b83c188cd6b9e211b7fc0d3ed"