docs: improve
parent
09cd8a38ed
commit
ffe777e519
|
|
@ -1,59 +0,0 @@
|
||||||
# Docker
|
|
||||||
|
|
||||||
A Docker image is supplied to make it easier to deploy the server as a container.
|
|
||||||
|
|
||||||
|
|
||||||
## CPU
|
|
||||||
|
|
||||||
**Command line**
|
|
||||||
```bash
|
|
||||||
docker run \
|
|
||||||
-p 8080:8080 -v $HOME/.tabby:/data \
|
|
||||||
tabbyml/tabby serve --model TabbyML/SantaCoder-1B
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
**Docker Compose**
|
|
||||||
```yaml
|
|
||||||
version: '3.5'
|
|
||||||
|
|
||||||
services:
|
|
||||||
tabby:
|
|
||||||
restart: always
|
|
||||||
image: tabbyml/tabby
|
|
||||||
command: serve --model TabbyML/SantaCoder-1B
|
|
||||||
volumes:
|
|
||||||
- "$HOME/.tabby:/data"
|
|
||||||
ports:
|
|
||||||
- 8080:8080
|
|
||||||
```
|
|
||||||
|
|
||||||
## CUDA (requires NVIDIA Container Toolkit)
|
|
||||||
|
|
||||||
**Command line**
|
|
||||||
```bash
|
|
||||||
docker run \
|
|
||||||
--gpus all -p 8080:8080 -v $HOME/.tabby:/data \
|
|
||||||
tabbyml/tabby \
|
|
||||||
serve --model TabbyML/SantaCoder-1B --device cuda
|
|
||||||
```
|
|
||||||
|
|
||||||
**Docker Compose**
|
|
||||||
```yaml
|
|
||||||
version: '3.5'
|
|
||||||
services:
|
|
||||||
tabby:
|
|
||||||
restart: always
|
|
||||||
image: tabbyml/tabby
|
|
||||||
command: serve --model TabbyML/SantaCoder-1B --device cuda
|
|
||||||
volumes:
|
|
||||||
- "$HOME/.tabby:/data"
|
|
||||||
ports:
|
|
||||||
- 8080:8080
|
|
||||||
resources:
|
|
||||||
reservations:
|
|
||||||
devices:
|
|
||||||
- driver: nvidia
|
|
||||||
count: 1
|
|
||||||
capabilities: [gpu]
|
|
||||||
```
|
|
||||||
|
|
@ -0,0 +1,76 @@
|
||||||
|
# Docker
|
||||||
|
|
||||||
|
A Docker image is supplied to make it easier to deploy the server as a container.
|
||||||
|
|
||||||
|
import Tabs from '@theme/Tabs';
|
||||||
|
import TabItem from '@theme/TabItem';
|
||||||
|
|
||||||
|
## CPU
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="shell" label="Shell" default>
|
||||||
|
|
||||||
|
```bash title="run.sh"
|
||||||
|
docker run \
|
||||||
|
-p 8080:8080 -v $HOME/.tabby:/data \
|
||||||
|
tabbyml/tabby serve --model TabbyML/SantaCoder-1B
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="compose" label="Docker Compose">
|
||||||
|
|
||||||
|
```yaml title="docker-compose.yml"
|
||||||
|
version: '3.5'
|
||||||
|
|
||||||
|
services:
|
||||||
|
tabby:
|
||||||
|
restart: always
|
||||||
|
image: tabbyml/tabby
|
||||||
|
command: serve --model TabbyML/SantaCoder-1B
|
||||||
|
volumes:
|
||||||
|
- "$HOME/.tabby:/data"
|
||||||
|
ports:
|
||||||
|
- 8080:8080
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
||||||
|
## CUDA (requires NVIDIA Container Toolkit)
|
||||||
|
|
||||||
|
<Tabs>
|
||||||
|
<TabItem value="shell" label="Shell" default>
|
||||||
|
|
||||||
|
```bash title="run.sh"
|
||||||
|
docker run \
|
||||||
|
--gpus all -p 8080:8080 -v $HOME/.tabby:/data \
|
||||||
|
tabbyml/tabby \
|
||||||
|
serve --model TabbyML/SantaCoder-1B --device cuda
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
<TabItem value="compose" label="Docker Compose">
|
||||||
|
|
||||||
|
```yaml title="docker-compose.yml"
|
||||||
|
version: '3.5'
|
||||||
|
services:
|
||||||
|
tabby:
|
||||||
|
restart: always
|
||||||
|
image: tabbyml/tabby
|
||||||
|
command: serve --model TabbyML/SantaCoder-1B --device cuda
|
||||||
|
volumes:
|
||||||
|
- "$HOME/.tabby:/data"
|
||||||
|
ports:
|
||||||
|
- 8080:8080
|
||||||
|
deploy:
  resources:
|
||||||
|
reservations:
|
||||||
|
devices:
|
||||||
|
- driver: nvidia
|
||||||
|
count: 1
|
||||||
|
capabilities: [gpu]
|
||||||
|
```
|
||||||
|
|
||||||
|
</TabItem>
|
||||||
|
</Tabs>
|
||||||
|
|
||||||
|
|
@ -7,3 +7,7 @@ Thanks to Apple's Accelerate and CoreML frameworks, we can now run Tabby on edge
|
||||||
2. Run `tabby --help` to verify successful installation.
|
2. Run `tabby --help` to verify successful installation.
|
||||||
|
|
||||||
3. Start the server with `tabby serve --model TabbyML/T5P-220M`.
|
3. Start the server with `tabby serve --model TabbyML/T5P-220M`.
|
||||||
|
|
||||||
|
|
||||||
|
:::tip
|
||||||
|
The compute power of M1/M2 is limited and is likely to be sufficient only for individual usage. If you require a shared instance for a team, we recommend considering Docker hosting with CUDA. You can find more information about Docker [here](./docker).
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue