parent
fbcab616d7
commit
f177fab951
|
|
@ -1 +1,2 @@
|
|||
__pycache__
|
||||
docker-compose.override.yml
|
||||
|
|
|
|||
|
|
@ -0,0 +1,17 @@
|
|||
FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime
|
||||
|
||||
ENV POETRY_VERSION=1.4.0
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
|
||||
|
||||
RUN pip install "poetry==$POETRY_VERSION"
|
||||
|
||||
COPY poetry.lock pyproject.toml /app/
|
||||
RUN poetry export --without-hashes -o requirements.txt
|
||||
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
COPY ./preprocess preprocess
|
||||
COPY ./server server
|
||||
|
|
@ -1,17 +0,0 @@
|
|||
version: '3.3'
|
||||
services:
|
||||
triton:
|
||||
image: ghcr.io/tabbyml/fastertransformer_backend:main
|
||||
command: mpirun -n 1 --allow-run-as-root /opt/tritonserver/bin/tritonserver --model-repository=/model
|
||||
shm_size: 1gb
|
||||
ports:
|
||||
- "8001:8001"
|
||||
volumes:
|
||||
- ./testdata:/model
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: all
|
||||
capabilities: [gpu]
|
||||
|
|
@ -0,0 +1,30 @@
|
|||
version: '3.3'
|
||||
|
||||
services:
|
||||
server:
|
||||
build: .
|
||||
working_dir: /app
|
||||
command: /opt/conda/bin/python server/app.py
|
||||
environment:
|
||||
- TOKENIZER_NAME=/tokenizer
|
||||
- TRITON_HOST=triton
|
||||
ports:
|
||||
- "5000:5000"
|
||||
volumes:
|
||||
- ./testdata/gptneox/tokenizer:/tokenizer
|
||||
links:
|
||||
- triton
|
||||
|
||||
triton:
|
||||
image: tabbyml/fastertransformer_backend
|
||||
command: mpirun -n 1 --allow-run-as-root /opt/tritonserver/bin/tritonserver --model-repository=/model
|
||||
shm_size: 1gb
|
||||
volumes:
|
||||
- ./testdata/gptneox/models:/model
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
count: all
|
||||
capabilities: [gpu]
|
||||
|
|
@ -473,7 +473,7 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""}
|
|||
name = "cmake"
|
||||
version = "3.26.0"
|
||||
description = "CMake is an open-source, cross-platform family of tools designed to build, test and package software"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
|
|
@ -1118,7 +1118,7 @@ files = [
|
|||
name = "jinja2"
|
||||
version = "3.1.2"
|
||||
description = "A very fast and expressive template engine."
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
|
|
@ -1136,7 +1136,7 @@ i18n = ["Babel (>=2.7)"]
|
|||
name = "lit"
|
||||
version = "15.0.7"
|
||||
description = "A Software Testing Tool"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
|
|
@ -1147,7 +1147,7 @@ files = [
|
|||
name = "markupsafe"
|
||||
version = "2.1.2"
|
||||
description = "Safely add untrusted strings to HTML/XML markup."
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
|
|
@ -1207,7 +1207,7 @@ files = [
|
|||
name = "mpmath"
|
||||
version = "1.3.0"
|
||||
description = "Python library for arbitrary-precision floating-point arithmetic"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
|
|
@ -1336,7 +1336,7 @@ dill = ">=0.3.6"
|
|||
name = "networkx"
|
||||
version = "3.0"
|
||||
description = "Python package for creating and manipulating graphs and networks"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
|
|
@ -1408,7 +1408,7 @@ files = [
|
|||
name = "nvidia-cublas-cu11"
|
||||
version = "11.10.3.66"
|
||||
description = "CUBLAS native runtime libraries"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3"
|
||||
files = [
|
||||
|
|
@ -1424,7 +1424,7 @@ wheel = "*"
|
|||
name = "nvidia-cuda-cupti-cu11"
|
||||
version = "11.7.101"
|
||||
description = "CUDA profiling tools runtime libs."
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3"
|
||||
files = [
|
||||
|
|
@ -1440,7 +1440,7 @@ wheel = "*"
|
|||
name = "nvidia-cuda-nvrtc-cu11"
|
||||
version = "11.7.99"
|
||||
description = "NVRTC native runtime libraries"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3"
|
||||
files = [
|
||||
|
|
@ -1457,7 +1457,7 @@ wheel = "*"
|
|||
name = "nvidia-cuda-runtime-cu11"
|
||||
version = "11.7.99"
|
||||
description = "CUDA Runtime native Libraries"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3"
|
||||
files = [
|
||||
|
|
@ -1473,7 +1473,7 @@ wheel = "*"
|
|||
name = "nvidia-cudnn-cu11"
|
||||
version = "8.5.0.96"
|
||||
description = "cuDNN runtime libraries"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3"
|
||||
files = [
|
||||
|
|
@ -1489,7 +1489,7 @@ wheel = "*"
|
|||
name = "nvidia-cufft-cu11"
|
||||
version = "10.9.0.58"
|
||||
description = "CUFFT native runtime libraries"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3"
|
||||
files = [
|
||||
|
|
@ -1501,7 +1501,7 @@ files = [
|
|||
name = "nvidia-curand-cu11"
|
||||
version = "10.2.10.91"
|
||||
description = "CURAND native runtime libraries"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3"
|
||||
files = [
|
||||
|
|
@ -1517,7 +1517,7 @@ wheel = "*"
|
|||
name = "nvidia-cusolver-cu11"
|
||||
version = "11.4.0.1"
|
||||
description = "CUDA solver native runtime libraries"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3"
|
||||
files = [
|
||||
|
|
@ -1534,7 +1534,7 @@ wheel = "*"
|
|||
name = "nvidia-cusparse-cu11"
|
||||
version = "11.7.4.91"
|
||||
description = "CUSPARSE native runtime libraries"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3"
|
||||
files = [
|
||||
|
|
@ -1550,7 +1550,7 @@ wheel = "*"
|
|||
name = "nvidia-nccl-cu11"
|
||||
version = "2.14.3"
|
||||
description = "NVIDIA Collective Communication Library (NCCL) Runtime"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3"
|
||||
files = [
|
||||
|
|
@ -1561,7 +1561,7 @@ files = [
|
|||
name = "nvidia-nvtx-cu11"
|
||||
version = "11.7.91"
|
||||
description = "NVIDIA Tools Extension"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3"
|
||||
files = [
|
||||
|
|
@ -2148,7 +2148,7 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyam
|
|||
name = "sympy"
|
||||
version = "1.11.1"
|
||||
description = "Computer algebra system (CAS) in Python"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3.8"
|
||||
files = [
|
||||
|
|
@ -2218,7 +2218,7 @@ testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests"]
|
|||
name = "torch"
|
||||
version = "2.0.0"
|
||||
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3.8.0"
|
||||
files = [
|
||||
|
|
@ -2359,7 +2359,7 @@ vision = ["Pillow"]
|
|||
name = "triton"
|
||||
version = "2.0.0"
|
||||
description = "A language and compiler for custom Deep Learning operations"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
files = [
|
||||
|
|
@ -2484,7 +2484,7 @@ test = ["covdefaults (>=2.2.2)", "coverage (>=7.1)", "coverage-enable-subprocess
|
|||
name = "wheel"
|
||||
version = "0.40.0"
|
||||
description = "A built-package format for Python"
|
||||
category = "main"
|
||||
category = "dev"
|
||||
optional = false
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
|
|
@ -2761,4 +2761,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"]
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = "^3.10"
|
||||
content-hash = "4bc92342585b459fe432e095b5c8b1c6590551573156a2b1fd795a7906e21337"
|
||||
content-hash = "fe4f298c8e9e421dab4834a61eda5d46f8720736ec5d604fb4abff2d90020835"
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ readme = "README.md"
|
|||
python = "^3.10"
|
||||
datasets = "^2.10.1"
|
||||
transformers = "^4.27.1"
|
||||
torch = "^2.0.0"
|
||||
fastapi = "^0.95.0"
|
||||
uvicorn = "^0.21.1"
|
||||
tritonclient = {extras = ["all"], version = "^2.31.0"}
|
||||
|
|
@ -18,6 +17,7 @@ tritonclient = {extras = ["all"], version = "^2.31.0"}
|
|||
|
||||
[tool.poetry.group.dev.dependencies]
|
||||
pre-commit = "^3.1.1"
|
||||
torch = "^2.0.0"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
|
|
|
|||
|
|
@ -12,7 +12,11 @@ app = FastAPI(
|
|||
docs_url="/",
|
||||
)
|
||||
|
||||
triton = TritonService(os.environ["TOKENIZER_NAME"])
|
||||
triton = TritonService(
|
||||
tokenizer_name=os.environ.get("TOKENIZER_NAME", None),
|
||||
host=os.environ.get("TRITON_HOST", "localhost"),
|
||||
port=os.environ.get("TRITON_PORT", "8001"),
|
||||
)
|
||||
|
||||
|
||||
@app.post("/v1/completions")
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue