tabby/server/app.py

45 lines
1.3 KiB
Python
Raw Normal View History

import logging
2023-03-20 14:57:29 +00:00
import os
import events
2023-03-20 14:12:05 +00:00
import uvicorn
from fastapi import FastAPI, Response
from fastapi.responses import JSONResponse
from models import CompletionRequest, CompletionResponse
from python import PythonModelService
2023-03-20 14:57:29 +00:00
from triton import TritonService
2023-03-20 14:12:05 +00:00
# ASGI application object. The route decorators in this module register on it,
# and the __main__ guard hands it to uvicorn through the "app:app" import string.
app = FastAPI(
    title="TabbyServer",
    description="TabbyServer is the backend for tabby, serving code completion requests from code editor / IDE.",
    # Mount the interactive API docs at the site root instead of FastAPI's
    # default "/docs" path.
    docs_url="/",
)
# Choose the completion backend once, at import time, from the environment.
#
#   MODEL_BACKEND          "triton" selects TritonService; any other value
#                          (including the default "python") selects
#                          PythonModelService.
#   PYTHON_MODEL_NAME      required for the Python backend; a missing variable
#                          raises KeyError while this module is being imported.
#   TRITON_TOKENIZER_NAME  optional, passed through as None when unset.
#   TRITON_HOST            defaults to "triton".
#   TRITON_PORT            defaults to "8001" (forwarded as a string).
MODEL_BACKEND = os.environ.get("MODEL_BACKEND", "python")

if MODEL_BACKEND != "triton":
    model_backend = PythonModelService(os.environ["PYTHON_MODEL_NAME"])
else:
    model_backend = TritonService(
        tokenizer_name=os.environ.get("TRITON_TOKENIZER_NAME"),
        host=os.environ.get("TRITON_HOST", "triton"),
        port=os.environ.get("TRITON_PORT", "8001"),
    )
2023-03-20 14:57:29 +00:00
2023-03-20 14:12:05 +00:00
@app.post("/v1/completions")
async def completions(request: CompletionRequest) -> CompletionResponse:
    # Serve one completion request: run the configured backend on it, record
    # the request/response pair through the events module, and return the
    # backend's response unchanged.
    #
    # Documented with comments rather than a docstring on purpose: FastAPI
    # publishes a handler's docstring as the operation description in the
    # generated OpenAPI schema, which would alter the served docs.
    #
    # NOTE(review): the backend is invoked synchronously inside an async
    # handler; if that call blocks for the duration of inference it will stall
    # the event loop -- confirm against the backend implementations.
    completion = model_backend(request)
    events.log_completions(request, completion)
    return completion
@app.post("/v1/completions/{id}/choices/{index}/selection")
async def selection(id: str, index: int) -> JSONResponse:
    # Record that a choice of a completion was selected. `id` and `index` are
    # taken from the URL path and forwarded as-is to the events module; the
    # reply is always the JSON string "ok".
    #
    # `id` shadows the builtin, but it is the route's path-parameter name and
    # therefore part of the public interface, so it is kept.
    events.log_selection(id, index)
    acknowledgement = JSONResponse(content="ok")
    return acknowledgement
2023-03-20 14:12:05 +00:00
if __name__ == "__main__":
    # Script entry point: start uvicorn on port 5000, bound to all interfaces.
    # The "app:app" import string names this module ("app") and the FastAPI
    # instance inside it ("app").
    uvicorn.run(
        "app:app",
        host="0.0.0.0",
        port=5000,
    )