25 lines
638 B
Python
25 lines
638 B
Python
import os
|
|
|
|
import uvicorn
|
|
from fastapi import FastAPI, Response
|
|
from fastapi.responses import JSONResponse
|
|
from models import CompletionsRequest, CompletionsResponse
|
|
from triton import TritonService
|
|
|
|
# ASGI application object; route handlers below register themselves on it.
# docs_url="/" mounts the interactive API docs at the site root.
app = FastAPI(
    docs_url="/",
    title="TabbyServer",
    description=(
        "TabbyServer is the backend for tabby, "
        "serving code completion requests from code editor / IDE."
    ),
)

# Module-level service instance used by the completion endpoint.
# The TOKENIZER_NAME environment variable is mandatory: the subscript lookup
# raises KeyError at import time when it is not set.
triton = TritonService(os.environ["TOKENIZER_NAME"])
|
|
|
|
|
|
# POST /v1/completions: hand the validated request body to the module-level
# `triton` service and return whatever it produces.
# (Kept as `#` comments rather than a docstring so the generated API docs
# are unchanged.)
@app.post("/v1/completions")
async def completions(data: CompletionsRequest) -> CompletionsResponse:
    # NOTE(review): `triton(data)` is invoked synchronously inside an async
    # handler; if that call blocks on I/O it would stall the event loop while
    # it runs -- confirm against TritonService.
    completion = triton(data)
    return completion
|
|
|
|
|
|
# Script entry point: only runs when this file is executed directly.
if __name__ == "__main__":
    # "app:app" is an import string: the `app` attribute of the `app` module.
    # Host 0.0.0.0 listens on all network interfaces.
    uvicorn.run(
        "app:app",
        port=5000,
        host="0.0.0.0",
    )
|