feat: make --model optional, so user can start a chat only instance (#817)
parent
fb2b8dab2b
commit
25621547a7
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
## Fixes and Improvements
|
||||
* Fix slow repository indexing caused by a constrained memory arena in the tantivy index writer.
|
||||
* Make `--model` optional, so users can create a chat-only instance.
|
||||
|
||||
# v0.5.5
|
||||
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ struct ApiDoc;
|
|||
pub struct ServeArgs {
|
||||
/// Model id for `/completions` API endpoint.
|
||||
#[clap(long)]
|
||||
model: String,
|
||||
model: Option<String>,
|
||||
|
||||
/// Model id for `/chat/completions` API endpoints.
|
||||
#[clap(long)]
|
||||
|
|
@ -122,7 +122,10 @@ pub async fn main(config: &Config, args: &ServeArgs) {
|
|||
}
|
||||
|
||||
async fn load_model(args: &ServeArgs) {
|
||||
download_model_if_needed(&args.model).await;
|
||||
if let Some(model) = &args.model {
|
||||
download_model_if_needed(model).await;
|
||||
}
|
||||
|
||||
if let Some(chat_model) = &args.chat_model {
|
||||
download_model_if_needed(chat_model).await
|
||||
}
|
||||
|
|
@ -131,16 +134,21 @@ async fn load_model(args: &ServeArgs) {
|
|||
async fn api_router(args: &ServeArgs, config: &Config) -> Router {
|
||||
let logger = Arc::new(create_logger());
|
||||
let code = Arc::new(crate::services::code::create_code_search());
|
||||
let completion = Arc::new(
|
||||
create_completion_service(
|
||||
code.clone(),
|
||||
logger.clone(),
|
||||
&args.model,
|
||||
&args.device,
|
||||
args.parallelism,
|
||||
)
|
||||
.await,
|
||||
);
|
||||
|
||||
let completion_state = if let Some(model) = &args.model {
|
||||
Some(Arc::new(
|
||||
create_completion_service(
|
||||
code.clone(),
|
||||
logger.clone(),
|
||||
model,
|
||||
&args.device,
|
||||
args.parallelism,
|
||||
)
|
||||
.await,
|
||||
))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let chat_state = if let Some(chat_model) = &args.chat_model {
|
||||
Some(Arc::new(
|
||||
|
|
@ -153,7 +161,7 @@ async fn api_router(args: &ServeArgs, config: &Config) -> Router {
|
|||
let mut routers = vec![];
|
||||
|
||||
let health_state = Arc::new(health::HealthState::new(
|
||||
&args.model,
|
||||
args.model.as_deref(),
|
||||
args.chat_model.as_deref(),
|
||||
&args.device,
|
||||
));
|
||||
|
|
@ -173,16 +181,18 @@ async fn api_router(args: &ServeArgs, config: &Config) -> Router {
|
|||
)
|
||||
});
|
||||
|
||||
routers.push({
|
||||
Router::new()
|
||||
.route(
|
||||
"/v1/completions",
|
||||
routing::post(routes::completions).with_state(completion),
|
||||
)
|
||||
.layer(TimeoutLayer::new(Duration::from_secs(
|
||||
config.server.completion_timeout,
|
||||
)))
|
||||
});
|
||||
if let Some(completion_state) = completion_state {
|
||||
routers.push({
|
||||
Router::new()
|
||||
.route(
|
||||
"/v1/completions",
|
||||
routing::post(routes::completions).with_state(completion_state),
|
||||
)
|
||||
.layer(TimeoutLayer::new(Duration::from_secs(
|
||||
config.server.completion_timeout,
|
||||
)))
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(chat_state) = chat_state {
|
||||
routers.push({
|
||||
|
|
@ -209,7 +219,11 @@ async fn api_router(args: &ServeArgs, config: &Config) -> Router {
|
|||
}
|
||||
|
||||
fn start_heartbeat(args: &ServeArgs) {
|
||||
let state = health::HealthState::new(&args.model, args.chat_model.as_deref(), &args.device);
|
||||
let state = health::HealthState::new(
|
||||
args.model.as_deref(),
|
||||
args.chat_model.as_deref(),
|
||||
&args.device,
|
||||
);
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
usage::capture("ServeHealth", &state).await;
|
||||
|
|
|
|||
|
|
@ -10,7 +10,8 @@ use crate::Device;
|
|||
|
||||
#[derive(Serialize, Deserialize, ToSchema, Clone, Debug)]
|
||||
pub struct HealthState {
|
||||
model: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
model: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
chat_model: Option<String>,
|
||||
device: String,
|
||||
|
|
@ -22,7 +23,7 @@ pub struct HealthState {
|
|||
}
|
||||
|
||||
impl HealthState {
|
||||
pub fn new(model: &str, chat_model: Option<&str>, device: &Device) -> Self {
|
||||
pub fn new(model: Option<&str>, chat_model: Option<&str>, device: &Device) -> Self {
|
||||
let (cpu_info, cpu_count) = read_cpu_info();
|
||||
|
||||
let cuda_devices = match read_cuda_devices() {
|
||||
|
|
@ -31,7 +32,7 @@ impl HealthState {
|
|||
};
|
||||
|
||||
Self {
|
||||
model: model.to_owned(),
|
||||
model: model.map(|x| x.to_owned()),
|
||||
chat_model: chat_model.map(|x| x.to_owned()),
|
||||
device: device.to_string(),
|
||||
arch: ARCH.to_string(),
|
||||
|
|
|
|||
|
|
@ -98,12 +98,13 @@ function MainPanel() {
|
|||
<div className="mt-4 rounded-lg bg-zinc-100 p-4 dark:bg-zinc-800">
|
||||
<span className="font-bold">Workers</span>
|
||||
<div className="mt-4 flex flex-col gap-4 lg:flex-row lg:flex-wrap">
|
||||
<WorkerCard
|
||||
source="localhost"
|
||||
name={healthInfo.model}
|
||||
type="completion"
|
||||
health={healthInfo}
|
||||
/>
|
||||
{healthInfo.model &&
|
||||
<WorkerCard
|
||||
source="localhost"
|
||||
name={healthInfo.model}
|
||||
type="completion"
|
||||
health={healthInfo}
|
||||
/>}
|
||||
{healthInfo.chat_model && (
|
||||
<WorkerCard
|
||||
source="localhost"
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import fetcher from '@/lib/tabby-fetcher'
|
|||
|
||||
export interface HealthInfo {
|
||||
device: 'metal' | 'cpu' | 'cuda'
|
||||
model: string
|
||||
model?: string
|
||||
chat_model?: string
|
||||
cpu_info: string
|
||||
cpu_count: number
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -1,7 +1,7 @@
|
|||
1:HL["/_next/static/media/86fdec36ddd9097e-s.p.woff2","font",{"crossOrigin":"","type":"font/woff2"}]
|
||||
2:HL["/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2","font",{"crossOrigin":"","type":"font/woff2"}]
|
||||
3:HL["/_next/static/css/83506ffe28f4b91c.css","style"]
|
||||
0:["y8UTfVMfS3WRo-O_Ue2fm",[[["",{"children":["(dashboard)",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],"$L4",[[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/83506ffe28f4b91c.css","precedence":"next"}]],"$L5"]]]]
|
||||
0:["mnxnTVxz0_SgSNkjMXene",[[["",{"children":["(dashboard)",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],"$L4",[[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/83506ffe28f4b91c.css","precedence":"next"}]],"$L5"]]]]
|
||||
6:I{"id":5925,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Toaster","async":false}
|
||||
7:I{"id":78495,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Providers","async":false}
|
||||
8:I{"id":11486,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Header","async":false}
|
||||
|
|
@ -9,7 +9,7 @@
|
|||
a:I{"id":18639,"chunks":["272:static/chunks/webpack-7f4514982162b5cb.js","971:static/chunks/fd9d1056-6779f76bb208370f.js","590:static/chunks/590-758bf4c4ecd0fce8.js"],"name":"","async":false}
|
||||
b:I{"id":30831,"chunks":["882:static/chunks/882-5574357230a12cf9.js","396:static/chunks/396-89319c375c83667c.js","642:static/chunks/app/(dashboard)/layout-81899020c101ed0e.js"],"name":"","async":false}
|
||||
d:I{"id":65146,"chunks":["272:static/chunks/webpack-7f4514982162b5cb.js","971:static/chunks/fd9d1056-6779f76bb208370f.js","590:static/chunks/590-758bf4c4ecd0fce8.js"],"name":"","async":false}
|
||||
e:I{"id":60901,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","286:static/chunks/286-453e01dda7220f79.js","967:static/chunks/967-f02728ac0b1b01a8.js","130:static/chunks/app/(dashboard)/page-1cc6ccaef036b0bc.js"],"name":"","async":false}
|
||||
e:I{"id":60901,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","286:static/chunks/286-453e01dda7220f79.js","967:static/chunks/967-f02728ac0b1b01a8.js","130:static/chunks/app/(dashboard)/page-fe128a03e1e2a101.js"],"name":"","async":false}
|
||||
5:[["$","meta","0",{"charSet":"utf-8"}],["$","title","1",{"children":"Tabby - Dashboard"}],["$","meta","2",{"name":"description","content":"Tabby, an opensource, self-hosted AI coding assistant."}],["$","meta","3",{"name":"theme-color","media":"(prefers-color-scheme: light)","content":"white"}],["$","meta","4",{"name":"theme-color","media":"(prefers-color-scheme: dark)","content":"black"}],["$","meta","5",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","6",{"name":"next-size-adjust"}]]
|
||||
4:[null,["$","html",null,{"lang":"en","suppressHydrationWarning":true,"children":[["$","head",null,{}],["$","body",null,{"className":"font-sans antialiased __variable_e66fe9 __variable_bd9c35","children":[["$","$L6",null,{}],["$","$L7",null,{"attribute":"class","defaultTheme":"system","enableSystem":true,"children":[["$","div",null,{"className":"flex min-h-screen flex-col","children":[["$","$L8",null,{}],["$","main",null,{"className":"bg-muted/50 flex flex-1 flex-col","children":["$","$L9",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","template":["$","$La",null,{}],"templateStyles":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be 
found."}]}]]}]}]],"notFoundStyles":[],"childProp":{"current":[null,["$","$Lb",null,{"className":"flex-1","children":["$","$L9",null,{"parallelRouterKey":"children","segmentPath":["children","(dashboard)","children"],"loading":"$undefined","loadingStyles":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","template":["$","$La",null,{}],"templateStyles":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"childProp":{"current":["$Lc",["$","$Ld",null,{"propsForComponent":{"params":{}},"Component":"$e"}],null],"segment":"__PAGE__"},"styles":[]}]}],null],"segment":"(dashboard)"},"styles":[]}]}]]}],null]}]]}]]}],null]
|
||||
c:null
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -1,7 +1,7 @@
|
|||
1:HL["/_next/static/media/86fdec36ddd9097e-s.p.woff2","font",{"crossOrigin":"","type":"font/woff2"}]
|
||||
2:HL["/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2","font",{"crossOrigin":"","type":"font/woff2"}]
|
||||
3:HL["/_next/static/css/83506ffe28f4b91c.css","style"]
|
||||
0:["y8UTfVMfS3WRo-O_Ue2fm",[[["",{"children":["playground",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],"$L4",[[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/83506ffe28f4b91c.css","precedence":"next"}]],"$L5"]]]]
|
||||
0:["mnxnTVxz0_SgSNkjMXene",[[["",{"children":["playground",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],"$L4",[[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/83506ffe28f4b91c.css","precedence":"next"}]],"$L5"]]]]
|
||||
6:I{"id":5925,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Toaster","async":false}
|
||||
7:I{"id":78495,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Providers","async":false}
|
||||
8:I{"id":11486,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Header","async":false}
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -1,7 +1,7 @@
|
|||
1:HL["/_next/static/media/86fdec36ddd9097e-s.p.woff2","font",{"crossOrigin":"","type":"font/woff2"}]
|
||||
2:HL["/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2","font",{"crossOrigin":"","type":"font/woff2"}]
|
||||
3:HL["/_next/static/css/83506ffe28f4b91c.css","style"]
|
||||
0:["y8UTfVMfS3WRo-O_Ue2fm",[[["",{"children":["(dashboard)",{"children":["swagger",{"children":["__PAGE__",{}]}]}]},"$undefined","$undefined",true],"$L4",[[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/83506ffe28f4b91c.css","precedence":"next"}]],"$L5"]]]]
|
||||
0:["mnxnTVxz0_SgSNkjMXene",[[["",{"children":["(dashboard)",{"children":["swagger",{"children":["__PAGE__",{}]}]}]},"$undefined","$undefined",true],"$L4",[[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/83506ffe28f4b91c.css","precedence":"next"}]],"$L5"]]]]
|
||||
6:I{"id":5925,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Toaster","async":false}
|
||||
7:I{"id":78495,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Providers","async":false}
|
||||
8:I{"id":11486,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Header","async":false}
|
||||
|
|
|
|||
Loading…
Reference in New Issue