feat: make --model optional, so user can start a chat only instance (#817)
parent
fb2b8dab2b
commit
25621547a7
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
## Fixes and Improvements
|
||||
* Fix the slow repository indexing due to the constrained memory arena in the tantivy index writer.
|
||||
* Make `--model` optional, so users can create a chat-only instance.
|
||||
|
||||
# v0.5.5
|
||||
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ struct ApiDoc;
|
|||
pub struct ServeArgs {
|
||||
/// Model id for `/completions` API endpoint.
|
||||
#[clap(long)]
|
||||
model: String,
|
||||
model: Option<String>,
|
||||
|
||||
/// Model id for `/chat/completions` API endpoints.
|
||||
#[clap(long)]
|
||||
|
|
@ -122,7 +122,10 @@ pub async fn main(config: &Config, args: &ServeArgs) {
|
|||
}
|
||||
|
||||
async fn load_model(args: &ServeArgs) {
|
||||
download_model_if_needed(&args.model).await;
|
||||
if let Some(model) = &args.model {
|
||||
download_model_if_needed(model).await;
|
||||
}
|
||||
|
||||
if let Some(chat_model) = &args.chat_model {
|
||||
download_model_if_needed(chat_model).await
|
||||
}
|
||||
|
|
@ -131,16 +134,21 @@ async fn load_model(args: &ServeArgs) {
|
|||
async fn api_router(args: &ServeArgs, config: &Config) -> Router {
|
||||
let logger = Arc::new(create_logger());
|
||||
let code = Arc::new(crate::services::code::create_code_search());
|
||||
let completion = Arc::new(
|
||||
|
||||
let completion_state = if let Some(model) = &args.model {
|
||||
Some(Arc::new(
|
||||
create_completion_service(
|
||||
code.clone(),
|
||||
logger.clone(),
|
||||
&args.model,
|
||||
model,
|
||||
&args.device,
|
||||
args.parallelism,
|
||||
)
|
||||
.await,
|
||||
);
|
||||
))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let chat_state = if let Some(chat_model) = &args.chat_model {
|
||||
Some(Arc::new(
|
||||
|
|
@ -153,7 +161,7 @@ async fn api_router(args: &ServeArgs, config: &Config) -> Router {
|
|||
let mut routers = vec![];
|
||||
|
||||
let health_state = Arc::new(health::HealthState::new(
|
||||
&args.model,
|
||||
args.model.as_deref(),
|
||||
args.chat_model.as_deref(),
|
||||
&args.device,
|
||||
));
|
||||
|
|
@ -173,16 +181,18 @@ async fn api_router(args: &ServeArgs, config: &Config) -> Router {
|
|||
)
|
||||
});
|
||||
|
||||
if let Some(completion_state) = completion_state {
|
||||
routers.push({
|
||||
Router::new()
|
||||
.route(
|
||||
"/v1/completions",
|
||||
routing::post(routes::completions).with_state(completion),
|
||||
routing::post(routes::completions).with_state(completion_state),
|
||||
)
|
||||
.layer(TimeoutLayer::new(Duration::from_secs(
|
||||
config.server.completion_timeout,
|
||||
)))
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(chat_state) = chat_state {
|
||||
routers.push({
|
||||
|
|
@ -209,7 +219,11 @@ async fn api_router(args: &ServeArgs, config: &Config) -> Router {
|
|||
}
|
||||
|
||||
fn start_heartbeat(args: &ServeArgs) {
|
||||
let state = health::HealthState::new(&args.model, args.chat_model.as_deref(), &args.device);
|
||||
let state = health::HealthState::new(
|
||||
args.model.as_deref(),
|
||||
args.chat_model.as_deref(),
|
||||
&args.device,
|
||||
);
|
||||
tokio::spawn(async move {
|
||||
loop {
|
||||
usage::capture("ServeHealth", &state).await;
|
||||
|
|
|
|||
|
|
@ -10,7 +10,8 @@ use crate::Device;
|
|||
|
||||
#[derive(Serialize, Deserialize, ToSchema, Clone, Debug)]
|
||||
pub struct HealthState {
|
||||
model: String,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
model: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
chat_model: Option<String>,
|
||||
device: String,
|
||||
|
|
@ -22,7 +23,7 @@ pub struct HealthState {
|
|||
}
|
||||
|
||||
impl HealthState {
|
||||
pub fn new(model: &str, chat_model: Option<&str>, device: &Device) -> Self {
|
||||
pub fn new(model: Option<&str>, chat_model: Option<&str>, device: &Device) -> Self {
|
||||
let (cpu_info, cpu_count) = read_cpu_info();
|
||||
|
||||
let cuda_devices = match read_cuda_devices() {
|
||||
|
|
@ -31,7 +32,7 @@ impl HealthState {
|
|||
};
|
||||
|
||||
Self {
|
||||
model: model.to_owned(),
|
||||
model: model.map(|x| x.to_owned()),
|
||||
chat_model: chat_model.map(|x| x.to_owned()),
|
||||
device: device.to_string(),
|
||||
arch: ARCH.to_string(),
|
||||
|
|
|
|||
|
|
@ -98,12 +98,13 @@ function MainPanel() {
|
|||
<div className="mt-4 rounded-lg bg-zinc-100 p-4 dark:bg-zinc-800">
|
||||
<span className="font-bold">Workers</span>
|
||||
<div className="mt-4 flex flex-col gap-4 lg:flex-row lg:flex-wrap">
|
||||
{healthInfo.model &&
|
||||
<WorkerCard
|
||||
source="localhost"
|
||||
name={healthInfo.model}
|
||||
type="completion"
|
||||
health={healthInfo}
|
||||
/>
|
||||
/>}
|
||||
{healthInfo.chat_model && (
|
||||
<WorkerCard
|
||||
source="localhost"
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import fetcher from '@/lib/tabby-fetcher'
|
|||
|
||||
export interface HealthInfo {
|
||||
device: 'metal' | 'cpu' | 'cuda'
|
||||
model: string
|
||||
model?: string
|
||||
chat_model?: string
|
||||
cpu_info: string
|
||||
cpu_count: number
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -1,7 +1,7 @@
|
|||
1:HL["/_next/static/media/86fdec36ddd9097e-s.p.woff2","font",{"crossOrigin":"","type":"font/woff2"}]
|
||||
2:HL["/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2","font",{"crossOrigin":"","type":"font/woff2"}]
|
||||
3:HL["/_next/static/css/83506ffe28f4b91c.css","style"]
|
||||
0:["y8UTfVMfS3WRo-O_Ue2fm",[[["",{"children":["(dashboard)",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],"$L4",[[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/83506ffe28f4b91c.css","precedence":"next"}]],"$L5"]]]]
|
||||
0:["mnxnTVxz0_SgSNkjMXene",[[["",{"children":["(dashboard)",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],"$L4",[[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/83506ffe28f4b91c.css","precedence":"next"}]],"$L5"]]]]
|
||||
6:I{"id":5925,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Toaster","async":false}
|
||||
7:I{"id":78495,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Providers","async":false}
|
||||
8:I{"id":11486,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Header","async":false}
|
||||
|
|
@ -9,7 +9,7 @@
|
|||
a:I{"id":18639,"chunks":["272:static/chunks/webpack-7f4514982162b5cb.js","971:static/chunks/fd9d1056-6779f76bb208370f.js","590:static/chunks/590-758bf4c4ecd0fce8.js"],"name":"","async":false}
|
||||
b:I{"id":30831,"chunks":["882:static/chunks/882-5574357230a12cf9.js","396:static/chunks/396-89319c375c83667c.js","642:static/chunks/app/(dashboard)/layout-81899020c101ed0e.js"],"name":"","async":false}
|
||||
d:I{"id":65146,"chunks":["272:static/chunks/webpack-7f4514982162b5cb.js","971:static/chunks/fd9d1056-6779f76bb208370f.js","590:static/chunks/590-758bf4c4ecd0fce8.js"],"name":"","async":false}
|
||||
e:I{"id":60901,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","286:static/chunks/286-453e01dda7220f79.js","967:static/chunks/967-f02728ac0b1b01a8.js","130:static/chunks/app/(dashboard)/page-1cc6ccaef036b0bc.js"],"name":"","async":false}
|
||||
e:I{"id":60901,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","286:static/chunks/286-453e01dda7220f79.js","967:static/chunks/967-f02728ac0b1b01a8.js","130:static/chunks/app/(dashboard)/page-fe128a03e1e2a101.js"],"name":"","async":false}
|
||||
5:[["$","meta","0",{"charSet":"utf-8"}],["$","title","1",{"children":"Tabby - Dashboard"}],["$","meta","2",{"name":"description","content":"Tabby, an opensource, self-hosted AI coding assistant."}],["$","meta","3",{"name":"theme-color","media":"(prefers-color-scheme: light)","content":"white"}],["$","meta","4",{"name":"theme-color","media":"(prefers-color-scheme: dark)","content":"black"}],["$","meta","5",{"name":"viewport","content":"width=device-width, initial-scale=1"}],["$","meta","6",{"name":"next-size-adjust"}]]
|
||||
4:[null,["$","html",null,{"lang":"en","suppressHydrationWarning":true,"children":[["$","head",null,{}],["$","body",null,{"className":"font-sans antialiased __variable_e66fe9 __variable_bd9c35","children":[["$","$L6",null,{}],["$","$L7",null,{"attribute":"class","defaultTheme":"system","enableSystem":true,"children":[["$","div",null,{"className":"flex min-h-screen flex-col","children":[["$","$L8",null,{}],["$","main",null,{"className":"bg-muted/50 flex flex-1 flex-col","children":["$","$L9",null,{"parallelRouterKey":"children","segmentPath":["children"],"loading":"$undefined","loadingStyles":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","template":["$","$La",null,{}],"templateStyles":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be 
found."}]}]]}]}]],"notFoundStyles":[],"childProp":{"current":[null,["$","$Lb",null,{"className":"flex-1","children":["$","$L9",null,{"parallelRouterKey":"children","segmentPath":["children","(dashboard)","children"],"loading":"$undefined","loadingStyles":"$undefined","hasLoading":false,"error":"$undefined","errorStyles":"$undefined","template":["$","$La",null,{}],"templateStyles":"$undefined","notFound":[["$","title",null,{"children":"404: This page could not be found."}],["$","div",null,{"style":{"fontFamily":"system-ui,\"Segoe UI\",Roboto,Helvetica,Arial,sans-serif,\"Apple Color Emoji\",\"Segoe UI Emoji\"","height":"100vh","textAlign":"center","display":"flex","flexDirection":"column","alignItems":"center","justifyContent":"center"},"children":["$","div",null,{"children":[["$","style",null,{"dangerouslySetInnerHTML":{"__html":"body{color:#000;background:#fff;margin:0}.next-error-h1{border-right:1px solid rgba(0,0,0,.3)}@media (prefers-color-scheme:dark){body{color:#fff;background:#000}.next-error-h1{border-right:1px solid rgba(255,255,255,.3)}}"}}],["$","h1",null,{"className":"next-error-h1","style":{"display":"inline-block","margin":"0 20px 0 0","padding":"0 23px 0 0","fontSize":24,"fontWeight":500,"verticalAlign":"top","lineHeight":"49px"},"children":"404"}],["$","div",null,{"style":{"display":"inline-block"},"children":["$","h2",null,{"style":{"fontSize":14,"fontWeight":400,"lineHeight":"49px","margin":0},"children":"This page could not be found."}]}]]}]}]],"notFoundStyles":[],"childProp":{"current":["$Lc",["$","$Ld",null,{"propsForComponent":{"params":{}},"Component":"$e"}],null],"segment":"__PAGE__"},"styles":[]}]}],null],"segment":"(dashboard)"},"styles":[]}]}]]}],null]}]]}]]}],null]
|
||||
c:null
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -1,7 +1,7 @@
|
|||
1:HL["/_next/static/media/86fdec36ddd9097e-s.p.woff2","font",{"crossOrigin":"","type":"font/woff2"}]
|
||||
2:HL["/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2","font",{"crossOrigin":"","type":"font/woff2"}]
|
||||
3:HL["/_next/static/css/83506ffe28f4b91c.css","style"]
|
||||
0:["y8UTfVMfS3WRo-O_Ue2fm",[[["",{"children":["playground",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],"$L4",[[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/83506ffe28f4b91c.css","precedence":"next"}]],"$L5"]]]]
|
||||
0:["mnxnTVxz0_SgSNkjMXene",[[["",{"children":["playground",{"children":["__PAGE__",{}]}]},"$undefined","$undefined",true],"$L4",[[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/83506ffe28f4b91c.css","precedence":"next"}]],"$L5"]]]]
|
||||
6:I{"id":5925,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Toaster","async":false}
|
||||
7:I{"id":78495,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Providers","async":false}
|
||||
8:I{"id":11486,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Header","async":false}
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -1,7 +1,7 @@
|
|||
1:HL["/_next/static/media/86fdec36ddd9097e-s.p.woff2","font",{"crossOrigin":"","type":"font/woff2"}]
|
||||
2:HL["/_next/static/media/c9a5bc6a7c948fb0-s.p.woff2","font",{"crossOrigin":"","type":"font/woff2"}]
|
||||
3:HL["/_next/static/css/83506ffe28f4b91c.css","style"]
|
||||
0:["y8UTfVMfS3WRo-O_Ue2fm",[[["",{"children":["(dashboard)",{"children":["swagger",{"children":["__PAGE__",{}]}]}]},"$undefined","$undefined",true],"$L4",[[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/83506ffe28f4b91c.css","precedence":"next"}]],"$L5"]]]]
|
||||
0:["mnxnTVxz0_SgSNkjMXene",[[["",{"children":["(dashboard)",{"children":["swagger",{"children":["__PAGE__",{}]}]}]},"$undefined","$undefined",true],"$L4",[[["$","link","0",{"rel":"stylesheet","href":"/_next/static/css/83506ffe28f4b91c.css","precedence":"next"}]],"$L5"]]]]
|
||||
6:I{"id":5925,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Toaster","async":false}
|
||||
7:I{"id":78495,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Providers","async":false}
|
||||
8:I{"id":11486,"chunks":["882:static/chunks/882-5574357230a12cf9.js","576:static/chunks/576-75ac7546de3029d6.js","197:static/chunks/197-f00b4479a1649cf2.js","396:static/chunks/396-89319c375c83667c.js","967:static/chunks/967-f02728ac0b1b01a8.js","185:static/chunks/app/layout-7d7d333b8349675e.js"],"name":"Header","async":false}
|
||||
|
|
|
|||
Loading…
Reference in New Issue