* add loadtest * release 0.6.0 * Release 0.6.0-rc.0 http-api-bindings@0.6.0-rc.0 juniper-axum@0.6.0-rc.0 llama-cpp-bindings@0.6.0-rc.0 tabby@0.6.0-rc.0 tabby-common@0.6.0-rc.0 tabby-download@0.6.0-rc.0 tabby-inference@0.6.0-rc.0 tabby-scheduler@0.6.0-rc.0 tabby-webserver@0.6.0-rc.0 Generated by cargo-workspaces * Release 0.6.0-rc.1 http-api-bindings@0.6.0-rc.1 juniper-axum@0.6.0-rc.1 llama-cpp-bindings@0.6.0-rc.1 tabby@0.6.0-rc.1 tabby-common@0.6.0-rc.1 tabby-download@0.6.0-rc.1 tabby-inference@0.6.0-rc.1 tabby-scheduler@0.6.0-rc.1 tabby-webserver@0.6.0-rc.1 Generated by cargo-workspaces * Release 0.6.0-rc.2 http-api-bindings@0.6.0-rc.2 juniper-axum@0.6.0-rc.2 llama-cpp-bindings@0.6.0-rc.2 tabby@0.6.0-rc.2 tabby-common@0.6.0-rc.2 tabby-download@0.6.0-rc.2 tabby-inference@0.6.0-rc.2 tabby-scheduler@0.6.0-rc.2 tabby-webserver@0.6.0-rc.2 Generated by cargo-workspaces * Release 0.6.0 http-api-bindings@0.6.0 juniper-axum@0.6.0 llama-cpp-bindings@0.6.0 tabby@0.6.0 tabby-common@0.6.0 tabby-download@0.6.0 tabby-inference@0.6.0 tabby-scheduler@0.6.0 tabby-webserver@0.6.0 Generated by cargo-workspaces |
||
|---|---|---|
| .. | ||
| src | ||
| Cargo.toml | ||
| README.md | ||
README.md
Examples
# Run the `simple` example against the Vertex AI code-gecko model.
# Relies on the gcloud CLI for the project id and an access token.

# Assign first, export later: `export VAR="$(cmd)"` returns the status of
# `export` itself, so a failing gcloud call would go unnoticed.
MODEL_ID="code-gecko"

PROJECT_ID="$(gcloud config get project)"
# Stop here rather than build an endpoint URL with an empty project segment.
: "${PROJECT_ID:?no gcloud project configured}"

API_ENDPOINT="https://us-central1-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/${MODEL_ID}:predict"

AUTHORIZATION="$(gcloud auth print-access-token)"
# Without this guard a failed login would produce the header value "Bearer ".
: "${AUTHORIZATION:?could not obtain a gcloud access token}"
AUTHORIZATION="Bearer ${AUTHORIZATION}"

# Exported so the process started by cargo inherits them.
export MODEL_ID PROJECT_ID API_ENDPOINT AUTHORIZATION

cargo run --example simple
Usage
# Start `serve` with the experimental-http device, using the Vertex AI
# code-gecko endpoint as the model backend.
# Relies on the gcloud CLI for the project id and an access token.

# Assign first, export later: `export VAR="$(cmd)"` returns the status of
# `export` itself, so a failing gcloud call would go unnoticed.
MODEL_ID="code-gecko"

PROJECT_ID="$(gcloud config get project)"
# Stop here rather than build an endpoint URL with an empty project segment.
: "${PROJECT_ID:?no gcloud project configured}"

API_ENDPOINT="https://us-central1-aiplatform.googleapis.com/v1/projects/${PROJECT_ID}/locations/us-central1/publishers/google/models/${MODEL_ID}:predict"

AUTHORIZATION="$(gcloud auth print-access-token)"
# Without this guard a failed login would produce the header value "Bearer ".
: "${AUTHORIZATION:?could not obtain a gcloud access token}"
AUTHORIZATION="Bearer ${AUTHORIZATION}"

export MODEL_ID PROJECT_ID API_ENDPOINT AUTHORIZATION

# Fill the --model JSON from a single-quoted template instead of escaping
# every double quote by hand; the resulting argument is unchanged.
# NOTE: the bearer token is passed on the command line, so it is visible in
# process listings while the server runs.
model_spec="$(printf '{"kind": "vertex-ai", "api_endpoint": "%s", "authorization": "%s"}' \
  "${API_ENDPOINT}" "${AUTHORIZATION}")"

cargo run serve --device experimental-http --model "${model_spec}"