feat: improve events system (#40)

* feat: improve events system

* docs: add Events.md for Event sub system.

* Link vector.toml
add-more-languages
Meng Zhang 2023-04-04 13:22:16 +08:00 committed by GitHub
parent 75a8c7f439
commit 79585cc2a4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 61 additions and 8 deletions

View File

@@ -6,14 +6,33 @@ address = "0.0.0.0:8686"
type = "file"
include = ["/data/logs/tabby-server/events.*.log"]
[transforms.process_tabby_server_logs]
[transforms.process_tabby_server_events]
type = "remap"
inputs = [ "tabby_server_logs" ]
source = ". = parse_json!(parse_json!(.message).record.message)"
source = """
record = parse_json!(.message).record
[sinks.write_tabby_server_logs]
id = {
"process_id": record.process.id,
"thread_id": record.thread.id,
"timestamp": record.time.timestamp
}
. = {
"id": id,
"data": parse_json!(record.message)
}
"""
[sinks.write_tabby_server_events]
type = "file"
inputs = [ "process_tabby_server_logs" ]
inputs = [ "process_tabby_server_events" ]
encoding = { codec = "json" }
framing = { method = "newline_delimited" }
path = "/data/logs/events/tabby-server/%Y-%m-%d.json"
[sinks.all_events]
type = "console"
inputs = [ "process_tabby_server_events" ]
encoding = { codec = "json" }
framing = { method = "newline_delimited" }
path = "/data/logs/tabby-server/events.%Y-%m-%d.json"

31
docs/internal/Events.md Normal file
View File

@@ -0,0 +1,31 @@
# Events System
In Tabby, we use [`vector`](../../deployment/config/vector.toml) to collect logs from various sources, transform them into standard `Event` objects, and persist them in `/data/logs/events`.
## Schema
### Event
```jsonc
{
"id": EventId,
"data": Any
}
```
The `id` field can be used to uniquely identify an event.
The `data` field is a standard JSON object, and its definition is left to downstream tasks.
### EventId
```jsonc
{
"process_id": Number,
"thread_id": Number,
// Unix timestamp
"timestamp": Number
}
```
In the future, we might add `server_id` when Tabby evolves into a distributed environment.

View File

@@ -3,7 +3,7 @@ set -e
DB_FILE=${DB_FILE:-"/data/logs/duckdb/duck.db"}
LOGS_DIR=${LOGS_DIR:-"/data/logs"}
TABBY_SERVER_LOGS="${LOGS_DIR}/tabby-server/events.*.json"
TABBY_SERVER_LOGS="${LOGS_DIR}/events/tabby-server/*.json"
# Init schema
function init_scheme() {
@@ -27,6 +27,9 @@ function collect_tabby_server_logs() {
if compgen -G "${TABBY_SERVER_LOGS}" > /dev/null; then
cat <<EOF | duckdb
CREATE TEMP TABLE events AS
SELECT data.* FROM '${TABBY_SERVER_LOGS}';
CREATE TEMP TABLE t AS
SELECT id, created, prompt, choices, IFNULL(rhs.view, false) AS view, IFNULL(rhs.select, false) AS select
FROM
@@ -36,13 +39,13 @@ FROM
FIRST(created) AS created,
FIRST(prompt) AS prompt,
FIRST(choices) AS choices
FROM '${TABBY_SERVER_LOGS}' WHERE id IS NOT NULL GROUP BY 1) lhs
FROM events WHERE id IS NOT NULL GROUP BY 1) lhs
LEFT JOIN (
SELECT
completion_id,
(SUM(IF(type == 'view', 1, 0)) > 0) AS view,
(SUM(IF(type == 'select', 1, 0)) > 0) AS select
FROM '${TABBY_SERVER_LOGS}'
FROM events
WHERE completion_id IS NOT NULL
GROUP BY 1
) rhs ON (lhs.id = rhs.completion_id);