feat: add dataset viewer, remove tree-sitter languages that haven't been verified (#509)
* refactor: remove not verified tree sitter queries * feat(experimental): add dataset viewer updatewsxiaoys-patch-1
parent
55f68d4224
commit
1babc38902
|
|
@ -3194,14 +3194,9 @@ dependencies = [
|
||||||
"tokio",
|
"tokio",
|
||||||
"tracing",
|
"tracing",
|
||||||
"tracing-test",
|
"tracing-test",
|
||||||
"tree-sitter-go",
|
|
||||||
"tree-sitter-java",
|
|
||||||
"tree-sitter-javascript",
|
|
||||||
"tree-sitter-lua",
|
|
||||||
"tree-sitter-python",
|
"tree-sitter-python",
|
||||||
"tree-sitter-rust",
|
"tree-sitter-rust",
|
||||||
"tree-sitter-tags",
|
"tree-sitter-tags",
|
||||||
"tree-sitter-typescript",
|
|
||||||
"walkdir",
|
"walkdir",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -3861,46 +3856,6 @@ dependencies = [
|
||||||
"regex",
|
"regex",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "tree-sitter-go"
|
|
||||||
version = "0.20.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "1ad6d11f19441b961af2fda7f12f5d0dac325f6d6de83836a1d3750018cc5114"
|
|
||||||
dependencies = [
|
|
||||||
"cc",
|
|
||||||
"tree-sitter",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "tree-sitter-java"
|
|
||||||
version = "0.20.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "f0bf5d3f508cbffcbfe1805834101c0d24297a8b6c2184ad9c595556c46d2420"
|
|
||||||
dependencies = [
|
|
||||||
"cc",
|
|
||||||
"tree-sitter",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "tree-sitter-javascript"
|
|
||||||
version = "0.20.0"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "2490fab08630b2c8943c320f7b63473cbf65511c8d83aec551beb9b4375906ed"
|
|
||||||
dependencies = [
|
|
||||||
"cc",
|
|
||||||
"tree-sitter",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "tree-sitter-lua"
|
|
||||||
version = "0.0.19"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "0968cf4962ead1d26da28921dde1fd97407e7bbcf2f959cd20cf04ba2daa9421"
|
|
||||||
dependencies = [
|
|
||||||
"cc",
|
|
||||||
"tree-sitter",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tree-sitter-python"
|
name = "tree-sitter-python"
|
||||||
version = "0.20.2"
|
version = "0.20.2"
|
||||||
|
|
@ -3933,16 +3888,6 @@ dependencies = [
|
||||||
"tree-sitter",
|
"tree-sitter",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "tree-sitter-typescript"
|
|
||||||
version = "0.20.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "079c695c32d39ad089101c66393aeaca30e967fba3486a91f573d2f0e12d290a"
|
|
||||||
dependencies = [
|
|
||||||
"cc",
|
|
||||||
"tree-sitter",
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "try-lock"
|
name = "try-lock"
|
||||||
version = "0.2.4"
|
version = "0.2.4"
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,6 @@ job_scheduler = "1.2.1"
|
||||||
tabby-common = { path = "../tabby-common" }
|
tabby-common = { path = "../tabby-common" }
|
||||||
tantivy = { workspace = true }
|
tantivy = { workspace = true }
|
||||||
tracing = { workspace = true }
|
tracing = { workspace = true }
|
||||||
tree-sitter-javascript = "0.20.0"
|
|
||||||
tree-sitter-tags = "0.20.2"
|
tree-sitter-tags = "0.20.2"
|
||||||
walkdir = "2.3.3"
|
walkdir = "2.3.3"
|
||||||
lazy_static = { workspace = true }
|
lazy_static = { workspace = true }
|
||||||
|
|
@ -21,10 +20,6 @@ serde-jsonlines = { workspace = true }
|
||||||
file-rotate = "0.7.5"
|
file-rotate = "0.7.5"
|
||||||
tree-sitter-python = "0.20.2"
|
tree-sitter-python = "0.20.2"
|
||||||
tree-sitter-rust = "0.20.3"
|
tree-sitter-rust = "0.20.3"
|
||||||
tree-sitter-go = "0.20.0"
|
|
||||||
tree-sitter-java = "0.20.0"
|
|
||||||
tree-sitter-typescript = "0.20.2"
|
|
||||||
tree-sitter-lua = "0.0.19"
|
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
temp_testdir = "0.2"
|
temp_testdir = "0.2"
|
||||||
|
|
|
||||||
|
|
@ -248,83 +248,6 @@ lazy_static! {
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
(
|
|
||||||
"javascript",
|
|
||||||
TagsConfigurationSync(
|
|
||||||
TagsConfiguration::new(
|
|
||||||
tree_sitter_javascript::language(),
|
|
||||||
tree_sitter_javascript::TAGGING_QUERY,
|
|
||||||
"",
|
|
||||||
)
|
|
||||||
.unwrap(),
|
|
||||||
),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"jsx",
|
|
||||||
TagsConfigurationSync(
|
|
||||||
TagsConfiguration::new(
|
|
||||||
tree_sitter_javascript::language(),
|
|
||||||
tree_sitter_javascript::TAGGING_QUERY,
|
|
||||||
"",
|
|
||||||
)
|
|
||||||
.unwrap(),
|
|
||||||
),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"typescript",
|
|
||||||
TagsConfigurationSync(
|
|
||||||
TagsConfiguration::new(
|
|
||||||
tree_sitter_typescript::language_typescript(),
|
|
||||||
tree_sitter_typescript::TAGGING_QUERY,
|
|
||||||
"",
|
|
||||||
)
|
|
||||||
.unwrap(),
|
|
||||||
),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"tsx",
|
|
||||||
TagsConfigurationSync(
|
|
||||||
TagsConfiguration::new(
|
|
||||||
tree_sitter_typescript::language_tsx(),
|
|
||||||
tree_sitter_typescript::TAGGING_QUERY,
|
|
||||||
"",
|
|
||||||
)
|
|
||||||
.unwrap(),
|
|
||||||
),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"java",
|
|
||||||
TagsConfigurationSync(
|
|
||||||
TagsConfiguration::new(
|
|
||||||
tree_sitter_java::language(),
|
|
||||||
tree_sitter_java::TAGGING_QUERY,
|
|
||||||
"",
|
|
||||||
)
|
|
||||||
.unwrap(),
|
|
||||||
),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"go",
|
|
||||||
TagsConfigurationSync(
|
|
||||||
TagsConfiguration::new(
|
|
||||||
tree_sitter_go::language(),
|
|
||||||
tree_sitter_go::TAGGING_QUERY,
|
|
||||||
"",
|
|
||||||
)
|
|
||||||
.unwrap(),
|
|
||||||
),
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"lua",
|
|
||||||
TagsConfigurationSync(
|
|
||||||
TagsConfiguration::new(
|
|
||||||
tree_sitter_lua::language(),
|
|
||||||
tree_sitter_lua::TAGS_QUERY,
|
|
||||||
"",
|
|
||||||
)
|
|
||||||
.unwrap(),
|
|
||||||
),
|
|
||||||
),
|
|
||||||
])
|
])
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,47 @@
|
||||||
|
import pandas as pd
|
||||||
|
import streamlit as st
|
||||||
|
|
||||||
|
# Force wide mode.
st.set_page_config(layout="wide")

st.write("Files")

# Read the dataset as JSON Lines (one JSON object per line).
df = pd.read_json("~/.tabby/dataset/data.jsonl", lines=True)

# Remove columns the viewer does not use.
del df["git_url"]

# Keep only files whose longest line is under 200 characters and that carry
# at least one tag. The tag filter works on the column directly instead of
# calling a Python lambda once per row.
df = df[df["max_line_length"] < 200]
df = df[df["tags"].map(len) > 0]

# With no rows left, the selectbox has no options and the `.iloc[0]` lookup
# below would raise IndexError; stop the script with a message instead.
if df.empty:
    st.warning("No files in the dataset match the viewer filters.")
    st.stop()

selected = st.selectbox(
    "Filename",
    df.filepath,
)

# First row whose path matches the selection.
selected_row = df[df.filepath == selected].iloc[0]
|
||||||
|
|
||||||
|
def get_range(lst, x):
    """Return the slice of *lst* described by the range mapping *x*.

    Args:
        lst: The sequence to slice. In this script it is the file content.
        x: A mapping with integer ``'start'`` and ``'end'`` keys.

    Returns:
        ``lst[start:end]`` for non-string sequences. For ``str`` input the
        offsets are applied to the UTF-8 encoding of the text, and the
        selected bytes are decoded back to ``str``.

    NOTE(review): this assumes the offsets are UTF-8 byte offsets, as
    produced by tree-sitter-tags; confirm against the dataset writer. For
    pure-ASCII text the result equals plain character slicing.
    """
    start, end = x['start'], x['end']
    if isinstance(lst, str):
        # Byte offsets and character indices diverge after the first
        # multi-byte character, so slice the encoded form.
        raw = lst.encode("utf-8", errors="surrogatepass")
        # errors="replace" keeps a range that cuts through a multi-byte
        # character from raising.
        return raw[start:end].decode("utf-8", errors="replace")
    return lst[start:end]
|
||||||
|
|
||||||
|
# Render the selected file next to its tags. `selected_row` comes from an
# `.iloc[0]` lookup, which yields a row or raises, so no None check is needed.
content = selected_row.content
tags = selected_row.tags

# Sort the distinct kinds so the widget lists them in a stable order;
# iterating a set gives an arbitrary order.
kinds = sorted({tag['syntax_type_name'] for tag in tags}, key=str)
enabled_kinds = st.multiselect(
    "Displayed Kinds", kinds, default=kinds, key=selected_row.filepath
)
col1, col2 = st.columns(2)

# Left column: the whole file with line numbers.
with col1:
    st.write(f"File: {selected_row.filepath}")
    st.code(content, line_numbers=True)

# Right column: one heading and code snippet per tag of an enabled kind.
with col2:
    for tag in tags:
        kind = tag['syntax_type_name']
        # Filter first so disabled kinds skip the slicing work.
        if kind not in enabled_kinds:
            continue
        name = get_range(content, tag['name_range'])
        is_definition = '✅' if tag['is_definition'] else '❌'
        st.markdown(f"### `{name}`\nkind: {kind}, is_definition: {is_definition}")
        st.code(get_range(content, tag['range']))
|
||||||
Loading…
Reference in New Issue