feat: add dataset viewer, remove tree-sitter languages that haven't been verified (#509)
* refactor: remove unverified tree-sitter queries * feat(experimental): add dataset viewer update wsxiaoys-patch-1
parent
55f68d4224
commit
1babc38902
|
|
@ -3194,14 +3194,9 @@ dependencies = [
|
|||
"tokio",
|
||||
"tracing",
|
||||
"tracing-test",
|
||||
"tree-sitter-go",
|
||||
"tree-sitter-java",
|
||||
"tree-sitter-javascript",
|
||||
"tree-sitter-lua",
|
||||
"tree-sitter-python",
|
||||
"tree-sitter-rust",
|
||||
"tree-sitter-tags",
|
||||
"tree-sitter-typescript",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
|
|
@ -3861,46 +3856,6 @@ dependencies = [
|
|||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-go"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1ad6d11f19441b961af2fda7f12f5d0dac325f6d6de83836a1d3750018cc5114"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-java"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0bf5d3f508cbffcbfe1805834101c0d24297a8b6c2184ad9c595556c46d2420"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-javascript"
|
||||
version = "0.20.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2490fab08630b2c8943c320f7b63473cbf65511c8d83aec551beb9b4375906ed"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-lua"
|
||||
version = "0.0.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0968cf4962ead1d26da28921dde1fd97407e7bbcf2f959cd20cf04ba2daa9421"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-python"
|
||||
version = "0.20.2"
|
||||
|
|
@ -3933,16 +3888,6 @@ dependencies = [
|
|||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tree-sitter-typescript"
|
||||
version = "0.20.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "079c695c32d39ad089101c66393aeaca30e967fba3486a91f573d2f0e12d290a"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"tree-sitter",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "try-lock"
|
||||
version = "0.2.4"
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ job_scheduler = "1.2.1"
|
|||
tabby-common = { path = "../tabby-common" }
|
||||
tantivy = { workspace = true }
|
||||
tracing = { workspace = true }
|
||||
tree-sitter-javascript = "0.20.0"
|
||||
tree-sitter-tags = "0.20.2"
|
||||
walkdir = "2.3.3"
|
||||
lazy_static = { workspace = true }
|
||||
|
|
@ -21,10 +20,6 @@ serde-jsonlines = { workspace = true }
|
|||
file-rotate = "0.7.5"
|
||||
tree-sitter-python = "0.20.2"
|
||||
tree-sitter-rust = "0.20.3"
|
||||
tree-sitter-go = "0.20.0"
|
||||
tree-sitter-java = "0.20.0"
|
||||
tree-sitter-typescript = "0.20.2"
|
||||
tree-sitter-lua = "0.0.19"
|
||||
|
||||
[dev-dependencies]
|
||||
temp_testdir = "0.2"
|
||||
|
|
|
|||
|
|
@ -248,83 +248,6 @@ lazy_static! {
|
|||
.unwrap(),
|
||||
),
|
||||
),
|
||||
(
|
||||
"javascript",
|
||||
TagsConfigurationSync(
|
||||
TagsConfiguration::new(
|
||||
tree_sitter_javascript::language(),
|
||||
tree_sitter_javascript::TAGGING_QUERY,
|
||||
"",
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
),
|
||||
(
|
||||
"jsx",
|
||||
TagsConfigurationSync(
|
||||
TagsConfiguration::new(
|
||||
tree_sitter_javascript::language(),
|
||||
tree_sitter_javascript::TAGGING_QUERY,
|
||||
"",
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
),
|
||||
(
|
||||
"typescript",
|
||||
TagsConfigurationSync(
|
||||
TagsConfiguration::new(
|
||||
tree_sitter_typescript::language_typescript(),
|
||||
tree_sitter_typescript::TAGGING_QUERY,
|
||||
"",
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
),
|
||||
(
|
||||
"tsx",
|
||||
TagsConfigurationSync(
|
||||
TagsConfiguration::new(
|
||||
tree_sitter_typescript::language_tsx(),
|
||||
tree_sitter_typescript::TAGGING_QUERY,
|
||||
"",
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
),
|
||||
(
|
||||
"java",
|
||||
TagsConfigurationSync(
|
||||
TagsConfiguration::new(
|
||||
tree_sitter_java::language(),
|
||||
tree_sitter_java::TAGGING_QUERY,
|
||||
"",
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
),
|
||||
(
|
||||
"go",
|
||||
TagsConfigurationSync(
|
||||
TagsConfiguration::new(
|
||||
tree_sitter_go::language(),
|
||||
tree_sitter_go::TAGGING_QUERY,
|
||||
"",
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
),
|
||||
(
|
||||
"lua",
|
||||
TagsConfigurationSync(
|
||||
TagsConfiguration::new(
|
||||
tree_sitter_lua::language(),
|
||||
tree_sitter_lua::TAGS_QUERY,
|
||||
"",
|
||||
)
|
||||
.unwrap(),
|
||||
),
|
||||
),
|
||||
])
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,47 @@
|
|||
import pandas as pd
|
||||
import streamlit as st
|
||||
|
||||
# Script setup for the dataset viewer: configure the page, load the dataset,
# filter it, and let the user pick one file to inspect.

# force wide mode
st.set_page_config(layout="wide")

st.write("Files")

# read dataframe (JSON Lines: one record per line).
df = pd.read_json("~/.tabby/dataset/data.jsonl", lines=True)

# remove useless columns
del df["git_url"]

# filter df: keep files with short lines and at least one tag.
df = df[df["max_line_length"] < 200]
# Column-wise length check instead of a row-wise apply(axis=1); it yields a
# boolean Series even when the frame is already empty.
df = df[df["tags"].map(len) > 0]

selected = st.selectbox(
    "Filename",
    df.filepath,
)

# When the filters leave no rows there is nothing to select and no row to
# show; fall back to None instead of letting .iloc[0] raise IndexError, so the
# `selected_row is not None` guard further down can skip rendering.
matches = df[df.filepath == selected]
selected_row = matches.iloc[0] if not matches.empty else None
|
||||
|
||||
def get_range(lst, x):
    """Return the slice of ``lst`` delimited by a range mapping.

    Args:
        lst: Any sliceable sequence (the script passes the file content).
        x: Mapping with ``'start'`` and ``'end'`` keys used as slice bounds.

    Returns:
        ``lst[x['start']:x['end']]``.

    Raises:
        KeyError: If ``x`` lacks ``'start'`` or ``'end'``.
    """
    # NOTE(review): if the dataset stores byte offsets rather than character
    # offsets, slicing a str here would drift on non-ASCII content -- confirm
    # against whatever produces the ranges.
    return lst[x['start']:x['end']]
|
||||
|
||||
# Render the selected file: source on the left, its tags on the right.
if selected_row is not None:
    # Distinct tag kinds, sorted so the multiselect lists them in a stable
    # order between runs (iteration order of a set of strings is not stable).
    # Assumes every syntax_type_name is a string -- TODO confirm.
    kinds = sorted({tag['syntax_type_name'] for tag in selected_row.tags})
    # Keyed by filepath so each file keeps its own selection state.
    enabled_kinds = st.multiselect("Displayed Kinds", kinds, default=kinds, key=selected_row.filepath)
    col1, col2 = st.columns(2)

    content = selected_row.content
    with col1:
        st.write(f"File: {selected_row.filepath}")
        st.code(content, line_numbers=True)

    with col2:
        for tag in selected_row.tags:
            kind = tag['syntax_type_name']
            # Skip hidden kinds before slicing anything out of the content.
            if kind not in enabled_kinds:
                continue
            name = get_range(content, tag['name_range'])
            is_definition = '✅' if tag['is_definition'] else '❌'
            st.markdown(f"### `{name}`\nkind: {kind}, is_definition: {is_definition}")
            st.code(get_range(content, tag['range']))
|
||||
Loading…
Reference in New Issue