temp init
parent
99d1bf34bb
commit
d80e675211
|
|
@ -0,0 +1,3 @@
|
|||
tabby.egg-info/
|
||||
__pycache__
|
||||
tmp*/
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
# tabby
|
||||
|
||||
## Setup Development Environment
|
||||
|
||||
```bash
|
||||
pip install -e ".[dev]"
|
||||
```
|
||||
|
||||
Then, start the Dagster UI web server:
|
||||
|
||||
```bash
|
||||
dagster dev
|
||||
```
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
[build-system]
|
||||
requires = ["setuptools"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.dagster]
|
||||
module_name = "tabby"
|
||||
|
|
@ -0,0 +1,2 @@
|
|||
[metadata]
|
||||
name = tabby
|
||||
|
|
@ -0,0 +1,12 @@
|
|||
from setuptools import find_packages, setup
|
||||
|
||||
# Package definition for the "tabby" Dagster project.
# Runtime requirements go in install_requires; tooling that is only needed
# while developing (web UI, test runner) lives in the "dev" extra, which is
# what `pip install -e ".[dev]"` pulls in.
setup(
    name="tabby",
    packages=find_packages(exclude=["tabby_tests"]),
    install_requires=["dagster", "dagster-cloud", "dagster-pandas"],
    extras_require={"dev": ["dagster-webserver", "pytest"]},
)
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
from dagster import Definitions, load_assets_from_modules
|
||||
|
||||
from . import assets
|
||||
|
||||
# Gather every asset declared in the sibling `assets` module.
all_assets = load_assets_from_modules([assets])

# Definitions object exposing the collected assets.
defs = Definitions(assets=all_assets)
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
from dagster import Out, Output, MetadataValue, asset
|
||||
from dagster_pandas import DataFrame, PandasColumn, create_dagster_pandas_dataframe_type
|
||||
|
||||
import pandas as pd
|
||||
import json
|
||||
import glob
|
||||
|
||||
from . import constants
|
||||
|
||||
# Dagster dataframe type describing the columns expected in a dataset frame.
# Column order matches the original declaration: string columns, line-length
# statistics, then the presence-only "tags" column.
DatasetDataFrame = create_dagster_pandas_dataframe_type(
    name="DatasetDataFrame",
    columns=[
        *(
            PandasColumn.string_column(column_name)
            for column_name in ("git_url", "filepath", "content", "language")
        ),
        PandasColumn.integer_column("max_line_length"),
        *(
            PandasColumn.float_column(column_name)
            for column_name in ("avg_line_length", "alphanum_fraction")
        ),
        # "tags" only has to exist; no type constraint is placed on it.
        PandasColumn.exists("tags"),
    ],
)
|
||||
|
||||
|
||||
@asset(dagster_type=DatasetDataFrame)
def dataset_files():
    """Load every record from the files matching the dataset file pattern.

    Each file matched by ``constants.TABBY_DATASET_FILEPATTERN`` is read as
    JSON Lines (one JSON document per line). Blank lines are skipped.

    Returns:
        An ``Output`` wrapping a ``pandas.DataFrame`` built from all parsed
        records, with ``num_files`` metadata set to the number of rows.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    records = []
    # glob returns matches in filesystem-dependent order; sort so the row
    # order of the resulting frame is reproducible between runs.
    for path in sorted(glob.glob(constants.TABBY_DATASET_FILEPATTERN)):
        # JSON text is UTF-8; do not rely on the platform default encoding.
        with open(path, "r", encoding="utf-8") as f:
            # Iterate the file lazily instead of materialising every line,
            # and tolerate blank/trailing empty lines.
            records.extend(json.loads(line) for line in f if line.strip())

    df = pd.DataFrame(records)
    return Output(df, metadata={"num_files": len(df)})
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
import os
|
||||
|
||||
# Fallback root directory, used when the TABBY_ROOT environment variable
# is not set.
_DEFAULT_TABBY_ROOT = os.path.expanduser("~/.tabby")

# Root directory; the TABBY_ROOT environment variable overrides the default.
TABBY_ROOT = os.getenv("TABBY_ROOT", _DEFAULT_TABBY_ROOT)

# Glob pattern matching the JSON Lines dataset files under the root.
TABBY_DATASET_FILEPATTERN = os.path.join(TABBY_ROOT, "dataset/*.jsonl")
|
||||
|
|
@ -0,0 +1 @@
|
|||
|
||||
|
|
@ -0,0 +1 @@
|
|||
|
||||
Loading…
Reference in New Issue