Improve charting for admin

add-more-languages
Meng Zhang 2023-04-03 23:19:25 +08:00
parent 350b719db9
commit 500466aca9
2 changed files with 50 additions and 10 deletions

View File

@ -16,7 +16,7 @@ def query_data():
df = conn.sql( df = conn.sql(
""" """
SELECT SELECT
date_trunc('hour', to_timestamp(CAST(created AS int64))) AS Date, date_trunc('day', to_timestamp(CAST(created AS int64))) AS Date,
SUM(IF(view, 1, 0)) as "Views", SUM(IF(view, 1, 0)) as "Views",
SUM(IF("select", 1, 0)) as "Acceptances" SUM(IF("select", 1, 0)) as "Acceptances"
FROM completion_events FROM completion_events
@ -30,12 +30,36 @@ def query_data():
df = query_data() df = query_data()
if len(df) > 0:
def plot_summary():
    """Render the headline metrics for the queried completion events.

    Reads the module-level ``df`` and shows three side-by-side Streamlit
    metrics: the total number of views, the total number of acceptances,
    and the acceptance ratio as a whole-number percentage.

    When the summed views are zero (e.g. ``df`` has no rows) the ratio is
    undefined; it is displayed as "n/a" instead of raising
    ``ZeroDivisionError``.
    """
    sum_views = int(sum(df.Views))
    sum_acceptances = int(sum(df.Acceptances))

    # Guard the division: with no views there is nothing to take a ratio of.
    if sum_views > 0:
        ratio = (sum_acceptances / sum_views) * 100
        ratio_label = f"{round(ratio)} %"
    else:
        ratio_label = "n/a"

    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("Views", sum_views)
    with col2:
        st.metric("Acceptances", sum_acceptances)
    with col3:
        st.metric("Accept Ratio", ratio_label)
plot_summary()
st.write("---")
def plot_charts():
    """Draw the completion-event chart and the acceptance-rate chart.

    Reads the module-level ``df``. As a side effect, an "Acceptance Rate"
    column (``Acceptances`` divided by ``Views``) is added to ``df`` in
    place before the second chart is drawn.
    """
    rate_column = "Acceptance Rate"

    st.markdown("### Completion Events")
    # Drawn before the rate column is added, so this chart only contains
    # the columns df has at this point.
    st.line_chart(df, x="Date")

    st.markdown("### Acceptance Rate")
    acceptances, views = df["Acceptances"], df["Views"]
    df[rate_column] = acceptances / views
    st.line_chart(df, x="Date", y=rate_column)
if len(df) > 0:
plot_charts()
else: else:
st.markdown("No data available") st.markdown("No data available")

View File

@ -1,6 +1,7 @@
import os import os
from pathlib import Path from pathlib import Path
import pandas as pd
import streamlit as st import streamlit as st
import toml import toml
from datasets import load_from_disk from datasets import load_from_disk
@ -12,27 +13,42 @@ dataset_dir = os.environ.get("DATASET_DIR", None)
git_repositories_dir = os.environ.get("GIT_REPOSITORIES_DIR", None) git_repositories_dir = os.environ.get("GIT_REPOSITORIES_DIR", None)
config_file = os.environ.get("CONFIG_FILE", None) config_file = os.environ.get("CONFIG_FILE", None)
config = toml.load(config_file) config = toml.load(config_file)
projects = config.get("projects", {})
def count_by_language(dataset):
    """Count how many entries of *dataset* belong to each language.

    Args:
        dataset: Any object whose ``"language"`` item yields one language
            value per entry.

    Returns:
        A DataFrame indexed by language with a single ``"# Files"`` column
        holding the number of entries for that language.
    """
    column = "language"
    languages = pd.DataFrame(dataset[column], columns=[column])
    files_per_language = languages.groupby([column]).size()
    return files_per_language.to_frame("# Files")
def dataset_info(): def dataset_info():
st.subheader("Dataset")
if not Path(dataset_dir).is_dir(): if not Path(dataset_dir).is_dir():
st.write("*Not populated*") st.write("*n/a*")
return return
info = load_from_disk(dataset_dir) dataset = load_from_disk(dataset_dir)
st.write("Source files: ", len(info))
col1, col2 = st.columns(2)
with col1:
st.metric("Total files", len(dataset))
with col2:
st.bar_chart(count_by_language(dataset))
def project_list(): def project_list():
data = config.get("projects", {}) if len(projects) <= 0:
if len(data) <= 0:
st.write("Your project list is empty") st.write("Your project list is empty")
st.write(f"Edit `{config_file}` to add projects") st.write(f"Edit `{config_file}` to add projects")
return return
for k, v in data.items(): for k, v in projects.items():
st.subheader(k) st.subheader(k)
st.write(f'Git: {v["git_url"]}') st.write(f'Git: {v["git_url"]}')