feat: add python eval script (#266)
* feat: add python eval script * update * add local script * move eval script * update * update * update * update * update * update * update * update * update * update * update * update * add README
parent
e822d1857f
commit
bed723fced
|
|
@ -0,0 +1,4 @@
|
||||||
|
# Python bytecode caches
__pycache__
# Jupyter notebook autosave directories
.ipynb_checkpoints
# Generated evaluation outputs (reports.ipynb / reports.html)
reports.*
# Local tabby data directory created by eval.sh
tabby
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
# Eval
|
||||||
|
|
||||||
|
## Local
|
||||||
|
`./eval.sh`
|
||||||
|
|
||||||
|
## Skypilot
|
||||||
|
`./eval_sky.sh`
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
# Repository tabby indexes to build the evaluation dataset.
[[repositories]]
git_url = "https://github.com/huggingface/text-generation-inference"
|
||||||
|
|
@ -0,0 +1,11 @@
|
||||||
|
# CUDA override for docker-compose: layered on top of the base compose file
# by eval.sh when `nvidia-smi` succeeds on the host.
version: '3.5'
services:
  tabby:
    # Serve on the GPU instead of the CPU default used by the base file.
    command: serve --model TabbyML/SantaCoder-1B --device cuda
    deploy:
      resources:
        reservations:
          devices:
            # Reserve a single NVIDIA GPU for the container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]
|
||||||
|
|
@ -6,7 +6,7 @@ services:
|
||||||
platform: linux/amd64
|
platform: linux/amd64
|
||||||
command: scheduler --now
|
command: scheduler --now
|
||||||
volumes:
|
volumes:
|
||||||
- "$HOME/.tabby:/data"
|
- "$PWD/tabby:/data"
|
||||||
|
|
||||||
tabby:
|
tabby:
|
||||||
depends_on:
|
depends_on:
|
||||||
|
|
@ -15,5 +15,7 @@ services:
|
||||||
image: tabbyml/tabby
|
image: tabbyml/tabby
|
||||||
platform: linux/amd64
|
platform: linux/amd64
|
||||||
command: serve --model TabbyML/T5P-220M
|
command: serve --model TabbyML/T5P-220M
|
||||||
|
ports:
|
||||||
|
- "8080:8080"
|
||||||
volumes:
|
volumes:
|
||||||
- "$HOME/.tabby:/data"
|
- "$PWD/tabby:/data"
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,26 @@
|
||||||
|
#!/bin/bash
# Run the tabby evaluation end-to-end against a local docker-compose stack:
# stage config, start the server (GPU variant when available), wait for
# health, execute the notebook, render an HTML report, and tear down.
set -ex

# Stage the server config into the bind-mounted data directory.
mkdir -p tabby
cp config.toml tabby/

# Ensure a clean slate before (re)starting the stack.
docker-compose down

# Prefer the CUDA override when the host exposes an NVIDIA GPU.
if nvidia-smi; then
    docker-compose -f docker-compose.yaml -f docker-compose.cuda.yaml up -d
else
    docker-compose up -d
fi

# Block until the server reports healthy. -f makes curl exit non-zero on
# HTTP errors (without it, any response — even a 5xx while the server is
# still starting — would end the wait loop); -s silences progress noise.
while ! curl -sf -X POST http://localhost:8080/v1/health; do
    echo "server not ready, waiting..."
    sleep 5
done

# Execute the evaluation notebook; MAX_RECORDS caps the dataset size
# (defaults to 3 for quick local runs).
papermill main.ipynb ./reports.ipynb -r filepattern "./tabby/dataset/*.jsonl" -r max_records "${MAX_RECORDS:-3}"

# Render the report to HTML, stripping cells tagged "remove".
jupyter nbconvert reports.ipynb --TagRemovePreprocessor.enabled=True --TagRemovePreprocessor.remove_cell_tags remove --to html

docker-compose down

echo done
|
||||||
|
|
@ -0,0 +1,11 @@
|
||||||
|
#!/bin/bash
# Run the evaluation on a Skypilot-managed cluster and fetch the reports.
set -ex

# Cluster name, task file, and env overrides shared by exec and launch.
# NOTE(review): deliberately left unquoted below so it word-splits into
# separate arguments — confirm none of the values ever contain spaces.
ARGS="tabby-eval skypilot.yaml --env MAX_RECORDS=300"

# Re-use the cluster when it already exists; otherwise launch it fresh.
if ! sky exec $ARGS; then
    sky launch -c $ARGS
fi

# Copy the generated reports back to the local working directory.
scp tabby-eval:~/sky_workdir/reports.ipynb ./
scp tabby-eval:~/sky_workdir/reports.html ./
|
||||||
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1,89 @@
|
||||||
|
from typing import Iterator
|
||||||
|
|
||||||
|
import glob
|
||||||
|
import json
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from transformers import HfArgumentParser
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Item:
    """One code-completion evaluation example cut from a dataset document."""

    # Provenance: where the source file came from.
    git_url: str
    filepath: str
    language: str

    # The tagged declaration: its identifier, its full text, and the
    # surrounding context before/after it (see get_prefix / get_suffix).
    name: str
    body: str
    prefix: str
    suffix: str
|
||||||
|
|
||||||
|
|
||||||
|
def iter_items(doc) -> Iterator[Item]:
    """Yield an Item for each tagged declaration in *doc*.

    The whole document is skipped when it trips any quality heuristic
    (guards against minified or binary-ish content): a line longer than
    500 chars, average line length outside [10, 200], or less than 25%
    alphanumeric characters.

    *doc* is a dict with keys ``max_line_length``, ``avg_line_length``,
    ``alphanum_fraction``, ``content``, ``tags``, ``git_url``,
    ``filepath`` and ``language`` (see iter_docs).
    """
    if doc["max_line_length"] > 500:
        return

    if doc["avg_line_length"] < 10 or doc["avg_line_length"] > 200:
        return

    if doc["alphanum_fraction"] < 0.25:
        return

    # Hoisted out of the loop: the content is invariant across tags.
    content = doc["content"]
    for tag in doc["tags"]:
        name = get_content(content, tag["name_range"])
        body = get_content(content, tag["range"])

        prefix = get_prefix(content, tag["range"]["start"])
        suffix = get_suffix(content, tag["range"]["end"])

        yield Item(
            name=name,
            body=body,
            prefix=prefix,
            suffix=suffix,
            git_url=doc["git_url"],
            filepath=doc["filepath"],
            language=doc["language"],
        )
|
||||||
|
|
||||||
|
|
||||||
|
def iter_docs(filepattern: str):
    """Stream JSON documents, one per line, from every file matching *filepattern*."""
    paths = glob.glob(filepattern)
    for path in paths:
        with open(path) as handle:
            yield from (json.loads(line) for line in handle)
|
||||||
|
|
||||||
|
|
||||||
|
def get_content(content: str, range: dict):
    """Slice *content* to the half-open [start, end) span described by *range*.

    NOTE: the parameter name ``range`` shadows the builtin; kept so that
    keyword callers remain compatible.
    """
    lo, hi = range["start"], range["end"]
    return content[lo:hi]
|
||||||
|
|
||||||
|
|
||||||
|
def get_prefix(content: str, start: int, max=20):
    """Return at most *max* lines of *content* immediately preceding index *start*.

    Scans backwards from ``start`` counting newlines; the returned slice
    begins just after the *max*-th newline found, or at the beginning of
    the content when fewer newlines precede ``start``.

    NOTE: the parameter name ``max`` shadows the builtin; kept so keyword
    callers remain compatible.
    """
    num_lines = 0
    # Walk backwards down to and including index 0. (The original stopped
    # the range at index 1 and then sliced from prefix_start + 1, silently
    # dropping the first two characters of the file when fewer than *max*
    # newlines preceded *start*.)
    for prefix_start in range(start - 1, -1, -1):
        if content[prefix_start] == "\n":
            num_lines += 1

            if num_lines == max:
                # Start just past the boundary newline.
                return content[prefix_start + 1 : start]

    # Fewer than *max* newlines before *start*: take everything.
    return content[:start]
|
||||||
|
|
||||||
|
|
||||||
|
def get_suffix(content: str, end: int, max=20):
    """Return at most *max* lines of *content* starting at index *end*.

    Scans forward counting newlines; the returned slice stops just before
    the *max*-th newline found, or runs to the end of the content when
    fewer newlines follow ``end``.

    NOTE: the parameter name ``max`` shadows the builtin; kept so keyword
    callers remain compatible.
    """
    num_lines = 0
    # Scan forward to the *max*-th newline. (The original returned
    # content[end : suffix_end - 1] — an off-by-one that dropped the
    # character before the boundary newline, and dropped the final two
    # characters when the content was exhausted without a break.)
    for suffix_end in range(end, len(content)):
        if content[suffix_end] == "\n":
            num_lines += 1

            if num_lines == max:
                # Stop just before the boundary newline.
                return content[end:suffix_end]

    # Fewer than *max* newlines after *end*: take everything.
    return content[end:]
|
||||||
|
|
||||||
|
|
||||||
|
def items_from_filepattern(filepattern: str):
    """Yield every evaluation Item found in the files matching *filepattern*."""
    for doc in iter_docs(filepattern):
        for item in iter_items(doc):
            yield item
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
# Notebook execution for the evaluation pipeline
papermill
# Tabby API client, installed from a subdirectory of the monorepo
git+https://github.com/TabbyML/tabby.git#egg=tabby-python-client&subdirectory=clients/tabby-python-client
transformers
# Completion-vs-ground-truth scoring
editdistance
matplotlib
notebook
|
||||||
|
|
@ -1,6 +1,9 @@
|
||||||
resources:
|
resources:
|
||||||
accelerators: T4:1
|
accelerators: T4:1
|
||||||
|
|
||||||
|
# tabby base dir
|
||||||
|
workdir: ./
|
||||||
|
|
||||||
setup: |
|
setup: |
|
||||||
set -ex
|
set -ex
|
||||||
|
|
||||||
|
|
@ -8,9 +11,8 @@ setup: |
|
||||||
sudo curl -L https://github.com/docker/compose/releases/download/v2.17.2/docker-compose-linux-x86_64 -o /usr/local/bin/docker-compose
|
sudo curl -L https://github.com/docker/compose/releases/download/v2.17.2/docker-compose-linux-x86_64 -o /usr/local/bin/docker-compose
|
||||||
sudo chmod a+x /usr/local/bin/docker-compose
|
sudo chmod a+x /usr/local/bin/docker-compose
|
||||||
|
|
||||||
# Pull tabby images.
|
# Install tabby python client.
|
||||||
git clone https://github.com/TabbyML/tabby.git || true
|
pip install -r requirements.txt
|
||||||
cd tabby/experimental
|
|
||||||
|
|
||||||
# On certain cloud providers (e.g lambda cloud), the default user is not added to docker group, so we need sudo here
|
# On certain cloud providers (e.g lambda cloud), the default user is not added to docker group, so we need sudo here
|
||||||
sudo docker-compose pull
|
sudo docker-compose pull
|
||||||
|
|
@ -20,5 +22,4 @@ setup: |
|
||||||
|
|
||||||
|
|
||||||
run: |
|
run: |
|
||||||
cd tabby/experimental
|
./eval.sh
|
||||||
sudo docker-compose up
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue