feat: add python eval script (#266)

* feat: add python eval script

* update

* add local script

* move eval script

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* update

* add README
sweep/improve-logging-information
Meng Zhang 2023-07-11 09:53:04 +08:00 committed by GitHub
parent e822d1857f
commit bed723fced
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 553 additions and 7 deletions

4
experimental/eval/.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
__pycache__
.ipynb_checkpoints
reports.*
tabby

View File

@ -0,0 +1,7 @@
# Eval

Scripts for evaluating tabby code completion quality.

## Local

`./eval.sh`

## SkyPilot

`./eval_sky.sh`

View File

@ -0,0 +1,2 @@
# Repository indexed by the tabby scheduler; its code becomes the
# source for the evaluation dataset.
[[repositories]]
git_url = "https://github.com/huggingface/text-generation-inference"

View File

@ -0,0 +1,11 @@
# Compose override for GPU machines: layered on top of the base
# docker-compose.yaml (see eval.sh) to run the tabby service on CUDA.
version: '3.5'
services:
  tabby:
    # Serve with a larger model on the GPU (the base file serves a
    # smaller CPU model instead).
    command: serve --model TabbyML/SantaCoder-1B --device cuda
    deploy:
      resources:
        reservations:
          devices:
            # Reserve one NVIDIA GPU for the container.
            - driver: nvidia
              count: 1
              capabilities: [gpu]

View File

@ -6,7 +6,7 @@ services:
platform: linux/amd64 platform: linux/amd64
command: scheduler --now command: scheduler --now
volumes: volumes:
- "$HOME/.tabby:/data" - "$PWD/tabby:/data"
tabby: tabby:
depends_on: depends_on:
@ -15,5 +15,7 @@ services:
image: tabbyml/tabby image: tabbyml/tabby
platform: linux/amd64 platform: linux/amd64
command: serve --model TabbyML/T5P-220M command: serve --model TabbyML/T5P-220M
ports:
- "8080:8080"
volumes: volumes:
- "$HOME/.tabby:/data" - "$PWD/tabby:/data"

26
experimental/eval/eval.sh Executable file
View File

@ -0,0 +1,26 @@
#!/bin/bash
# Run the tabby evaluation locally with docker-compose and render an
# HTML report from the main.ipynb notebook.
set -ex

# The containers mount ./tabby as their data dir; seed it with the config.
mkdir -p tabby
cp config.toml tabby/

# Start from a clean slate; add the CUDA override when a GPU is visible.
docker-compose down
if nvidia-smi; then
    docker-compose -f docker-compose.yaml -f docker-compose.cuda.yaml up -d
else
    docker-compose up -d
fi

# Poll until the server answers the health check. -f makes curl exit
# non-zero on HTTP errors, so a 5xx response does not count as "ready";
# -s suppresses the progress noise (set -x still traces the command).
while ! curl -sf -X POST http://localhost:8080/v1/health; do
    echo "server not ready, waiting..."
    sleep 5
done

# Execute the notebook against the scheduler's dataset, then export it
# to HTML with cells tagged "remove" stripped from the report.
papermill main.ipynb ./reports.ipynb -r filepattern "./tabby/dataset/*.jsonl" -r max_records "${MAX_RECORDS:-3}"
jupyter nbconvert reports.ipynb --TagRemovePreprocessor.enabled=True --TagRemovePreprocessor.remove_cell_tags remove --to html

docker-compose down
echo done

11
experimental/eval/eval_sky.sh Executable file
View File

@ -0,0 +1,11 @@
#!/bin/bash
# Run the tabby evaluation on a SkyPilot cluster and copy the reports back.
set -ex

CLUSTER="tabby-eval"
# Use an array so each argument survives quoting intact (avoids SC2086
# word-splitting pitfalls of an unquoted string variable).
ARGS=("$CLUSTER" skypilot.yaml --env MAX_RECORDS=300)

# Re-run on an existing cluster if possible; otherwise launch a new one.
if ! sky exec "${ARGS[@]}"; then
    sky launch -c "${ARGS[@]}"
fi

# Fetch the rendered evaluation reports from the cluster's workdir.
scp "$CLUSTER":~/sky_workdir/reports.ipynb ./
scp "$CLUSTER":~/sky_workdir/reports.html ./

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,89 @@
from typing import Iterator
import glob
import json
from dataclasses import dataclass
from transformers import HfArgumentParser
@dataclass
class Item:
    """One completion sample extracted from a tagged source document.

    Built by ``iter_items`` from a document's ``tags``: the tagged
    definition plus surrounding context for fill-in-the-middle evaluation.
    """

    git_url: str   # git URL of the source repository (from doc["git_url"])
    filepath: str  # path of the file within the repository
    language: str  # programming language of the file
    name: str      # text covered by the tag's name_range
    body: str      # full text covered by the tag's range
    prefix: str    # lines of content preceding the tag (see get_prefix)
    suffix: str    # lines of content following the tag (see get_suffix)
def iter_items(doc) -> Iterator[Item]:
    """Yield one ``Item`` per tag in *doc*, skipping low-quality documents.

    Documents with very long lines, unusual average line length, or a low
    alphanumeric fraction are filtered out entirely.
    """
    avg_len = doc["avg_line_length"]
    if (
        doc["max_line_length"] > 500
        or avg_len < 10
        or avg_len > 200
        or doc["alphanum_fraction"] < 0.25
    ):
        return
    content = doc["content"]
    for tag in doc["tags"]:
        span = tag["range"]
        yield Item(
            name=get_content(content, tag["name_range"]),
            body=get_content(content, span),
            prefix=get_prefix(content, span["start"]),
            suffix=get_suffix(content, span["end"]),
            git_url=doc["git_url"],
            filepath=doc["filepath"],
            language=doc["language"],
        )
def iter_docs(filepattern: str):
    """Yield one parsed JSON document per line, across every file matching *filepattern*."""
    for path in glob.glob(filepattern):
        with open(path) as fp:
            # Each file is JSON-lines: one document per line.
            yield from map(json.loads, fp)
def get_content(content: str, range: dict):
    """Return the slice of *content* covered by *range* (``start``/``end`` offsets)."""
    # NOTE: the parameter shadows the builtin `range`; name kept for caller compatibility.
    begin = range["start"]
    stop = range["end"]
    return content[begin:stop]
def get_prefix(content: str, start: int, max=20):
    """Return up to *max* lines of *content* immediately preceding offset *start*.

    Scans backwards counting newlines; the returned text begins just after
    the ``max``-th newline, or at the start of *content* when fewer newlines
    exist before ``start``.
    """
    # NOTE: `max` shadows the builtin; name kept for caller compatibility.
    num_lines = 0
    # Walk backwards over every character before `start`, including index 0.
    for prefix_start in range(start - 1, -1, -1):
        if content[prefix_start] == "\n":
            num_lines += 1
            if num_lines == max:
                # Exclude the newline itself from the returned prefix.
                return content[prefix_start + 1 : start]
    # Budget not exhausted: the whole leading text fits (the previous
    # version fell through here and dropped the first two characters).
    return content[:start]
def get_suffix(content: str, end: int, max=20):
    """Return up to *max* lines of *content* starting at offset *end*.

    Scans forwards counting newlines; the returned text stops just before
    the ``max``-th newline, or runs to the end of *content* when fewer
    newlines remain after ``end``.
    """
    # NOTE: `max` shadows the builtin; name kept for caller compatibility.
    num_lines = 0
    for suffix_end in range(end, len(content)):
        if content[suffix_end] == "\n":
            num_lines += 1
            if num_lines == max:
                # Exclude the terminating newline, mirroring get_prefix
                # (the previous version's `suffix_end - 1` cut one extra char).
                return content[end:suffix_end]
    # Budget not exhausted: return the remainder of the content.
    return content[end:]
def items_from_filepattern(filepattern: str):
    """Iterate over every Item extracted from all documents matching *filepattern*."""
    for doc in iter_docs(filepattern):
        for item in iter_items(doc):
            yield item

View File

@ -0,0 +1,6 @@
papermill
git+https://github.com/TabbyML/tabby.git#egg=tabby-python-client&subdirectory=clients/tabby-python-client
transformers
editdistance
matplotlib
notebook

View File

@ -1,6 +1,9 @@
resources: resources:
accelerators: T4:1 accelerators: T4:1
# tabby base dir
workdir: ./
setup: | setup: |
set -ex set -ex
@ -8,9 +11,8 @@ setup: |
sudo curl -L https://github.com/docker/compose/releases/download/v2.17.2/docker-compose-linux-x86_64 -o /usr/local/bin/docker-compose sudo curl -L https://github.com/docker/compose/releases/download/v2.17.2/docker-compose-linux-x86_64 -o /usr/local/bin/docker-compose
sudo chmod a+x /usr/local/bin/docker-compose sudo chmod a+x /usr/local/bin/docker-compose
# Pull tabby images. # Install tabby python client.
git clone https://github.com/TabbyML/tabby.git || true pip install -r requirements.txt
cd tabby/experimental
# On certain cloud providers (e.g lambda cloud), the default user is not added to docker group, so we need sudo here # On certain cloud providers (e.g lambda cloud), the default user is not added to docker group, so we need sudo here
sudo docker-compose pull sudo docker-compose pull
@ -20,5 +22,4 @@ setup: |
run: | run: |
cd tabby/experimental ./eval.sh
sudo docker-compose up