feat: add python eval script (#266)

* feat: add python eval script * update * add local script * move eval script * update * update * update * update * update * update * update * update * update * update * update * update * add README
2023-07-11 09:53:04 +08:00 · 2023-07-11 09:53:04 +08:00 · bed723fced
parent e822d1857f
commit bed723fced
11 changed files with 553 additions and 7 deletions
--- a/experimental/eval/.gitignore
+++ b/experimental/eval/.gitignore
@ -0,0 +1,4 @@
+__pycache__
+.ipynb_checkpoints
+reports.*
+tabby
--- a/experimental/eval/README.md
+++ b/experimental/eval/README.md
@ -0,0 +1,7 @@
+# Eval
+
+## Local
+`./eval.sh`
+
+## Skypilot
+`./eval_sky.sh`
--- a/experimental/eval/config.toml
+++ b/experimental/eval/config.toml
@ -0,0 +1,2 @@
+[[repositories]]
+git_url = "https://github.com/huggingface/text-generation-inference"
--- a/experimental/eval/docker-compose.cuda.yaml
+++ b/experimental/eval/docker-compose.cuda.yaml
@ -0,0 +1,11 @@
+version: '3.5'
+services:
+  tabby:
+    command: serve --model TabbyML/SantaCoder-1B --device cuda
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
--- a/experimental/eval/docker-compose.yaml
+++ b/experimental/eval/docker-compose.yaml
@ -6,7 +6,7 @@ services:
    platform: linux/amd64
    command: scheduler --now
    volumes:
-      - "$HOME/.tabby:/data"
+      - "$PWD/tabby:/data"

  tabby:
    depends_on:
@ -15,5 +15,7 @@ services:
    image: tabbyml/tabby
    platform: linux/amd64
    command: serve --model TabbyML/T5P-220M
+    ports:
+      - "8080:8080"
    volumes:
-      - "$HOME/.tabby:/data"
+      - "$PWD/tabby:/data"
--- a/experimental/eval/eval.sh
+++ b/experimental/eval/eval.sh
@ -0,0 +1,26 @@
+#!/bin/bash
+# Run the evaluation locally: start the tabby stack with docker-compose, wait
+# for the server, execute the evaluation notebook and render it to HTML.
+set -ex
+
+# Stage the repository config into ./tabby before the containers start.
+mkdir -p tabby
+cp config.toml tabby/
+
+# Tear down any stack left over from a previous run.
+docker-compose down
+
+# When nvidia-smi succeeds, layer the CUDA override file on top of the base
+# compose file; otherwise start the base stack only.
+if nvidia-smi; then
+  docker-compose -f docker-compose.yaml -f docker-compose.cuda.yaml up -d
+else
+  docker-compose up -d
+fi
+
+# Poll the health endpoint every 5 seconds until curl exits successfully.
+# NOTE(review): curl is invoked without --fail, so any HTTP response (even an
+# error status) ends the wait - confirm this is intended.
+while ! curl -X POST http://localhost:8080/v1/health; do
+  echo "server not ready, waiting..."
+  sleep 5
+done
+
+# Execute main.ipynb into reports.ipynb, passing the dataset glob and the
+# record limit (MAX_RECORDS, defaulting to 3) as notebook parameters.
+papermill main.ipynb ./reports.ipynb -r filepattern "./tabby/dataset/*.jsonl" -r max_records "${MAX_RECORDS:-3}"
+
+# Convert the executed notebook to HTML, dropping cells tagged "remove".
+jupyter nbconvert reports.ipynb --TagRemovePreprocessor.enabled=True --TagRemovePreprocessor.remove_cell_tags remove --to html
+
+# Stop the stack again once the report has been produced.
+docker-compose down
+
+echo done
--- a/experimental/eval/eval_sky.sh
+++ b/experimental/eval/eval_sky.sh
@ -0,0 +1,11 @@
+#!/bin/bash
+# Run the evaluation through SkyPilot on the "tabby-eval" cluster and copy the
+# resulting reports back into the current directory.
+set -ex
+
+# Cluster name, task file and environment override shared by both sky commands.
+# $ARGS is deliberately left unquoted below so it splits into separate words.
+ARGS="tabby-eval skypilot.yaml --env MAX_RECORDS=300"
+
+# Try to run on the existing cluster first; if that fails, launch the cluster.
+if ! sky exec $ARGS; then
+  sky launch -c $ARGS
+fi
+
+# Fetch the executed notebook and its HTML rendering from the remote workdir.
+scp tabby-eval:~/sky_workdir/reports.ipynb ./
+scp tabby-eval:~/sky_workdir/reports.html ./
--- a/experimental/eval/main.ipynb
+++ b/experimental/eval/main.ipynb
--- a/experimental/eval/processing.py
+++ b/experimental/eval/processing.py
@ -0,0 +1,89 @@
+from typing import Iterator
+
+import glob
+import json
+from dataclasses import dataclass
+from transformers import HfArgumentParser
+
+
+@dataclass
+class Item:
+    """One evaluation sample built by iter_items from a single tag of a document."""
+
+    # Copied from the document's "git_url", "filepath" and "language" keys.
+    git_url: str
+    filepath: str
+    language: str
+
+    # `name` is the document text covered by the tag's "name_range"; `body` is
+    # the text covered by the tag's "range".
+    name: str
+    body: str
+    # Surrounding context produced by get_prefix (text before the tag's range)
+    # and get_suffix (text after it).
+    prefix: str
+    suffix: str
+
+
+def iter_items(doc) -> Iterator[Item]:
+    """Yield one Item per tag of `doc`, or nothing if the document is filtered out.
+
+    A document is skipped when its "max_line_length" exceeds 500, its
+    "avg_line_length" is below 10 or above 200, or its "alphanum_fraction" is
+    below 0.25.
+    """
+    # The checks short-circuit in the same order as three separate guards would.
+    if (
+        doc["max_line_length"] > 500
+        or doc["avg_line_length"] < 10
+        or doc["avg_line_length"] > 200
+        or doc["alphanum_fraction"] < 0.25
+    ):
+        return
+
+    for tag in doc["tags"]:
+        text = doc["content"]
+        tag_name = get_content(text, tag["name_range"])
+        tag_body = get_content(text, tag["range"])
+
+        before = get_prefix(text, tag["range"]["start"])
+        after = get_suffix(text, tag["range"]["end"])
+
+        yield Item(
+            name=tag_name,
+            body=tag_body,
+            prefix=before,
+            suffix=after,
+            git_url=doc["git_url"],
+            filepath=doc["filepath"],
+            language=doc["language"],
+        )
+
+
+def iter_docs(filepattern: str):
+    """Yield every JSON document stored in the JSON-lines files matching `filepattern`.
+
+    Args:
+        filepattern: glob pattern selecting the ``.jsonl`` files to read.
+
+    Yields:
+        The decoded JSON value of each non-blank line, file by file.
+
+    Raises:
+        json.JSONDecodeError: if a non-blank line is not valid JSON.
+    """
+    # glob returns matches in arbitrary order; sorting keeps runs reproducible,
+    # which matters when the caller only consumes the first few records.
+    for filepath in sorted(glob.glob(filepattern)):
+        # JSON text is UTF-8; do not depend on the platform default encoding.
+        with open(filepath, encoding="utf-8") as f:
+            for line in f:
+                # Tolerate blank lines (e.g. a trailing newline at end of file).
+                if not line.strip():
+                    continue
+                yield json.loads(line)
+
+
+def get_content(content: str, range: dict) -> str:
+    """Return the part of `content` between `range["start"]` and `range["end"]`.
+
+    The end offset is exclusive, as in ordinary slicing.
+    """
+    span = slice(range["start"], range["end"])
+    return content[span]
+
+
+def get_prefix(content: str, start: int, max=20):
+    """Return the text just before `start`, limited to `max` newline characters back.
+
+    The scan walks backwards from `start - 1`. The result begins right after
+    the `max`-th newline found, or at the beginning of `content` when fewer
+    newlines precede `start`.
+
+    Args:
+        content: full document text.
+        start: offset at which the prefix ends (exclusive).
+        max: number of newlines to walk back over (the name shadows the
+            builtin but is kept for caller compatibility).
+
+    Returns:
+        The selected slice of `content`, or "" when `start` or `max` is not positive.
+    """
+    if start <= 0 or max <= 0:
+        return ""
+
+    num_lines = 0
+    # Scan down to index 0 inclusive so a newline at the very start is seen.
+    for index in range(start - 1, -1, -1):
+        if content[index] == "\n":
+            num_lines += 1
+            if num_lines == max:
+                return content[index + 1 : start]
+
+    # Fewer than `max` newlines precede `start`: keep everything from the top.
+    return content[:start]
+
+
+def get_suffix(content: str, end: int, max=20):
+    """Return the text just after `end`, limited to `max` newline characters forward.
+
+    The scan walks forwards from `end`. The result stops right before the
+    `max`-th newline found, or runs to the end of `content` when fewer
+    newlines follow `end`.
+
+    Args:
+        content: full document text.
+        end: offset at which the suffix starts (inclusive).
+        max: number of newlines to walk forward over (the name shadows the
+            builtin but is kept for caller compatibility).
+
+    Returns:
+        The selected slice of `content`, or "" when `max` is not positive.
+    """
+    if max <= 0:
+        return ""
+
+    num_lines = 0
+    for index in range(end, len(content)):
+        if content[index] == "\n":
+            num_lines += 1
+            if num_lines == max:
+                # Exclude only the terminating newline itself.
+                return content[end:index]
+
+    # Fewer than `max` newlines follow `end`: keep everything to the end.
+    return content[end:]
+
+
+def items_from_filepattern(filepattern: str) -> Iterator[Item]:
+    """Yield the Items of every document read from files matching `filepattern`."""
+    for document in iter_docs(filepattern):
+        yield from iter_items(document)
--- a/experimental/eval/requirements.txt
+++ b/experimental/eval/requirements.txt
@ -0,0 +1,6 @@
+papermill
+git+https://github.com/TabbyML/tabby.git#egg=tabby-python-client&subdirectory=clients/tabby-python-client
+transformers
+editdistance
+matplotlib
+notebook
--- a/experimental/eval/skypilot.yaml
+++ b/experimental/eval/skypilot.yaml
@ -1,6 +1,9 @@
 resources:
  accelerators: T4:1

+# tabby base dir
+workdir: ./
+
 setup: |
  set -ex

@ -8,9 +11,8 @@ setup: |
  sudo curl -L https://github.com/docker/compose/releases/download/v2.17.2/docker-compose-linux-x86_64 -o /usr/local/bin/docker-compose
  sudo chmod a+x /usr/local/bin/docker-compose

-  # Pull tabby images.
-  git clone https://github.com/TabbyML/tabby.git || true
-  cd tabby/experimental
+  # Install tabby python client.
+  pip install -r requirements.txt

  # On certain cloud providers (e.g lambda cloud), the default user is not added to docker group, so we need sudo here
  sudo docker-compose pull
@ -20,5 +22,4 @@ setup: |


 run: |
-  cd tabby/experimental
-  sudo docker-compose up
+  ./eval.sh