From edb3a72ad631ed4d3180c3131c68bcd90c4c2b35 Mon Sep 17 00:00:00 2001 From: Meng Zhang Date: Fri, 24 Mar 2023 09:58:04 +0800 Subject: [PATCH] Move python scripts into tasks --- Makefile | 2 +- README.md | 3 +-- tasks/README.md | 2 ++ {converter => tasks/converter}/huggingface_gptneox_convert.py | 0 {preprocess => tasks/preprocess}/args.py | 0 {preprocess => tasks/preprocess}/filters.py | 0 {preprocess => tasks/preprocess}/metrics.py | 0 {preprocess => tasks/preprocess}/preprocess_project.py | 0 .../preprocess}/programming-languages-to-file-extensions.json | 0 9 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 tasks/README.md rename {converter => tasks/converter}/huggingface_gptneox_convert.py (100%) rename {preprocess => tasks/preprocess}/args.py (100%) rename {preprocess => tasks/preprocess}/filters.py (100%) rename {preprocess => tasks/preprocess}/metrics.py (100%) rename {preprocess => tasks/preprocess}/preprocess_project.py (100%) rename {preprocess => tasks/preprocess}/programming-languages-to-file-extensions.json (100%) diff --git a/Makefile b/Makefile index ae07799..1b1968b 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ $(PRE_COMMIT_HOOK): poetry run pre-commit install --install-hooks $(LOCAL_MODEL): - poetry run python converter/huggingface_gptneox_convert.py \ + poetry run python tasks/converter/huggingface_gptneox_convert.py \ -in_file EleutherAI/pythia-70m-deduped \ -o $@ \ -i_g 1 -m_n tiny-70M -p 1 -w fp16 diff --git a/README.md b/README.md index a1a73d8..38049ef 100644 --- a/README.md +++ b/README.md @@ -12,8 +12,7 @@ An opensource / on-prem alternative to GitHub Copilot * [`admin`](./admin): Admin panel for monitoring / settings purpose. * [`server`](./server): API server for completion requests. It also logs users' selections (as feedback to model's quality). * [`deployment`](./deployment): Container related deployment configs. -* [`converter`](./converter): Converts a [transformers](https://huggingface.co/docs/transformers) causal LM model into TensorRT / FasterTransformer serving formats. -* [`preprocess`](./preprocess): Preprocess files into [datasets](https://huggingface.co/docs/datasets) +* [`tasks`](./tasks): Various data processing scripts. * [`tabformer`](./tabformer): *NOT RELEASED* Trainer(PEFT w/RLHF) for tabby models. ## Development diff --git a/tasks/README.md b/tasks/README.md new file mode 100644 index 0000000..ca35de9 --- /dev/null +++ b/tasks/README.md @@ -0,0 +1,2 @@ +* [`converter`](./converter): Converts a [transformers](https://huggingface.co/docs/transformers) causal LM model into TensorRT / FasterTransformer serving formats. +* [`preprocess`](./preprocess): Preprocess files into [datasets](https://huggingface.co/docs/datasets) diff --git a/converter/huggingface_gptneox_convert.py b/tasks/converter/huggingface_gptneox_convert.py similarity index 100% rename from converter/huggingface_gptneox_convert.py rename to tasks/converter/huggingface_gptneox_convert.py diff --git a/preprocess/args.py b/tasks/preprocess/args.py similarity index 100% rename from preprocess/args.py rename to tasks/preprocess/args.py diff --git a/preprocess/filters.py b/tasks/preprocess/filters.py similarity index 100% rename from preprocess/filters.py rename to tasks/preprocess/filters.py diff --git a/preprocess/metrics.py b/tasks/preprocess/metrics.py similarity index 100% rename from preprocess/metrics.py rename to tasks/preprocess/metrics.py diff --git a/preprocess/preprocess_project.py b/tasks/preprocess/preprocess_project.py similarity index 100% rename from preprocess/preprocess_project.py rename to tasks/preprocess/preprocess_project.py diff --git a/preprocess/programming-languages-to-file-extensions.json b/tasks/preprocess/programming-languages-to-file-extensions.json similarity index 100% rename from preprocess/programming-languages-to-file-extensions.json rename to tasks/preprocess/programming-languages-to-file-extensions.json