feat(experimental): add script to run eval in cli
parent
b135022dc0
commit
7ca416aa13
|
|
@ -36,12 +36,13 @@ def run_eval(args):
|
||||||
except:
|
except:
|
||||||
print(f"Tabby Server is not ready, please check if '{api}' is correct.")
|
print(f"Tabby Server is not ready, please check if '{api}' is correct.")
|
||||||
return
|
return
|
||||||
|
|
||||||
items = [x for x in processing.items_from_filepattern(args.filepattern) if valid_item(x)];
|
items = [
|
||||||
|
x for x in processing.items_from_filepattern(args.filepattern) if valid_item(x)
|
||||||
|
]
|
||||||
if len(items) > args.max_records:
|
if len(items) > args.max_records:
|
||||||
random.seed(0xbadbeef)
|
random.seed(0xBADBEEF)
|
||||||
items = random.sample(items, args.max_records)
|
items = random.sample(items, args.max_records)
|
||||||
|
|
||||||
|
|
||||||
for item in items:
|
for item in items:
|
||||||
if not valid_item(item):
|
if not valid_item(item):
|
||||||
|
|
@ -56,10 +57,10 @@ def run_eval(args):
|
||||||
prediction = resp.choices[0].text
|
prediction = resp.choices[0].text
|
||||||
|
|
||||||
block_score = scorer(label, prediction)
|
block_score = scorer(label, prediction)
|
||||||
|
|
||||||
label_lines = label.splitlines()
|
label_lines = label.splitlines()
|
||||||
prediction_lines = prediction.splitlines()
|
prediction_lines = prediction.splitlines()
|
||||||
|
|
||||||
if len(label_lines) > 0 and len(prediction_lines) > 0:
|
if len(label_lines) > 0 and len(prediction_lines) > 0:
|
||||||
line_score = scorer(label_lines[0], prediction_lines[0])
|
line_score = scorer(label_lines[0], prediction_lines[0])
|
||||||
|
|
||||||
|
|
@ -71,13 +72,19 @@ def run_eval(args):
|
||||||
line_score=line_score,
|
line_score=line_score,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
logging.basicConfig(stream=sys.stderr, level=logging.INFO)
|
logging.basicConfig(stream=sys.stderr, level=logging.INFO)
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description='SxS eval for tabby')
|
parser = argparse.ArgumentParser(
|
||||||
parser.add_argument('filepattern', type=str, help='File pattern to dataset.')
|
description="SxS eval for tabby",
|
||||||
parser.add_argument('max_records', type=int, help='Max number of records to be evaluated.')
|
epilog="Example usage: python main.py ./tabby/dataset/data.jsonl 5 > output.jsonl",
|
||||||
|
)
|
||||||
|
parser.add_argument("filepattern", type=str, help="File pattern to dataset.")
|
||||||
|
parser.add_argument(
|
||||||
|
"max_records", type=int, help="Max number of records to be evaluated."
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
logging.info("args %s", args)
|
logging.info("args %s", args)
|
||||||
df = pd.DataFrame(run_eval(args))
|
df = pd.DataFrame(run_eval(args))
|
||||||
print(df.to_json(orient='records', lines=True))
|
print(df.to_json(orient="records", lines=True))
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue