tabby/crates/ctranslate2-bindings/ctranslate2/tools/benchmark/benchmark.py

306 lines
8.5 KiB
Python

import GPUtil
import argparse
import collections
import docker
import os
import sacrebleu
import tempfile
import time
# Shared Docker client used by every helper below; docker.from_env() builds it
# from the process environment.
client = docker.from_env()

# Parse the daemon version string (split on ".") into integer major/minor
# components. _start_translation compares them against 19.03 to decide how
# GPUs are exposed to the container.
docker_version = client.version()["Version"]
docker_version_numbers = docker_version.split(".")
docker_major_version, docker_minor_version = (
    int(docker_version_numbers[0]),
    int(docker_version_numbers[1]),
)
def _get_bleu_score(hyp_file, ref_file):
    """Compute the corpus-level BLEU score of a hypothesis file.

    Args:
        hyp_file: Path to the file holding the system output, one segment
            per line.
        ref_file: Path to the file holding the reference translations, one
            segment per line.

    Returns:
        The ``score`` attribute of the object returned by
        ``sacrebleu.corpus_bleu``.
    """
    # Read both files into lists before scoring: sacrebleu expects sequences
    # of strings, and recent releases reject open file objects outright.
    # NOTE(review): files are assumed to be UTF-8 encoded.
    with open(hyp_file, encoding="utf-8") as hyp:
        hypotheses = hyp.readlines()
    with open(ref_file, encoding="utf-8") as ref:
        references = ref.readlines()

    # force=True is kept from the original call (scores the input as given,
    # even if it looks already tokenized).
    bleu = sacrebleu.corpus_bleu(hypotheses, [references], force=True)
    return bleu.score
def _count_tokens(path):
    """Count the space-separated tokens contained in a text file.

    Args:
        path: Path to a tokenized text file whose tokens are separated by
            single space characters.

    Returns:
        The total number of non-empty tokens over all lines. Blank lines
        contribute zero tokens.
    """
    num_tokens = 0
    # NOTE(review): the file is assumed to be UTF-8 encoded.
    with open(path, encoding="utf-8") as file:
        for line in file:
            # split(" ") yields empty strings for blank lines and for runs of
            # consecutive spaces; those are not tokens and must not be counted.
            num_tokens += sum(1 for token in line.strip().split(" ") if token)
    return num_tokens
def _monitor_container(container, poll_interval=1, use_gpu=False):
    """Wait for a container to exit while recording its peak memory usage.

    Args:
        container: Container object exposing ``wait(timeout=...)``,
            ``stats(stream=False)`` and ``logs(stdout=False)``.
        poll_interval: Number of seconds to wait for the container to exit
            before taking another memory sample.
        use_gpu: When true, also sample the memory used on the first GPU
            listed by ``GPUtil.getGPUs()``.

    Returns:
        A ``(max_cpu_mem, max_gpu_mem)`` tuple. The CPU value is the largest
        observed ``memory_stats["usage"]`` divided by 1,000,000; the GPU value
        is the largest observed ``memoryUsed`` reported by GPUtil (0 when
        ``use_gpu`` is false).

    Raises:
        RuntimeError: If the container exits with a non-zero status code. The
            message includes the container's stderr output.
    """
    max_cpu_mem = 0
    max_gpu_mem = 0

    while True:
        try:
            result = container.wait(timeout=poll_interval)
            break
        except Exception:
            # wait() raising is taken to mean the container is still running
            # (the timeout expired). Catching Exception rather than using a
            # bare except lets KeyboardInterrupt/SystemExit stop the loop.
            pass

        # The container is still running: take one memory sample.
        stats = container.stats(stream=False)
        memory_usage = stats["memory_stats"].get("usage")
        if memory_usage is not None:
            max_cpu_mem = max(max_cpu_mem, float(memory_usage / 1000000))
        if use_gpu:
            max_gpu_mem = max(max_gpu_mem, float(GPUtil.getGPUs()[0].memoryUsed))

    if result is not None and result["StatusCode"] != 0:
        stderr = container.logs(stdout=False).decode("utf-8")
        raise RuntimeError(
            "Container exited with status code %d:\n\n%s"
            % (result["StatusCode"], stderr)
        )

    return max_cpu_mem, max_gpu_mem
def _process_file(image_name, script, input_file, output_file):
    """Run one of the image's file-processing scripts in a throwaway container.

    The directories containing ``input_file`` and ``output_file`` are
    bind-mounted at ``/input`` and ``/output`` inside the container, and
    ``script`` is used as the entrypoint with the two in-container file paths
    as its arguments. The container is removed once it has finished.

    Args:
        image_name: Name of the Docker image to run.
        script: Entrypoint to execute inside the container.
        input_file: Host path of the file to read.
        output_file: Host path of the file to write.
    """
    container_input_dir = "/input"
    container_output_dir = "/output"

    host_input_dir, input_name = os.path.split(input_file)
    host_output_dir, output_name = os.path.split(output_file)

    script_args = [
        os.path.join(container_input_dir, input_name),
        os.path.join(container_output_dir, output_name),
    ]
    bind_mounts = [
        docker.types.Mount(container_input_dir, host_input_dir, type="bind"),
        docker.types.Mount(container_output_dir, host_output_dir, type="bind"),
    ]

    client.containers.run(
        image_name,
        command=script_args,
        entrypoint=script,
        remove=True,
        mounts=bind_mounts,
    )
def _tokenize(image_name, input_file, output_file):
    """Run the image's ``/tokenize`` script on ``input_file``.

    The result is written to ``output_file``; the value returned by
    ``_process_file`` is passed through unchanged.
    """
    return _process_file(
        image_name,
        script="/tokenize",
        input_file=input_file,
        output_file=output_file,
    )
def _detokenize(image_name, input_file, output_file):
    """Run the image's ``/detokenize`` script on ``input_file``.

    The result is written to ``output_file``; the value returned by
    ``_process_file`` is passed through unchanged.
    """
    return _process_file(
        image_name,
        script="/detokenize",
        input_file=input_file,
        output_file=output_file,
    )
def _start_translation(
    image_name,
    source_file,
    output_file,
    environment,
    num_cpus,
    use_gpu,
):
    """Start a detached container running the image's ``/translate`` entrypoint.

    The directories containing ``source_file`` and ``output_file`` are
    bind-mounted at ``/data`` and ``/output``. The entrypoint receives the
    device name (``"GPU"`` or ``"CPU"``) followed by the in-container source
    and output paths.

    Args:
        image_name: Name of the Docker image to run.
        source_file: Host path of the file to translate.
        output_file: Host path where the translation is written.
        environment: Optional mapping of extra environment variables for the
            container. It is copied, never mutated.
        num_cpus: Value exported to the container as ``OMP_NUM_THREADS``.
        use_gpu: Whether to expose the host GPUs to the container.

    Returns:
        The container object returned by ``client.containers.run``. The caller
        is responsible for waiting on it and removing it.
    """
    kwargs = {}

    # Work on a copy so the caller's mapping is left untouched.
    environment = environment.copy() if environment else {}
    environment["OMP_NUM_THREADS"] = str(num_cpus)

    if use_gpu:
        device = "GPU"
        if docker_major_version < 19 or (
            docker_major_version == 19 and docker_minor_version < 3
        ):
            # Daemons older than 19.03 get the dedicated "nvidia" runtime
            # instead of a device request.
            kwargs["runtime"] = "nvidia"
        else:
            # count=-1 requests all available GPUs. A count of 0 would request
            # no device at all and leave the container without GPU access.
            kwargs["device_requests"] = [
                docker.types.DeviceRequest(count=-1, capabilities=[["gpu"]])
            ]
    else:
        device = "CPU"
        # Hide any CUDA device from the process when benchmarking on CPU.
        environment["CUDA_VISIBLE_DEVICES"] = ""

    data_dir = "/data"
    output_dir = "/output"

    container = client.containers.run(
        image_name,
        [
            device,
            os.path.join(data_dir, os.path.basename(source_file)),
            os.path.join(output_dir, os.path.basename(output_file)),
        ],
        entrypoint="/translate",
        detach=True,
        mounts=[
            docker.types.Mount(data_dir, os.path.dirname(source_file), type="bind"),
            docker.types.Mount(output_dir, os.path.dirname(output_file), type="bind"),
        ],
        environment=environment,
        **kwargs
    )
    return container
def _benchmark_translation(
    image_name,
    source_file,
    target_file,
    environment,
    num_cpus,
    use_gpu,
):
    """Run one full tokenize / translate / detokenize / score cycle.

    All intermediate files live in a temporary directory that is deleted when
    the function returns. Only the translation container is timed and
    monitored; tokenization, detokenization and scoring are not.

    Args:
        image_name: Name of the Docker image to benchmark.
        source_file: Host path of the raw source text.
        target_file: Host path of the reference translation.
        environment: Optional extra environment variables for the container.
        num_cpus: Number of CPU threads the translation should use.
        use_gpu: Whether to run the translation on GPU.

    Returns:
        A tuple ``(elapsed_time, num_tokens, max_cpu_mem, max_gpu_mem, bleu)``.
    """
    with tempfile.TemporaryDirectory() as work_dir:
        tokenized_source = os.path.join(work_dir, "source.txt.tok")
        tokenized_output = os.path.join(work_dir, "output.txt.tok")
        detokenized_output = os.path.join(work_dir, "output.txt")

        _tokenize(image_name, source_file, tokenized_source)

        container = _start_translation(
            image_name,
            tokenized_source,
            tokenized_output,
            environment,
            num_cpus,
            use_gpu,
        )
        try:
            started_at = time.time()
            max_cpu_mem, max_gpu_mem = _monitor_container(container, use_gpu=use_gpu)
            elapsed_time = time.time() - started_at

            num_tokens = _count_tokens(tokenized_output)
            _detokenize(image_name, tokenized_output, detokenized_output)
            bleu = _get_bleu_score(detokenized_output, target_file)
            return elapsed_time, num_tokens, max_cpu_mem, max_gpu_mem, bleu
        finally:
            # Always clean up the container, even when monitoring or scoring fails.
            container.remove(force=True)
class BenchmarkResult(
    collections.namedtuple(
        "BenchmarkResult",
        "total_time translation_time num_tokens max_cpu_mem max_gpu_mem bleu_score",
    )
):
    """Aggregated measurements produced by ``benchmark_image``.

    Fields, in order: ``total_time``, ``translation_time``, ``num_tokens``,
    ``max_cpu_mem``, ``max_gpu_mem`` and ``bleu_score``.
    """
def benchmark_image(
    image_name,
    source_file,
    target_file,
    num_samples=1,
    environment=None,
    num_cpus=4,
    use_gpu=False,
):
    """Benchmark the translation speed, memory usage and quality of an image.

    The benchmark runs in two phases, each repeated ``num_samples`` times:

    1. The translation container is run on an empty temporary file to measure
       its start-up (initialization) time; the fastest run is kept.
    2. The full translation of ``source_file`` is run and monitored; the
       fastest run and the highest memory readings are kept.

    Args:
        image_name: Name of the Docker image to benchmark.
        source_file: Path to the source text to translate.
        target_file: Path to the reference translation used for BLEU.
        num_samples: Number of runs per phase. Must be at least 1.
        environment: Optional mapping of environment variables to set in the
            container.
        num_cpus: Number of CPU threads the translation should use.
        use_gpu: Whether to run the translation on GPU.

    Returns:
        A ``BenchmarkResult``. ``translation_time`` is the best total time
        minus the best initialization time; ``num_tokens`` and ``bleu_score``
        come from the last run.

    Raises:
        ValueError: If ``num_samples`` is lower than 1.
    """
    # Without at least one sample both timings stay None and the subtraction
    # below would fail with an unhelpful TypeError.
    if num_samples < 1:
        raise ValueError("num_samples must be at least 1, got %r" % (num_samples,))

    # Bind mounts need absolute host paths.
    source_file = os.path.abspath(source_file)
    target_file = os.path.abspath(target_file)

    # Phase 1: translate an empty file (used as both input and output) to
    # measure how long the container takes to start.
    initialization_time = None
    with tempfile.NamedTemporaryFile() as tmp_file:
        for _ in range(num_samples):
            container = _start_translation(
                image_name,
                tmp_file.name,
                tmp_file.name,
                environment,
                num_cpus,
                use_gpu,
            )
            try:
                start = time.time()
                container.wait()
                end = time.time()
                elapsed_time = end - start
                initialization_time = (
                    elapsed_time
                    if initialization_time is None
                    else min(initialization_time, elapsed_time)
                )
            finally:
                container.remove(force=True)

    # Phase 2: run the real translation and keep the best time and the peak
    # memory readings over all samples.
    total_time = None
    num_tokens = 0
    bleu = 0
    max_cpu_mem = 0
    max_gpu_mem = 0
    for _ in range(num_samples):
        results = _benchmark_translation(
            image_name,
            source_file,
            target_file,
            environment,
            num_cpus,
            use_gpu,
        )
        total_time = results[0] if total_time is None else min(total_time, results[0])
        num_tokens = results[1]
        max_cpu_mem = max(max_cpu_mem, results[2])
        max_gpu_mem = max(max_gpu_mem, results[3])
        bleu = results[4]

    translation_time = total_time - initialization_time
    return BenchmarkResult(
        total_time,
        translation_time,
        num_tokens,
        max_cpu_mem,
        max_gpu_mem,
        bleu,
    )
def main():
    """Command-line entry point: parse arguments, run the benchmark, print a report."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    # Optional settings.
    parser.add_argument(
        "--num_samples",
        type=int,
        default=1,
        help="aggregate results over this number of runs",
    )
    parser.add_argument(
        "--num_cpus",
        type=int,
        default=4,
        help="number of CPUs to use",
    )
    parser.add_argument(
        "--gpu",
        action="store_true",
        help="run on GPU",
    )
    # Repeatable option taking a name and a value: --env NAME VALUE
    parser.add_argument(
        "--env",
        type=str,
        nargs=2,
        action="append",
        default=[],
        help="add this environment variable to the Docker container",
    )

    # Required positional arguments.
    parser.add_argument("image", type=str, help="name of Docker image to benchmark")
    parser.add_argument("src", type=str, help="source file")
    parser.add_argument("ref", type=str, help="reference file")

    args = parser.parse_args()

    # Each --env occurrence is a [name, value] pair.
    container_env = dict(args.env)
    result = benchmark_image(
        args.image,
        args.src,
        args.ref,
        num_samples=args.num_samples,
        environment=container_env,
        num_cpus=args.num_cpus,
        use_gpu=args.gpu,
    )

    print("Benchmark result (%d sample(s)):" % args.num_samples)
    print("- total time: %.2f s" % result.total_time)
    print("- translation time: %.2f s" % result.translation_time)
    print("- tokens per second: %.1f" % (result.num_tokens / result.translation_time))
    print("- max. CPU memory usage: %dMB" % int(result.max_cpu_mem))
    # GPU memory is only reported when the benchmark ran on GPU.
    if args.gpu:
        print("- max. GPU memory usage: %dMB" % int(result.max_gpu_mem))
    print("- BLEU score: %.2f" % result.bleu_score)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()