Support linux ctranslate2 cuda build (#147)
* Support linux build * add <memory> to fix build error in linux * add Dockerfile.tabby * update * update * add rust docker image pipeline * add docker.rust.yml add-tracing
parent
80588ddd22
commit
0acc975618
|
|
@ -0,0 +1,90 @@
|
|||
name: Create and publish docker image
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches: [ "main" ]
|
||||
paths:
|
||||
- 'crates/**'
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
# This is used to complete the identity challenge
|
||||
# with sigstore/fulcio when running outside of PRs.
|
||||
id-token: write
|
||||
|
||||
steps:
|
||||
- name: Free Disk Space (Ubuntu)
|
||||
uses: jlumbroso/free-disk-space@main
|
||||
with:
|
||||
# this might remove tools that are actually needed,
|
||||
# if set to "true" but frees about 6 GB
|
||||
tool-cache: true
|
||||
|
||||
# all of these default to true, but feel free to set to
|
||||
# "false" if necessary for your workflow
|
||||
android: true
|
||||
dotnet: true
|
||||
haskell: true
|
||||
large-packages: false
|
||||
swap-storage: true
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
# Workaround: https://github.com/docker/build-push-action/issues/461
|
||||
- name: Setup Docker buildx
|
||||
uses: docker/setup-buildx-action@v2.0.0
|
||||
|
||||
# Login against a Docker registry except on PR
|
||||
# https://github.com/docker/login-action
|
||||
- name: Log into GitHub Container registry
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@v2.0.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Log into Docker Hub
|
||||
if: github.event_name != 'pull_request'
|
||||
uses: docker/login-action@v2.0.0
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
|
||||
- name: Generate image name
|
||||
run: |
|
||||
echo "IMAGE_NAME=${GITHUB_REPOSITORY,,}-rust" >>${GITHUB_ENV}
|
||||
|
||||
- uses: int128/docker-build-cache-config-action@v1
|
||||
id: cache
|
||||
with:
|
||||
image: ghcr.io/${{ env.IMAGE_NAME }}/cache
|
||||
|
||||
# Build and push Docker image with Buildx (don't push on PR)
|
||||
# https://github.com/docker/build-push-action
|
||||
- name: Build and push Docker image
|
||||
id: build-and-push
|
||||
uses: docker/build-push-action@v3.1.1
|
||||
with:
|
||||
file: Dockerfile.rust
|
||||
context: .
|
||||
push: ${{ github.event_name != 'pull_request' }}
|
||||
tags: |
|
||||
ghcr.io/${{ env.IMAGE_NAME }}:latest
|
||||
${{ env.IMAGE_NAME }}:latest
|
||||
cache-from: ${{ steps.cache.outputs.cache-from }}
|
||||
cache-to: ${{ steps.cache.outputs.cache-to }}
|
||||
|
||||
- name: Docker Hub Description
|
||||
uses: peter-evans/dockerhub-description@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||
repository: tabbyml/tabby-rust
|
||||
|
|
@ -0,0 +1,78 @@
|
|||
FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 as builder
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
python3-dev \
|
||||
python3-pip \
|
||||
wget \
|
||||
pkg-config \
|
||||
libssl-dev \
|
||||
&& \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /root
|
||||
|
||||
ENV ONEAPI_VERSION=2023.0.0
|
||||
ENV MKL_BUILD=25398
|
||||
RUN wget -q https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB && \
|
||||
apt-key add *.PUB && \
|
||||
rm *.PUB && \
|
||||
echo "deb https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list && \
|
||||
apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
intel-oneapi-mkl-devel=$ONEAPI_VERSION-$MKL_BUILD \
|
||||
&& \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN python3 -m pip --no-cache-dir install cmake==3.22.*
|
||||
|
||||
ENV ONEDNN_VERSION=3.0.1
|
||||
RUN wget -q https://github.com/oneapi-src/oneDNN/archive/refs/tags/v${ONEDNN_VERSION}.tar.gz && \
|
||||
tar xf *.tar.gz && \
|
||||
rm *.tar.gz && \
|
||||
cd oneDNN-* && \
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DDNNL_LIBRARY_TYPE=STATIC -DDNNL_BUILD_EXAMPLES=OFF -DDNNL_BUILD_TESTS=OFF -DDNNL_ENABLE_WORKLOAD=INFERENCE -DDNNL_ENABLE_PRIMITIVE="CONVOLUTION;REORDER" . && \
|
||||
make -j$(nproc) install && \
|
||||
cd .. && \
|
||||
rm -r oneDNN-*
|
||||
|
||||
|
||||
# Set up Rust.
|
||||
RUN wget -O - https://sh.rustup.rs | bash -s -- -y
|
||||
ENV PATH="/root/.cargo/bin:${PATH}"
|
||||
|
||||
COPY crates crates
|
||||
|
||||
WORKDIR /root/crates/tabby
|
||||
|
||||
RUN cargo build --release
|
||||
|
||||
RUN mkdir -p /opt/tabby/bin
|
||||
RUN cp target/release/tabby /opt/tabby/bin/
|
||||
|
||||
RUN mkdir -p /opt/tabby/lib
|
||||
RUN cp $(dirname $(find target | grep lib/libctranslate2 | head -1))/libctranslate2*.so /opt/tabby/lib
|
||||
|
||||
FROM nvidia/cuda:11.2.2-base-ubuntu20.04
|
||||
|
||||
# We remove the cuda-compat package because it conflicts with CUDA Enhanced Compatibility.
|
||||
# See e.g. https://github.com/NVIDIA/nvidia-docker/issues/1515
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
libcublas-11-2 \
|
||||
libcudnn8=8.1.1.33-1+cuda11.2 \
|
||||
libgomp1 \
|
||||
python3-pip \
|
||||
&& \
|
||||
apt-get purge -y cuda-compat-11-2 && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV TABBY_ROOT=/opt/tabby
|
||||
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TABBY_ROOT/lib
|
||||
|
||||
COPY --from=builder $TABBY_ROOT $TABBY_ROOT
|
||||
|
||||
ENTRYPOINT ["/opt/tabby/bin/tabby"]
|
||||
|
|
@ -1,20 +1,33 @@
|
|||
use cmake::Config;
|
||||
|
||||
fn main() {
|
||||
let dst = Config::new("CTranslate2")
|
||||
// Default flags.
|
||||
let mut config = Config::new("CTranslate2");
|
||||
config
|
||||
.define("CMAKE_BUILD_TYPE", "Release")
|
||||
.define("BUILD_CLI", "OFF")
|
||||
.define("CMAKE_INSTALL_RPATH_USE_LINK_PATH", "ON")
|
||||
.define("BUILD_SHARED_LIBS", "ON")
|
||||
.define("CMAKE_INSTALL_RPATH_USE_LINK_PATH", "ON");
|
||||
|
||||
// FIXME(meng): support linux build.
|
||||
// OSX flags.
|
||||
.define("CMAKE_OSX_ARCHITECTURES", "arm64")
|
||||
.define("WITH_ACCELERATE", "ON")
|
||||
.define("WITH_MKL", "OFF")
|
||||
.define("OPENMP_RUNTIME", "NONE")
|
||||
.define("WITH_RUY", "ON")
|
||||
.build();
|
||||
if cfg!(target_os = "macos") {
|
||||
config
|
||||
.define("CMAKE_OSX_ARCHITECTURES", "arm64")
|
||||
.define("WITH_ACCELERATE", "ON")
|
||||
.define("WITH_MKL", "OFF")
|
||||
.define("OPENMP_RUNTIME", "NONE")
|
||||
.define("WITH_RUY", "ON");
|
||||
} else if cfg!(target_os = "linux") {
|
||||
config
|
||||
.define("WITH_CUDA", "ON")
|
||||
.define("WITH_CUDNN", "ON")
|
||||
.define("WITH_MKL", "ON")
|
||||
.define("WITH_DNNL", "ON")
|
||||
.define("OPENMP_RUNTIME", "COMP")
|
||||
.cxxflag("-msse4.1")
|
||||
.define("CUDA_NVCC_FLAGS", "-Xfatbin=-compress-all")
|
||||
.define("CUDA_ARCH_LIST", "Common");
|
||||
}
|
||||
|
||||
let dst = config.build();
|
||||
|
||||
println!("cargo:rustc-link-search=native={}", dst.join("lib").display());
|
||||
println!("cargo:rustc-link-lib=ctranslate2");
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "rust/cxx.h"
|
||||
#include <memory>
|
||||
|
||||
namespace tabby {
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue