Support linux ctranslate2 cuda build (#147)

* Support linux build * add <memory> to fix build error in linux * add Dockerfile.tabby * update * update * add rust docker image pipeline * add docker.rust.yml
2023-05-25 18:18:22 -07:00 · 2023-05-25 18:18:22 -07:00 · 0acc975618
parent 80588ddd22
commit 0acc975618
4 changed files with 193 additions and 11 deletions
--- a/.github/workflows/docker.rust.yml
+++ b/.github/workflows/docker.rust.yml
@ -0,0 +1,90 @@
+name: Create and publish docker image
+
+on:
+  workflow_dispatch:
+  push:
+    branches: [ "main" ]
+    # Rebuild when the Rust sources, the Dockerfile, or this workflow change.
+    paths: [ 'crates/**', 'Dockerfile.rust', '.github/workflows/docker.rust.yml' ]
+
+jobs:
+  build:
+    # Builds Dockerfile.rust and pushes the image to GHCR and Docker Hub.
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
+      # This is used to complete the identity challenge
+      # with sigstore/fulcio when running outside of PRs.
+      id-token: write
+
+    steps:
+      - name: Free Disk Space (Ubuntu)
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # this might remove tools that are actually needed,
+          # if set to "true" but frees about 6 GB
+          tool-cache: true
+
+          # all of these default to true, but feel free to set to
+          # "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: false
+          swap-storage: true
+
+      - name: Checkout repository
+        uses: actions/checkout@v3
+
+      # Workaround: https://github.com/docker/build-push-action/issues/461
+      - name: Setup Docker buildx
+        uses: docker/setup-buildx-action@v2.0.0
+
+      # Login against a Docker registry except on PR
+      # https://github.com/docker/login-action
+      - name: Log into GitHub Container registry
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v2.0.0
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Log into Docker Hub
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v2.0.0
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      - name: Generate image name
+        run: |
+          echo "IMAGE_NAME=${GITHUB_REPOSITORY,,}-rust" >>${GITHUB_ENV}
+
+      - uses: int128/docker-build-cache-config-action@v1
+        id: cache
+        with:
+          image: ghcr.io/${{ env.IMAGE_NAME }}/cache
+
+      # Build and push Docker image with Buildx (don't push on PR)
+      # https://github.com/docker/build-push-action
+      - name: Build and push Docker image
+        id: build-and-push
+        uses: docker/build-push-action@v3.1.1
+        with:
+          file: Dockerfile.rust
+          context: .
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: |
+            ghcr.io/${{ env.IMAGE_NAME }}:latest
+            ${{ env.IMAGE_NAME }}:latest
+          cache-from: ${{ steps.cache.outputs.cache-from }}
+          cache-to: ${{ steps.cache.outputs.cache-to }}
+
+      - name: Docker Hub Description
+        uses: peter-evans/dockerhub-description@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+          repository: tabbyml/tabby-rust
--- a/Dockerfile.rust
+++ b/Dockerfile.rust
@ -0,0 +1,78 @@
+FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 AS builder
+
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        python3-dev \
+        python3-pip \
+        wget \
+        pkg-config \
+        libssl-dev \
+        && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+WORKDIR /root
+
+ENV ONEAPI_VERSION=2023.0.0
+ENV MKL_BUILD=25398
+RUN wget -q https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB && \
+    apt-key add *.PUB && \
+    rm *.PUB && \
+    echo "deb https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list && \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+        intel-oneapi-mkl-devel=$ONEAPI_VERSION-$MKL_BUILD \
+        && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN python3 -m pip --no-cache-dir install cmake==3.22.*
+
+ENV ONEDNN_VERSION=3.0.1
+RUN wget -q https://github.com/oneapi-src/oneDNN/archive/refs/tags/v${ONEDNN_VERSION}.tar.gz && \
+    tar xf *.tar.gz && \
+    rm *.tar.gz && \
+    cd oneDNN-* && \
+    cmake -DCMAKE_BUILD_TYPE=Release -DDNNL_LIBRARY_TYPE=STATIC -DDNNL_BUILD_EXAMPLES=OFF -DDNNL_BUILD_TESTS=OFF -DDNNL_ENABLE_WORKLOAD=INFERENCE -DDNNL_ENABLE_PRIMITIVE="CONVOLUTION;REORDER" . && \
+    make -j$(nproc) install && \
+    cd .. && \
+    rm -r oneDNN-*
+
+# Set up Rust. The installer is saved to a file first so that a failed
+# download aborts the build instead of piping an empty script into bash.
+RUN wget -qO rustup-init.sh https://sh.rustup.rs && bash rustup-init.sh -y && rm rustup-init.sh
+ENV PATH="/root/.cargo/bin:${PATH}"
+
+COPY crates crates
+
+WORKDIR /root/crates/tabby
+
+RUN cargo build --release
+
+RUN mkdir -p /opt/tabby/bin /opt/tabby/lib
+RUN cp target/release/tabby /opt/tabby/bin/
+
+# Copy libctranslate2.so and any versioned names next to it; -a keeps symlinks as symlinks.
+RUN cp -a $(dirname $(find target | grep lib/libctranslate2 | head -1))/libctranslate2.so* /opt/tabby/lib
+
+FROM nvidia/cuda:11.2.2-base-ubuntu20.04
+
+# We remove the cuda-compat package because it conflicts with the CUDA Enhanced Compatibility.
+# See e.g. https://github.com/NVIDIA/nvidia-docker/issues/1515
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        libcublas-11-2 \
+        libcudnn8=8.1.1.33-1+cuda11.2 \
+        libgomp1 \
+        python3-pip \
+        && \
+    apt-get purge -y cuda-compat-11-2 && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+ENV TABBY_ROOT=/opt/tabby
+ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TABBY_ROOT/lib
+
+COPY --from=builder $TABBY_ROOT $TABBY_ROOT
+
+ENTRYPOINT ["/opt/tabby/bin/tabby"]
--- a/crates/ctranslate2-bindings/build.rs
+++ b/crates/ctranslate2-bindings/build.rs
@ -1,20 +1,33 @@
 use cmake::Config;

 fn main() {
-	let dst = Config::new("CTranslate2")
-        // Default flags.
+    let mut config = Config::new("CTranslate2");
+    config
        .define("CMAKE_BUILD_TYPE", "Release")
        .define("BUILD_CLI", "OFF")
-        .define("CMAKE_INSTALL_RPATH_USE_LINK_PATH", "ON")
+        .define("BUILD_SHARED_LIBS", "ON")
+        .define("CMAKE_INSTALL_RPATH_USE_LINK_PATH", "ON");

-        // FIXME(meng): support linux build.
-        // OSX flags.
-        .define("CMAKE_OSX_ARCHITECTURES", "arm64")
-        .define("WITH_ACCELERATE", "ON")
-        .define("WITH_MKL", "OFF")
-        .define("OPENMP_RUNTIME", "NONE")
-        .define("WITH_RUY", "ON")
-        .build();
+    if cfg!(target_os = "macos") {
+        config
+            .define("CMAKE_OSX_ARCHITECTURES", "arm64")
+            .define("WITH_ACCELERATE", "ON")
+            .define("WITH_MKL", "OFF")
+            .define("OPENMP_RUNTIME", "NONE")
+            .define("WITH_RUY", "ON");
+    } else if cfg!(target_os = "linux") {
+        config
+            .define("WITH_CUDA", "ON")
+            .define("WITH_CUDNN", "ON")
+            .define("WITH_MKL", "ON")
+            .define("WITH_DNNL", "ON")
+            .define("OPENMP_RUNTIME", "COMP")
+            .cxxflag("-msse4.1")
+            .define("CUDA_NVCC_FLAGS", "-Xfatbin=-compress-all")
+            .define("CUDA_ARCH_LIST", "Common");
+    }
+
+    let dst = config.build();

    println!("cargo:rustc-link-search=native={}", dst.join("lib").display());
    println!("cargo:rustc-link-lib=ctranslate2");
--- a/crates/ctranslate2-bindings/include/ctranslate2.h
+++ b/crates/ctranslate2-bindings/include/ctranslate2.h
@ -1,6 +1,7 @@
 #pragma once

 #include "rust/cxx.h"
+#include <memory>

 namespace tabby {