diff --git a/.github/workflows/docker.rust.yml b/.github/workflows/docker.rust.yml new file mode 100644 index 0000000..9c05c02 --- /dev/null +++ b/.github/workflows/docker.rust.yml @@ -0,0 +1,90 @@ +name: Create and publish docker image + +on: + workflow_dispatch: + push: + branches: [ "main" ] + paths: + - 'crates/**' + +jobs: + build: + + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + # This is used to complete the identity challenge + # with sigstore/fulcio when running outside of PRs. + id-token: write + + steps: + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@main + with: + # this might remove tools that are actually needed, + # if set to "true" but frees about 6 GB + tool-cache: true + + # all of these default to true, but feel free to set to + # "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: false + swap-storage: true + + - name: Checkout repository + uses: actions/checkout@v3 + + # Workaround: https://github.com/docker/build-push-action/issues/461 + - name: Setup Docker buildx + uses: docker/setup-buildx-action@v2.0.0 + + # Login against a Docker registry except on PR + # https://github.com/docker/login-action + - name: Log into GitHub Container registry + if: github.event_name != 'pull_request' + uses: docker/login-action@v2.0.0 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Log into Docker Hub + if: github.event_name != 'pull_request' + uses: docker/login-action@v2.0.0 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Generate image name + run: | + echo "IMAGE_NAME=${GITHUB_REPOSITORY,,}-rust" >>${GITHUB_ENV} + + - uses: int128/docker-build-cache-config-action@v1 + id: cache + with: + image: ghcr.io/${{ env.IMAGE_NAME }}/cache + + # Build and push Docker image with Buildx (don't push on PR) + # 
https://github.com/docker/build-push-action + - name: Build and push Docker image + id: build-and-push + uses: docker/build-push-action@v3.1.1 + with: + file: Dockerfile.rust + context: . + push: ${{ github.event_name != 'pull_request' }} + tags: | + ghcr.io/${{ env.IMAGE_NAME }}:latest + ${{ env.IMAGE_NAME }}:latest + cache-from: ${{ steps.cache.outputs.cache-from }} + cache-to: ${{ steps.cache.outputs.cache-to }} + + - name: Docker Hub Description + uses: peter-evans/dockerhub-description@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + repository: tabbyml/tabby-rust diff --git a/Dockerfile.rust b/Dockerfile.rust new file mode 100644 index 0000000..e503f93 --- /dev/null +++ b/Dockerfile.rust @@ -0,0 +1,78 @@ +FROM nvidia/cuda:11.2.2-cudnn8-devel-ubuntu20.04 as builder + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + python3-dev \ + python3-pip \ + wget \ + pkg-config \ + libssl-dev \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /root + +ENV ONEAPI_VERSION=2023.0.0 +ENV MKL_BUILD=25398 +RUN wget -q https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB && \ + apt-key add *.PUB && \ + rm *.PUB && \ + echo "deb https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/oneAPI.list && \ + apt-get update && \ + apt-get install -y --no-install-recommends \ + intel-oneapi-mkl-devel=$ONEAPI_VERSION-$MKL_BUILD \ + && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN python3 -m pip --no-cache-dir install cmake==3.22.* + +ENV ONEDNN_VERSION=3.0.1 +RUN wget -q https://github.com/oneapi-src/oneDNN/archive/refs/tags/v${ONEDNN_VERSION}.tar.gz && \ + tar xf *.tar.gz && \ + rm *.tar.gz && \ + cd oneDNN-* && \ + cmake -DCMAKE_BUILD_TYPE=Release -DDNNL_LIBRARY_TYPE=STATIC -DDNNL_BUILD_EXAMPLES=OFF -DDNNL_BUILD_TESTS=OFF -DDNNL_ENABLE_WORKLOAD=INFERENCE -DDNNL_ENABLE_PRIMITIVE="CONVOLUTION;REORDER" . 
&& \ + make -j$(nproc) install && \ + cd .. && \ + rm -r oneDNN-* + + +# setup rust. +RUN wget -O - https://sh.rustup.rs | bash -s -- -y +ENV PATH="/root/.cargo/bin:${PATH}" + +COPY crates crates + +WORKDIR /root/crates/tabby + +RUN cargo build --release + +RUN mkdir -p /opt/tabby/bin +RUN cp target/release/tabby /opt/tabby/bin/ + +RUN mkdir -p /opt/tabby/lib +RUN cp $(dirname $(find target | grep lib/libctranslate2 | head -1))/libctranslate2*.so /opt/tabby/lib + +FROM nvidia/cuda:11.2.2-base-ubuntu20.04 + +# We remove the cuda-compat package because it conflicts with the CUDA Enhanced Compatibility. +# See e.g. https://github.com/NVIDIA/nvidia-docker/issues/1515 +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + libcublas-11-2 \ + libcudnn8=8.1.1.33-1+cuda11.2 \ + libgomp1 \ + python3-pip \ + && \ + apt-get purge -y cuda-compat-11-2 && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +ENV TABBY_ROOT=/opt/tabby +ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TABBY_ROOT/lib + +COPY --from=builder $TABBY_ROOT $TABBY_ROOT + +ENTRYPOINT ["/opt/tabby/bin/tabby"] diff --git a/crates/ctranslate2-bindings/build.rs b/crates/ctranslate2-bindings/build.rs index 25c0f62..b725305 100644 --- a/crates/ctranslate2-bindings/build.rs +++ b/crates/ctranslate2-bindings/build.rs @@ -1,20 +1,33 @@ use cmake::Config; fn main() { - let dst = Config::new("CTranslate2") - // Default flags. + let mut config = Config::new("CTranslate2"); + config .define("CMAKE_BUILD_TYPE", "Release") .define("BUILD_CLI", "OFF") - .define("CMAKE_INSTALL_RPATH_USE_LINK_PATH", "ON") + .define("BUILD_SHARED_LIBS", "ON") + .define("CMAKE_INSTALL_RPATH_USE_LINK_PATH", "ON"); - // FIXME(meng): support linux build. - // OSX flags. 
- .define("CMAKE_OSX_ARCHITECTURES", "arm64") - .define("WITH_ACCELERATE", "ON") - .define("WITH_MKL", "OFF") - .define("OPENMP_RUNTIME", "NONE") - .define("WITH_RUY", "ON") - .build(); + if cfg!(target_os = "macos") { + config + .define("CMAKE_OSX_ARCHITECTURES", "arm64") + .define("WITH_ACCELERATE", "ON") + .define("WITH_MKL", "OFF") + .define("OPENMP_RUNTIME", "NONE") + .define("WITH_RUY", "ON"); + } else if cfg!(target_os = "linux") { + config + .define("WITH_CUDA", "ON") + .define("WITH_CUDNN", "ON") + .define("WITH_MKL", "ON") + .define("WITH_DNNL", "ON") + .define("OPENMP_RUNTIME", "COMP") + .cxxflag("-msse4.1") + .define("CUDA_NVCC_FLAGS", "-Xfatbin=-compress-all") + .define("CUDA_ARCH_LIST", "Common"); + } + + let dst = config.build(); println!("cargo:rustc-link-search=native={}", dst.join("lib").display()); println!("cargo:rustc-link-lib=ctranslate2"); diff --git a/crates/ctranslate2-bindings/include/ctranslate2.h b/crates/ctranslate2-bindings/include/ctranslate2.h index ba503c6..2cb0df6 100644 --- a/crates/ctranslate2-bindings/include/ctranslate2.h +++ b/crates/ctranslate2-bindings/include/ctranslate2.h @@ -1,6 +1,7 @@ #pragma once #include "rust/cxx.h" +#include <memory> namespace tabby {