support static linking of ctranslate2 (#148)

* support static linking of ctranslate2

* update

* remove submodule rust-cxx-cmake-bridge

* support alwayslink with whole-archive

* update

* move export_libs

* update docker config

* update ctranslate2

* remove

* update

* update build.rs

* parse external libs

* cleanup

* add cargo fmt
add-tracing
Meng Zhang 2023-05-26 21:34:31 -07:00 committed by GitHub
parent 3788710882
commit 06cf34a007
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 428 additions and 20 deletions

View File

@ -1,2 +1,3 @@
/target
/Cargo.lock
/build

View File

@ -0,0 +1,16 @@
# Wrapper project: builds the CTranslate2 subdirectory and records every
# library needed to link against it.
cmake_minimum_required(VERSION 3.7)
project(ctranslate2_bindings)

# Pulls in the `ctranslate2` target used below.
add_subdirectory(CTranslate2)

# `dummy` links privately against ctranslate2 and is the target handed to
# export_all_target_libs(), which walks its link dependencies.
add_library(dummy src/dummy.cc)
target_link_libraries(dummy PRIVATE ctranslate2)

# Provides export_all_target_libs().
include(cmake/export_libs.cmake)
export_all_target_libs(dummy)

@ -1 +1 @@
Subproject commit 692fb607ab67573fa5cf6e410aec24e8655844f8
Subproject commit 45af5ebcb643f205a6709e0bf6c09157d1ecba52

View File

@ -12,3 +12,4 @@ tokenizers = "0.13.3"
bindgen = "0.53.1"
cxx-build = "1.0"
cmake = "0.1"
rust-cxx-cmake-bridge = { path = "../rust-cxx-cmake-bridge" }

View File

@ -1,21 +1,15 @@
use cmake::Config;
use rust_cxx_cmake_bridge::read_cmake_generated;
fn main() {
let mut config = Config::new("CTranslate2");
let mut config = Config::new(".");
config
.define("CMAKE_BUILD_TYPE", "Release")
.define("BUILD_CLI", "OFF")
.define("BUILD_SHARED_LIBS", "ON")
.define("CMAKE_INSTALL_RPATH_USE_LINK_PATH", "ON");
.define("CMAKE_INSTALL_RPATH_USE_LINK_PATH", "ON")
.define("BUILD_SHARED_LIBS", "OFF");
if cfg!(target_os = "macos") {
config
.define("CMAKE_OSX_ARCHITECTURES", "arm64")
.define("WITH_ACCELERATE", "ON")
.define("WITH_MKL", "OFF")
.define("OPENMP_RUNTIME", "NONE")
.define("WITH_RUY", "ON");
} else if cfg!(target_os = "linux") {
if cfg!(target_os = "linux") {
config
.define("WITH_CUDA", "ON")
.define("WITH_CUDNN", "ON")
@ -24,16 +18,26 @@ fn main() {
.define("OPENMP_RUNTIME", "COMP")
.cxxflag("-msse4.1")
.define("CUDA_NVCC_FLAGS", "-Xfatbin=-compress-all")
.define("CUDA_ARCH_LIST", "Common");
}
.define("CUDA_ARCH_LIST", "Common")
} else if cfg!(target_os = "macos") {
config
.define("CMAKE_OSX_ARCHITECTURES", "arm64")
.define("WITH_ACCELERATE", "ON")
.define("WITH_MKL", "OFF")
.define("OPENMP_RUNTIME", "NONE")
.define("WITH_RUY", "ON")
} else {
panic!("Invalid target")
};
let dst = config.build();
println!(
"cargo:rustc-link-search=native={}",
dst.join("lib").display()
);
println!("cargo:rustc-link-lib=ctranslate2");
// Read static lib from generated deps.
let cmake_generated_libs_str = std::fs::read_to_string(
&format!("/{}/build/cmake_generated_libs", dst.display()).to_string(),
)
.unwrap();
read_cmake_generated(&cmake_generated_libs_str);
// Tell cargo to invalidate the built crate whenever the wrapper changes
println!("cargo:rerun-if-changed=include/ctranslate2.h");

View File

@ -0,0 +1,25 @@
#! /bin/bash
# Configures a fresh CMake build tree in ./build with platform-specific options
# for a static build, then runs the command given as arguments (if any) from
# inside that build directory.

set -e
set -x

UNAME="$(uname -s)"
case "${UNAME}" in
    Linux*)  MACHINE=linux;;
    Darwin*) MACHINE=macos;;
    *)
        # Fail loudly instead of exiting with a bare status code.
        echo "Unsupported platform: ${UNAME}" >&2
        exit 1
        ;;
esac

# Always start from an empty build tree.
rm -rf build
mkdir build && cd build

if [[ "$MACHINE" == "macos" ]]; then
    CMAKE_EXTRA_OPTIONS='-DCMAKE_OSX_ARCHITECTURES=arm64 -DWITH_ACCELERATE=ON -DWITH_MKL=OFF -DOPENMP_RUNTIME=NONE -DWITH_RUY=ON'
elif [[ "$MACHINE" == "linux" ]]; then
    # Compiler flags must go through CMAKE_CXX_FLAGS; a `CXXFLAGS` cache entry is ignored by CMake.
    CMAKE_EXTRA_OPTIONS='-DWITH_CUDA=ON -DWITH_CUDNN=ON -DWITH_MKL=ON -DWITH_DNNL=ON -DOPENMP_RUNTIME=COMP -DCUDA_NVCC_FLAGS=-Xfatbin=-compress-all -DCUDA_ARCH_LIST=Common -DCMAKE_CXX_FLAGS=-msse4.1'
fi

# $CMAKE_EXTRA_OPTIONS is intentionally unquoted so it splits into separate arguments.
cmake -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=Release -DBUILD_CLI=OFF -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=ON $CMAKE_EXTRA_OPTIONS ..

# Run the caller-supplied command; expands to nothing when no arguments were given.
"$@"

View File

@ -0,0 +1,98 @@
################################################################################
# WARNING: to list the system libraries(ie IMPORTED) you MUST set:
# set_target_properties(your_lib PROPERTIES IMPORTED_GLOBAL TRUE)
# just after the find_package call
# cf https://gitlab.kitware.com/cmake/cmake/-/issues/17256
#
# https://stackoverflow.com/questions/32756195/recursive-list-of-link-libraries-in-cmake
# https://stackoverflow.com/questions/32197663/how-can-i-remove-the-the-location-property-may-not-be-read-from-target-error-i
# Recursively collects the link dependencies of TARGET.
#
# OUTPUT_LIST: name of the variable, set in the caller's scope, that receives the result.
# TARGET:      the target whose INTERFACE_LINK_LIBRARIES and LINK_LIBRARIES are walked.
#
# Each entry of the result is either a target name or a path that exists on disk.
# The dependencies of a target are appended before the target itself.
# Entries that are neither a known target nor an existing path are dropped.
# VISITED_TARGETS is written back to the caller's scope so that a target is only
# expanded once across the whole recursion.
function(_get_link_libraries OUTPUT_LIST TARGET)
list(APPEND VISITED_TARGETS ${TARGET})
# DO NOT switch on IMPORTED or not
# An INTERFACE library CAN have LINK_LIBRARIES!
# get_target_property(IMPORTED ${TARGET} IMPORTED)
set(LIBS "")
get_target_property(LIBS_1 ${TARGET} INTERFACE_LINK_LIBRARIES)
get_target_property(LIBS_2 ${TARGET} LINK_LIBRARIES)
list(APPEND LIBS ${LIBS_1} ${LIBS_2})
set(LIB_FILES "")
foreach(LIB ${LIBS})
if (TARGET ${LIB})
# Only recurse into targets that have not been expanded yet.
list(FIND VISITED_TARGETS ${LIB} VISITED)
if (${VISITED} EQUAL -1)
# OLD: get_target_property(LIB_FILE ${LIB} LOCATION)
# NEW:
_get_link_libraries(LINK_LIB_FILES ${LIB})
set(LIB_FILE ${LIB})
# Dependencies first, then the target itself.
list(APPEND LIB_FILES ${LINK_LIB_FILES})
list(APPEND LIB_FILES ${LIB_FILE})
endif()
elseif(EXISTS ${LIB})
# Not a target, but a path that exists on disk: keep it as-is.
set(LIB_FILE ${LIB})
list(APPEND LIB_FILES ${LIB_FILE})
endif()
endforeach()
# Hand the visited set and the collected list back to the caller.
set(VISITED_TARGETS ${VISITED_TARGETS} PARENT_SCOPE)
set(${OUTPUT_LIST} ${LIB_FILES} PARENT_SCOPE)
endfunction()
################################################################################
# Writes the libraries TARGET links against to
# ${CMAKE_CURRENT_BINARY_DIR}/cmake_generated_libs.
#
# The list is gathered with _get_link_libraries(). Target entries other than
# INTERFACE libraries are written as $<TARGET_FILE:...> generator expressions,
# and entries that are existing paths are written unchanged. The file is
# produced by a POST_BUILD command attached to TARGET.
function(export_all_target_libs TARGET)
# NOTE: get_target_property(CIRCUIT_LIB_LINK_LIBRARIES a_target LINK_LIBRARIES) is NOT transitive
# This function will return eg: "$<TARGET_FILE:rust_cxx>;$<TARGET_FILE:circuit_lib>;"
# b/c generator expression are evaluated LATER
# cf https://stackoverflow.com/questions/59226127/cmake-generator-expression-how-to-get-target-file-property-on-list-of-targets
set(ALL_LINK_LIBRARIES "")
_get_link_libraries(ALL_LINK_LIBRARIES ${TARGET})
message(STATUS "ALL_LINK_LIBRARIES : ${ALL_LINK_LIBRARIES}")
set(ALL_LIBS "")
# NOTE(review): ALL_EXTERNAL_LIBS is initialised here but never read in this function.
set(ALL_EXTERNAL_LIBS "")
# TODO move that back into get_link_libraries
# NOTE: we MUST do it in 2 steps:
# - collect all the LINK_LIBRARIES recursively
# - loop on those and get their TARGET_FILE (if not INTERFACE_LIBRARY)
# That is b/c in get_link_libraries a INTERFACE_LIBRARY CAN have link_libraries
# but we CAN NOT evaluate generator expressions at this time.
foreach(LIB ${ALL_LINK_LIBRARIES})
# MUST skip INTERFACE else:
# CMake Error at src/CMakeLists.txt:136 (add_custom_command):
# Error evaluating generator expression:
# $<TARGET_FILE:rust_cxx>
# Target "rust_cxx" is not an executable or library.
# SHARED_LIBRARY,INTERFACE_LIBRARY,STATIC_LIBRARY
#
if (TARGET ${LIB})
get_target_property(LIB_TYPE ${LIB} TYPE)
message(STATUS "LIB_TYPE : ${LIB} = ${LIB_TYPE}")
if(NOT ${LIB_TYPE} STREQUAL "INTERFACE_LIBRARY")
set(LIB_FILE $<TARGET_FILE:${LIB}>)
list(APPEND ALL_LIBS ${LIB_FILE})
endif()
elseif(EXISTS ${LIB})
# Existing path rather than a target: record it unchanged.
set(LIB_FILE ${LIB})
message(STATUS "LIB_TYPE : ${LIB} = EXTERNAL")
list(APPEND ALL_LIBS ${LIB_FILE})
endif()
endforeach() # LIB ${ALL_LINK_LIBRARIES}
message(STATUS "ALL_LIBS : ${ALL_LIBS}")
# add_custom_command(ie echoing only to stdout) works but more difficult to get from build.rs
# b/c when there is "ninja: no work to do" it will NOT echo on the console
# Instead the echoed list is redirected into a file in the current binary dir.
add_custom_command(
TARGET ${TARGET}
POST_BUILD
COMMAND ${CMAKE_COMMAND} -E echo ${ALL_LIBS} > ${CMAKE_CURRENT_BINARY_DIR}/cmake_generated_libs
# OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/cmake_generated_libs
VERBATIM
)
endfunction(export_all_target_libs)

View File

View File

@ -0,0 +1,8 @@
[package]
name = "rust-cxx-cmake-bridge"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]

View File

@ -0,0 +1,250 @@
use std::io::Write;
use std::path::PathBuf;
// ## lib_name: "protobufd"
// ## dir: "..../api_circuits/target/debug/build/lib-circuits-wrapper-49025516ce40925e/out/build/_deps/protobuf_fetch-build"
// rustc-link-search=native=..../api_circuits/target/debug/build/lib-circuits-wrapper-49025516ce40925e/out/build/_deps/protobuf_fetch-build
// rustc-link-lib=protobufd
// ## lib_name: "libyosys"
// ## dir: "..../api_circuits/target/debug/build/lib-circuits-wrapper-49025516ce40925e/out/build/_deps/yosys_fetch-build"
// rustc-link-search=native=..../api_circuits/target/debug/build/lib-circuits-wrapper-49025516ce40925e/out/build/_deps/yosys_fetch-build
// rustc-link-lib=libyosys
// ## lib_name: "xxhash"
// ## dir: "..../api_circuits/target/debug/build/lib-circuits-wrapper-49025516ce40925e/out/build/_deps/xxhash-build"
// rustc-link-search=native=..../api_circuits/target/debug/build/lib-circuits-wrapper-49025516ce40925e/out/build/_deps/xxhash-build
// rustc-link-lib=xxhash
/// Splits a library path into the pieces needed to emit cargo link directives.
///
/// Returns `(dir, lib_name, is_static, is_system, is_framework)` where:
/// - `dir` is the parent directory of the library file;
/// - `lib_name` is the file name up to its first `.`, with a single leading
///   `lib` removed (`liblibstatic.a` -> `libstatic`);
/// - `is_static` is true when the text following that name starts with `.a`;
/// - `is_system` is true when `dir` is located under `/usr/lib/`;
/// - `is_framework` is true when the input ends with `.framework`.
///
/// # Panics
///
/// Panics if the path has no file name or no parent directory, or if the file
/// name is not valid UTF-8.
fn parse_lib_path_dir_and_name(static_lib_str: &str) -> (PathBuf, String, bool, bool, bool) {
    let static_lib_path = std::path::Path::new(static_lib_str);

    // NOTE: file_stem only splits eg "libprotobufd.so.3.19.4.0" -> "libprotobufd.so.3.19.4"
    // but we want everything before the FIRST dot (ie "libprotobufd").
    // TODO use "file_prefix" https://github.com/rust-lang/rust/issues/86319
    let liblib_name = static_lib_path
        .my_file_prefix()
        .expect("library path has no file name");
    let liblib_name_str = liblib_name
        .to_str()
        .expect("library file name is not valid UTF-8");

    // Remove exactly ONE "lib" prefix: `trim_start_matches` would strip repeated
    // occurrences and turn "liblibstatic" into "static".
    let lib_name_str = liblib_name_str
        .strip_prefix("lib")
        .unwrap_or(liblib_name_str);

    // `Path::extension` has the same issue as `file_stem`:
    // ".../libglogd.so.0.6.0".extension() == "0", and we need the part after
    // the FIRST dot. So take the file name and cut the prefix off once.
    let file_with_ext = static_lib_path
        .file_name()
        .expect("library path has no file name")
        .to_str()
        .expect("library file name is not valid UTF-8");
    let full_ext = file_with_ext
        .strip_prefix(liblib_name_str)
        .unwrap_or(file_with_ext);
    let is_static = full_ext.starts_with(".a");

    let dir = static_lib_path
        .parent()
        .expect("library path has no parent directory");

    // COULD probably have a more foolproof system by using the IMPORTED property in CMake
    // and writing that to a different file (or the same one with a prefix/suffix?)
    // NOTE: be sure that the prefix does not conflict with the Dockerfile WORKDIR /usr/src/app
    let is_system = dir.starts_with("/usr/lib/");
    let is_framework = static_lib_str.ends_with(".framework");

    (
        dir.to_path_buf(),
        lib_name_str.to_string(),
        is_static,
        is_system,
        is_framework,
    )
}
// Parse the content of "cmake_generated_libs" which SHOULD have
// been generated by our CMake function.
// It is expected to contain a list of space separated libraries eg:
// "/full/path/build/liblib1.so /full/path/build/liblib2.a /usr/lib/x86_64-linux-gnu/libpng16.so.16.37.0"
// etc
/// Writes the cargo link directives for every library listed in
/// `cmake_generated_rust_wrapper_libs_str` to `output`.
///
/// The input is a whitespace-separated list of library paths. The order of the
/// input is preserved because link order matters for static libraries.
///
/// For each library a `cargo:rustc-link-lib=<kind>=<name>` line is written,
/// with `<kind>` being `framework`, `static` or `dylib`. It is preceded by a
/// `cargo:rustc-link-search=native=<dir>` line unless the library is a
/// framework or a shared library located in a system directory.
///
/// # Panics
///
/// Panics if writing to `output` fails or if a path cannot be parsed.
fn read_cmake_generated_to_output(
    cmake_generated_rust_wrapper_libs_str: &str,
    output: &mut impl Write,
) {
    // Previous version was globbing all .a and .so in the build dir but it only worked for
    // SHARED dependencies. When linking STATIC libs order matters, so we must get a proper
    // list from CMake.
    for static_lib_str in cmake_generated_rust_wrapper_libs_str.split_whitespace() {
        let (dir, lib_name_str, is_static, is_system, is_framework) =
            parse_lib_path_dir_and_name(static_lib_str);

        // WARNING: we MUST add to the linker path:
        // - NON system libs (obviously) whether SHARED or STATIC
        // - system STATIC libs eg /usr/lib/x86_64-linux-gnu/libboost_filesystem.a else
        //   "error: could not find native static library `boost_filesystem`, perhaps an -L flag is missing?"
        // System SHARED libs and frameworks need no `native` search path.
        let needs_search_path = !is_framework && (!is_system || is_static);
        if needs_search_path {
            writeln!(output, "cargo:rustc-link-search=native={}", dir.display())
                .expect("failed to write cargo link-search directive");
        }

        let kind = if is_framework {
            "framework"
        } else if is_static {
            "static"
        } else {
            "dylib"
        };
        writeln!(output, "cargo:rustc-link-lib={}={}", kind, lib_name_str)
            .expect("failed to write cargo link-lib directive");
    }
}
/// Emits the cargo link directives for the given library list on stdout.
///
/// Thin wrapper around `read_cmake_generated_to_output` that targets the
/// process's standard output.
pub fn read_cmake_generated(cmake_generated_rust_wrapper_libs_str: &str) {
    let mut stdout = std::io::stdout();
    read_cmake_generated_to_output(cmake_generated_rust_wrapper_libs_str, &mut stdout);
}
////////////////////////////////////////////////////////////////////////////////
/// TEMP
/// Implement "file_prefix"
/// copy pasted from https://github.com/rust-lang/rust/issues/86319
use std::ffi::OsStr;
trait HasMyFilePrefix {
fn my_file_prefix(&self) -> Option<&OsStr>;
}
impl HasMyFilePrefix for std::path::Path {
fn my_file_prefix(&self) -> Option<&OsStr> {
self.file_name()
.map(split_file_at_dot)
.and_then(|(before, _after)| Some(before))
}
}
/// Splits a file name at its first dot, ignoring a dot in the very first position.
///
/// Returns `(file, None)` for `".."` and for names that contain no dot after
/// their first byte. Otherwise returns the bytes before that dot and
/// `Some(bytes after it)`.
fn split_file_at_dot(file: &OsStr) -> (&OsStr, Option<&OsStr>) {
    let bytes = os_str_as_u8_slice(file);
    if bytes == b".." {
        return (file, None);
    }

    // Search from index 1 so that a leading dot stays part of the prefix.
    let dot = if let Some(offset) = bytes[1..].iter().position(|&b| b == b'.') {
        offset + 1
    } else {
        return (file, None);
    };

    let (before, rest) = bytes.split_at(dot);
    // `rest` starts with the dot itself; drop it.
    let after = &rest[1..];

    // SAFETY: the conversion between &OsStr and &[u8] and back is sound here
    // because (1) we only look at ASCII contents of the encoding and (2) new
    // &OsStr values are produced only from ASCII-bounded slices of existing
    // &OsStr values.
    unsafe { (u8_slice_as_os_str(before), Some(u8_slice_as_os_str(after))) }
}
/// Reinterprets an `OsStr` as its underlying bytes without copying.
fn os_str_as_u8_slice(s: &OsStr) -> &[u8] {
    let raw = s as *const OsStr as *const [u8];
    // SAFETY: `raw` comes from a valid reference that outlives the returned slice.
    // NOTE(review): this assumes `OsStr` is layout-compatible with `[u8]`, as the
    // copied std implementation does — confirm before relying on it elsewhere.
    unsafe { &*raw }
}
/// Reinterprets a byte slice as an `OsStr` without copying.
///
/// # Safety
///
/// The bytes must form a valid `OsStr` encoding. In this file they are always
/// sub-slices of an existing `OsStr`, cut at ASCII boundaries
/// (see `os_str_as_u8_slice`).
unsafe fn u8_slice_as_os_str(s: &[u8]) -> &OsStr {
    let raw = s as *const [u8] as *const OsStr;
    &*raw
}
////////////////////////////////////////////////////////////////////////////////
#[cfg(test)]
mod tests {
    use crate::{parse_lib_path_dir_and_name, read_cmake_generated_to_output};

    // `parse_lib_path_dir_and_name` returns a 5-tuple; every test binds all five
    // fields so the module compiles.

    #[test]
    fn parse_local_lib_static_ok() {
        let (dir, lib_name_str, is_static, is_system, is_framework) =
            parse_lib_path_dir_and_name("/some/path/liblibstatic.a");
        assert_eq!(dir.as_os_str(), "/some/path");
        // Only a single leading "lib" is expected to be stripped.
        assert_eq!(lib_name_str, "libstatic");
        assert!(is_static);
        assert!(!is_system);
        assert!(!is_framework);
    }

    #[test]
    fn parse_local_lib_shared_ok() {
        let (dir, lib_name_str, is_static, is_system, is_framework) =
            parse_lib_path_dir_and_name("/some/path/liblibshared.so");
        assert_eq!(dir.as_os_str(), "/some/path");
        assert_eq!(lib_name_str, "libshared");
        assert!(!is_static);
        assert!(!is_system);
        assert!(!is_framework);
    }

    #[test]
    fn parse_local_lib_shared_with_soversion_ok() {
        let (dir, lib_name_str, is_static, is_system, is_framework) =
            parse_lib_path_dir_and_name("/some/path/liblibshared.so.1.2.3");
        assert_eq!(dir.as_os_str(), "/some/path");
        assert_eq!(lib_name_str, "libshared");
        assert!(!is_static);
        assert!(!is_system);
        assert!(!is_framework);
    }

    #[test]
    fn parse_system_lib_static_ok() {
        let (dir, lib_name_str, is_static, is_system, is_framework) =
            parse_lib_path_dir_and_name("/usr/lib/libsystem1.a");
        assert_eq!(dir.as_os_str(), "/usr/lib");
        assert_eq!(lib_name_str, "system1");
        assert!(is_static);
        assert!(is_system);
        assert!(!is_framework);
    }

    #[test]
    fn parse_system_lib_shared_ok() {
        let (dir, lib_name_str, is_static, is_system, is_framework) =
            parse_lib_path_dir_and_name("/usr/lib/libsystem2.so");
        assert_eq!(dir.as_os_str(), "/usr/lib");
        assert_eq!(lib_name_str, "system2");
        assert!(!is_static);
        assert!(is_system);
        assert!(!is_framework);
    }

    #[test]
    fn parse_framework_ok() {
        let (dir, lib_name_str, is_static, is_system, is_framework) =
            parse_lib_path_dir_and_name("/System/Library/Frameworks/Accelerate.framework");
        assert_eq!(dir.as_os_str(), "/System/Library/Frameworks");
        assert_eq!(lib_name_str, "Accelerate");
        assert!(!is_static);
        assert!(!is_system);
        assert!(is_framework);
    }

    #[test]
    fn test_read_cmake_generated_to_output() {
        let input = "/some/libA.a /some/libB.so";
        let mut stdout = Vec::new();
        read_cmake_generated_to_output(input, &mut stdout);
        assert_eq!(
            std::str::from_utf8(&stdout).unwrap(),
            "cargo:rustc-link-search=native=/some\n\
             cargo:rustc-link-lib=static=A\n\
             cargo:rustc-link-search=native=/some\n\
             cargo:rustc-link-lib=dylib=B\n"
        );
    }

    // no need to touch "rustc-link-search" to link with eg "/usr/lib/x86_64-linux-gnu/libpng16.so.16.37.0"
    // simply "cargo:rustc-link-lib=dylib=png16" is OK
    #[test]
    fn test_read_cmake_generated_to_output_system_shared_no_rustc_link_search() {
        let input = "/usr/lib/x86_64-linux-gnu/libpng16.so.16.37.0";
        let mut stdout = Vec::new();
        read_cmake_generated_to_output(input, &mut stdout);
        assert_eq!(
            std::str::from_utf8(&stdout).unwrap(),
            "cargo:rustc-link-lib=dylib=png16\n"
        );
    }

    // BUT system STATIC libs require "rustc-link-search"??
    #[test]
    fn test_read_cmake_generated_to_output_system_static_rustc_link_search() {
        let input = "/usr/lib/x86_64-linux-gnu/libpng16.a";
        let mut stdout = Vec::new();
        read_cmake_generated_to_output(input, &mut stdout);
        assert_eq!(
            std::str::from_utf8(&stdout).unwrap(),
            "cargo:rustc-link-search=native=/usr/lib/x86_64-linux-gnu\n\
             cargo:rustc-link-lib=static=png16\n"
        );
    }
}

View File

@ -538,6 +538,7 @@ dependencies = [
"cxx",
"cxx-build",
"derive_builder",
"rust-cxx-cmake-bridge",
"tokenizers",
]
@ -1804,6 +1805,10 @@ dependencies = [
"winreg",
]
[[package]]
name = "rust-cxx-cmake-bridge"
version = "0.1.0"
[[package]]
name = "rust-embed"
version = "6.6.1"

View File

@ -49,7 +49,7 @@ pub struct ServeArgs {
#[clap(long)]
model: String,
#[clap(long, default_value_t=8080)]
#[clap(long, default_value_t = 8080)]
port: u16,
#[clap(long, default_value_t=Device::CPU)]