tabby/crates/ctranslate2-bindings/src/lib.rs

use tokenizers::tokenizer::{Model, Tokenizer};

#[macro_use]
extern crate derive_builder;

// cxx bridge to the C++ half of these bindings. Every item declared below is
// looked up in the C++ `tabby` namespace.
#[cxx::bridge(namespace = "tabby")]
mod ffi {
    unsafe extern "C++" {
        // Header that declares the C++ items listed in this block.
        include!("ctranslate2-bindings/include/ctranslate2.h");

        // Opaque C++ engine type; in this file it is only held behind a
        // `UniquePtr`.
        type TextInferenceEngine;

        // Constructs an engine on the C++ side and transfers ownership to Rust.
        // NOTE(review): the accepted `device` strings and the exact meaning of
        // `device_indices` / `num_replicas_per_device` are defined by the C++
        // implementation and cannot be confirmed from this file.
        fn create_engine(
            model_path: &str,
            device: &str,
            device_indices: &[i32],
            num_replicas_per_device: usize,
        ) -> UniquePtr<TextInferenceEngine>;

        // Runs generation for the given prompt tokens and returns the
        // generated output as token strings (the caller maps them back to ids).
        fn inference(
            &self,
            tokens: &[String],
            max_decoding_length: usize,
            sampling_temperature: f32,
            beam_size: usize,
        ) -> Vec<String>;
    }
}

/// Options consumed by [`TextInferenceEngine::create`].
///
/// No field declares a builder default, so the generated builder expects all
/// of them to be set.
#[derive(Builder, Debug)]
pub struct TextInferenceEngineCreateOptions {
    /// Model location, forwarded unchanged to the C++ `create_engine`.
    model_path: String,

    /// Path of the tokenizer file loaded with `Tokenizer::from_file`.
    tokenizer_path: String,

    /// Device name, forwarded unchanged to the C++ `create_engine`.
    /// NOTE(review): presumably values such as "cpu" / "cuda" — confirm
    /// against the C++ implementation.
    device: String,

    /// Device indices, forwarded unchanged to the C++ `create_engine`.
    device_indices: Vec<i32>,

    /// Replica count per device, forwarded unchanged to the C++
    /// `create_engine`.
    num_replicas_per_device: usize,
}

/// Per-request generation settings passed to [`TextInferenceEngine::inference`].
///
/// Every field has a builder default, so an untouched builder yields a usable
/// value.
#[derive(Builder, Debug)]
pub struct TextInferenceOptions {
    /// Forwarded to the engine as `max_decoding_length`; defaults to 256.
    #[builder(default = "256")]
    max_decoding_length: usize,

    /// Forwarded to the engine as `sampling_temperature`; defaults to 1.0.
    #[builder(default = "1.0")]
    sampling_temperature: f32,

    /// Forwarded to the engine as `beam_size`; defaults to 2.
    #[builder(default = "2")]
    beam_size: usize,
}

/// Pairs the C++ inference engine with the tokenizer used to turn prompts
/// into tokens and generated tokens back into text.
pub struct TextInferenceEngine {
    /// Owning handle to the C++ engine returned by `ffi::create_engine`.
    engine: cxx::UniquePtr<ffi::TextInferenceEngine>,
    /// Tokenizer loaded from `tokenizer_path` at creation time.
    tokenizer: Tokenizer,
}

// SAFETY: NOTE(review) — these impls assert that the wrapped C++ engine may be
// moved to and shared between threads. Nothing in this file establishes that;
// soundness depends on the C++ `TextInferenceEngine` being thread-safe for
// concurrent `inference` calls. Confirm against the C++ implementation.
unsafe impl Send for TextInferenceEngine {}
unsafe impl Sync for TextInferenceEngine {}

impl TextInferenceEngine {
    /// Creates the C++ engine and loads the tokenizer described by `options`.
    ///
    /// # Panics
    ///
    /// Panics if the tokenizer file at `options.tokenizer_path` cannot be
    /// loaded; the panic message includes the path and the underlying error.
    pub fn create(options: TextInferenceEngineCreateOptions) -> Self {
        let engine = ffi::create_engine(
            &options.model_path,
            &options.device,
            &options.device_indices,
            options.num_replicas_per_device,
        );
        let tokenizer = Tokenizer::from_file(&options.tokenizer_path).unwrap_or_else(|err| {
            panic!(
                "failed to load tokenizer from {:?}: {}",
                options.tokenizer_path, err
            )
        });

        Self { engine, tokenizer }
    }

    /// Generates text for `prompt` and returns it as a decoded string.
    ///
    /// The prompt is tokenized, the token strings are handed to the C++
    /// engine, and the token strings it returns are mapped back to vocabulary
    /// ids and decoded by the tokenizer.
    ///
    /// # Panics
    ///
    /// Panics if the prompt cannot be tokenized, if the engine returns a token
    /// that is not in the tokenizer's vocabulary, or if decoding fails.
    pub fn inference(&self, prompt: &str, options: TextInferenceOptions) -> String {
        let encoding = self
            .tokenizer
            .encode(prompt, true)
            .unwrap_or_else(|err| panic!("failed to tokenize prompt: {}", err));
        let output_tokens = self.engine.inference(
            encoding.get_tokens(),
            options.max_decoding_length,
            options.sampling_temperature,
            options.beam_size,
        );

        // The engine speaks token strings; the tokenizer decodes ids, so each
        // generated token is looked up in the model vocabulary first.
        let model = self.tokenizer.get_model();
        let output_ids: Vec<u32> = output_tokens
            .iter()
            .map(|token| {
                model.token_to_id(token).unwrap_or_else(|| {
                    panic!(
                        "engine produced token {:?} that is missing from the tokenizer vocabulary",
                        token
                    )
                })
            })
            .collect();

        self.tokenizer
            .decode(output_ids, true)
            .unwrap_or_else(|err| panic!("failed to decode generated tokens: {}", err))
    }
}
add ctranslate2-bindings / tabby rust packages (#146) * add ctranslate2-bindings * add fixme for linux build * turn off shared lib * add tabby-cli 2023-05-25 21:05:28 +00:00			`use tokenizers::tokenizer::{Model, Tokenizer};`

			`#[macro_use]`
			`extern crate derive_builder;`

			`#[cxx::bridge(namespace = "tabby")]`
			`mod ffi {`
			`unsafe extern "C++" {`
			`include!("ctranslate2-bindings/include/ctranslate2.h");`

			`type TextInferenceEngine;`

feat: support cuda devices in rust tabby (#149) 2023-05-26 06:23:07 +00:00			`fn create_engine(`
			`model_path: &str,`
			`device: &str,`
			`device_indices: &[i32],`
			`num_replicas_per_device: usize,`
			`) -> UniquePtr<TextInferenceEngine>;`

add ctranslate2-bindings / tabby rust packages (#146) * add ctranslate2-bindings * add fixme for linux build * turn off shared lib * add tabby-cli 2023-05-25 21:05:28 +00:00			`fn inference(`
			`&self,`
			`tokens: &[String],`
			`max_decoding_length: usize,`
			`sampling_temperature: f32,`
			`beam_size: usize,`
			`) -> Vec<String>;`
			`}`
			`}`

chore: remove unused lock 2023-05-26 07:06:08 +00:00			`#[derive(Builder, Debug)]`
feat: support cuda devices in rust tabby (#149) 2023-05-26 06:23:07 +00:00			`pub struct TextInferenceEngineCreateOptions {`
			`model_path: String,`

			`tokenizer_path: String,`

			`device: String,`

			`device_indices: Vec<i32>,`

			`num_replicas_per_device: usize,`
			`}`

add ctranslate2-bindings / tabby rust packages (#146) * add ctranslate2-bindings * add fixme for linux build * turn off shared lib * add tabby-cli 2023-05-25 21:05:28 +00:00			`#[derive(Builder, Debug)]`
			`pub struct TextInferenceOptions {`
			`#[builder(default = "256")]`
			`max_decoding_length: usize,`

			`#[builder(default = "1.0")]`
			`sampling_temperature: f32,`

			`#[builder(default = "2")]`
			`beam_size: usize,`
			`}`

			`pub struct TextInferenceEngine {`
chore: remove unused lock 2023-05-26 07:06:08 +00:00			`engine: cxx::UniquePtr<ffi::TextInferenceEngine>,`
add ctranslate2-bindings / tabby rust packages (#146) * add ctranslate2-bindings * add fixme for linux build * turn off shared lib * add tabby-cli 2023-05-25 21:05:28 +00:00			`tokenizer: Tokenizer,`
			`}`

			`unsafe impl Send for TextInferenceEngine {}`
			`unsafe impl Sync for TextInferenceEngine {}`

			`impl TextInferenceEngine {`
feat: support cuda devices in rust tabby (#149) 2023-05-26 06:23:07 +00:00			`pub fn create(options: TextInferenceEngineCreateOptions) -> Self where {`
			`let engine = ffi::create_engine(`
			`&options.model_path,`
			`&options.device,`
			`&options.device_indices,`
			`options.num_replicas_per_device,`
			`);`
add ctranslate2-bindings / tabby rust packages (#146) * add ctranslate2-bindings * add fixme for linux build * turn off shared lib * add tabby-cli 2023-05-25 21:05:28 +00:00			`return TextInferenceEngine {`
chore: remove unused lock 2023-05-26 07:06:08 +00:00			`engine: engine,`
feat: support cuda devices in rust tabby (#149) 2023-05-26 06:23:07 +00:00			`tokenizer: Tokenizer::from_file(&options.tokenizer_path).unwrap(),`
add ctranslate2-bindings / tabby rust packages (#146) * add ctranslate2-bindings * add fixme for linux build * turn off shared lib * add tabby-cli 2023-05-25 21:05:28 +00:00			`};`
			`}`

			`pub fn inference(&self, prompt: &str, options: TextInferenceOptions) -> String {`
			`let encoding = self.tokenizer.encode(prompt, true).unwrap();`
chore: remove unused lock 2023-05-26 07:06:08 +00:00			`let output_tokens = self.engine.inference(`
add ctranslate2-bindings / tabby rust packages (#146) * add ctranslate2-bindings * add fixme for linux build * turn off shared lib * add tabby-cli 2023-05-25 21:05:28 +00:00			`encoding.get_tokens(),`
			`options.max_decoding_length,`
			`options.sampling_temperature,`
			`options.beam_size,`
			`);`

			`let model = self.tokenizer.get_model();`
			`let output_ids: Vec<u32> = output_tokens`
			`.iter()`
			`.map(\|x\| model.token_to_id(x).unwrap())`
			`.collect();`
			`self.tokenizer.decode(output_ids, true).unwrap()`
			`}`
			`}`