tabby/crates/ctranslate2-bindings/src/lib.rs

use tokenizers::tokenizer::Tokenizer;

#[macro_use]
extern crate derive_builder;

// FFI bridge into the C++ side of the bindings; `cxx` generates the glue
// code from these declarations.
#[cxx::bridge(namespace = "tabby")]
mod ffi {
    unsafe extern "C++" {
        include!("ctranslate2-bindings/include/ctranslate2.h");

        type TextInferenceEngine;

        fn create_engine(
            model_path: &str,
            model_type: &str,
            device: &str,
            device_indices: &[i32],
            num_replicas_per_device: usize,
        ) -> UniquePtr<TextInferenceEngine>;

        fn inference(
            &self,
            tokens: &[String],
            max_decoding_length: usize,
            sampling_temperature: f32,
            beam_size: usize,
        ) -> Vec<String>;
    }
}
/// Options for constructing a `TextInferenceEngine`.
#[derive(Builder, Debug)]
pub struct TextInferenceEngineCreateOptions {
    model_path: String,
    model_type: String,
    tokenizer_path: String,
    device: String,
    device_indices: Vec<i32>,
    num_replicas_per_device: usize,
}
/// Per-request decoding options; defaults are supplied via `derive_builder`.
#[derive(Builder, Debug)]
pub struct TextInferenceOptions {
    #[builder(default = "256")]
    max_decoding_length: usize,
    #[builder(default = "1.0")]
    sampling_temperature: f32,
    #[builder(default = "2")]
    beam_size: usize,
}
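
// Illustrative sketch (not in the original file): `derive_builder` generates
// a `TextInferenceOptionsBuilder` for the struct above; any field left unset
// falls back to its `#[builder(default)]` value.
//
//     let options = TextInferenceOptionsBuilder::default()
//         .max_decoding_length(128)
//         .build()
//         .unwrap();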
pub struct TextInferenceEngine {
    engine: cxx::UniquePtr<ffi::TextInferenceEngine>,
    tokenizer: Tokenizer,
}

// SAFETY: `UniquePtr` is neither `Send` nor `Sync`; these impls assume the
// underlying C++ engine is safe to share across threads.
unsafe impl Send for TextInferenceEngine {}
unsafe impl Sync for TextInferenceEngine {}
impl TextInferenceEngine {
    pub fn create(options: TextInferenceEngineCreateOptions) -> Self {
        let engine = ffi::create_engine(
            &options.model_path,
            &options.model_type,
            &options.device,
            &options.device_indices,
            options.num_replicas_per_device,
        );
        TextInferenceEngine {
            engine,
            tokenizer: Tokenizer::from_file(&options.tokenizer_path).unwrap(),
        }
    }
    pub fn inference(&self, prompt: &str, options: TextInferenceOptions) -> String {
        // Tokenize the prompt, decode in the C++ engine, then map the output
        // tokens back to ids so the tokenizer can detokenize them.
        let encoding = self.tokenizer.encode(prompt, true).unwrap();
        let output_tokens = self.engine.inference(
            encoding.get_tokens(),
            options.max_decoding_length,
            options.sampling_temperature,
            options.beam_size,
        );
        let output_ids: Vec<u32> = output_tokens
            .iter()
            .filter_map(|x| match self.tokenizer.token_to_id(x) {
                Some(y) => Some(y),
                None => {
                    eprintln!("Warning: token ({}) missing from vocab", x);
                    None
                }
            })
            .collect();
        self.tokenizer.decode(output_ids, true).unwrap()
    }
}
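
// Minimal end-to-end usage sketch (not in the original file). The builder
// names are generated by `derive_builder`; the paths and the "cpu" device
// below are placeholders and must point at a real converted CTranslate2
// model directory and a HuggingFace `tokenizer.json`.
//
//     let engine = TextInferenceEngine::create(
//         TextInferenceEngineCreateOptionsBuilder::default()
//             .model_path("/path/to/ctranslate2/model".to_owned())
//             .model_type("AutoModelForCausalLM".to_owned())
//             .tokenizer_path("/path/to/tokenizer.json".to_owned())
//             .device("cpu".to_owned())
//             .device_indices(vec![0])
//             .num_replicas_per_device(1)
//             .build()
//             .unwrap(),
//     );
//     let options = TextInferenceOptionsBuilder::default().build().unwrap();
//     let completion = engine.inference("fn main", options);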