diff --git a/Cargo.lock b/Cargo.lock
index 682de48..62106b9 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2138,6 +2138,7 @@ dependencies = [
  "futures",
  "tabby-inference",
  "tokio",
+ "tracing",
 ]
 
 [[package]]
diff --git a/crates/llama-cpp-bindings/Cargo.toml b/crates/llama-cpp-bindings/Cargo.toml
index a069c1c..6df84ac 100644
--- a/crates/llama-cpp-bindings/Cargo.toml
+++ b/crates/llama-cpp-bindings/Cargo.toml
@@ -18,3 +18,4 @@ tabby-inference = { path = "../tabby-inference" }
 derive_builder = { workspace = true }
 futures.workspace = true
 async-stream.workspace = true
+tracing.workspace = true
diff --git a/crates/llama-cpp-bindings/src/lib.rs b/crates/llama-cpp-bindings/src/lib.rs
index b2b2a32..207fd3d 100644
--- a/crates/llama-cpp-bindings/src/lib.rs
+++ b/crates/llama-cpp-bindings/src/lib.rs
@@ -1,3 +1,5 @@
+mod utils;
+
 use std::{collections::HashMap, sync::Arc};
 
 use async_stream::stream;
@@ -77,7 +79,7 @@ impl AsyncTextInferenceEngine {
         let result = match engine.as_mut().unwrap().step() {
             Ok(result) => result,
             Err(err) => {
-                panic!("Failed to step: {}", err)
+                fatal!("Failed to step: {}", err)
             }
         };
 
@@ -161,7 +163,7 @@ impl LlamaTextGeneration {
     pub fn create(options: LlamaTextGenerationOptions) -> Self {
         let engine = create_engine(options.use_gpu, &options.model_path);
         if engine.is_null() {
-            panic!("Unable to load model: {}", options.model_path);
+            fatal!("Unable to load model: {}", options.model_path);
         }
         let ret = LlamaTextGeneration {
             engine: Arc::new(AsyncTextInferenceEngine::create(engine)),
diff --git a/crates/llama-cpp-bindings/src/utils.rs b/crates/llama-cpp-bindings/src/utils.rs
new file mode 100644
index 0000000..5184479
--- /dev/null
+++ b/crates/llama-cpp-bindings/src/utils.rs
@@ -0,0 +1,16 @@
+#[macro_export]
+macro_rules! fatal {
+    ($msg:expr) => {
+        ({
+            tracing::error!($msg);
+            std::process::exit(1);
+        })
+    };
+
+    ($fmt:expr, $($arg:tt)*) => {
+        ({
+            tracing::error!($fmt, $($arg)*);
+            std::process::exit(1);
+        })
+    };
+}