Trait Definition

#[async_trait]
pub trait LlmService: Send + Sync {
    async fn chat(&self, request: ChatRequest) -> Result<ChatResponse>;
    async fn generate(&self, prompt: &str, config: GenerationConfig) -> Result<String>;
    async fn health_check(&self) -> Result<()>;
}

Module: magicaf_core::llm
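
Any Send + Sync type can implement the trait. A minimal in-memory sketch follows, useful as a test stub when no local server is running; the EchoLlmService name, the DTO import paths, and the location of the Result alias are assumptions for illustration, not part of the crate.

use async_trait::async_trait;
use magicaf_core::llm::{
    ChatChoice, ChatMessage, ChatRequest, ChatResponse, ChatRole, GenerationConfig, LlmService,
};
use magicaf_core::Result; // assumed location of the crate's Result alias

// Hypothetical stub used only to illustrate the trait surface.
struct EchoLlmService;

#[async_trait]
impl LlmService for EchoLlmService {
    async fn chat(&self, request: ChatRequest) -> Result<ChatResponse> {
        // Echo the most recent user message back as the assistant reply.
        let content = request
            .messages
            .iter()
            .rev()
            .find(|m| m.role == ChatRole::User)
            .map(|m| m.content.clone())
            .unwrap_or_default();
        Ok(ChatResponse {
            id: "echo-0".into(),
            object: "chat.completion".into(),
            created: 0,
            model: request.model,
            choices: vec![ChatChoice {
                index: 0,
                message: ChatMessage { role: ChatRole::Assistant, content },
                finish_reason: Some("stop".into()),
            }],
            usage: None,
        })
    }

    async fn generate(&self, prompt: &str, _config: GenerationConfig) -> Result<String> {
        Ok(prompt.to_string())
    }

    async fn health_check(&self) -> Result<()> {
        Ok(())
    }
}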


Methods

chat

async fn chat(&self, request: ChatRequest) -> Result<ChatResponse>

Send a structured chat completion request.

| Parameter | Type        | Description                          |
|-----------|-------------|--------------------------------------|
| request   | ChatRequest | The chat completion request payload  |

Returns: ChatResponse containing the model’s reply, usage statistics, and metadata.

Errors: MagicError::LlmError, MagicError::HttpError, MagicError::SerializationError


generate

async fn generate(&self, prompt: &str, config: GenerationConfig) -> Result<String>

High-level convenience method: turns a raw prompt into generated text without requiring a ChatRequest to be built by hand.

| Parameter | Type             | Description            |
|-----------|------------------|------------------------|
| prompt    | &str             | The input prompt       |
| config    | GenerationConfig | Generation parameters  |

Returns: String — the generated text.


health_check

async fn health_check(&self) -> Result<()>

Verify the upstream LLM server is reachable.
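
For example, a startup routine might poll health_check before accepting work. The sketch below assumes a tokio runtime and an assumed location for the crate's Result alias; the retry count and delay are arbitrary.

use std::time::Duration;
use magicaf_core::llm::LlmService;
use magicaf_core::Result; // assumed location of the crate's Result alias

// Retry the health probe a few times before giving up.
async fn wait_until_ready(llm: &dyn LlmService) -> Result<()> {
    let mut last_err = None;
    for _ in 0..5 {
        match llm.health_check().await {
            Ok(()) => return Ok(()),
            Err(e) => {
                last_err = Some(e);
                tokio::time::sleep(Duration::from_secs(2)).await;
            }
        }
    }
    Err(last_err.expect("loop ran at least once"))
}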


DTO Types

ChatRole

#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub enum ChatRole {
    System,
    User,
    Assistant,
    Other,
}

ChatMessage

pub struct ChatMessage {
    pub role: ChatRole,
    pub content: String,
}

Convenience constructors:

ChatMessage::system("You are a helpful assistant.")
ChatMessage::user("What is MagicAF?")
ChatMessage::assistant("MagicAF is...")

ChatRequest

pub struct ChatRequest {
    pub model: String,
    pub messages: Vec<ChatMessage>,
    pub temperature: Option<f32>,
    pub top_p: Option<f32>,
    pub max_tokens: Option<u32>,
    pub stop: Option<Vec<String>>,
}

Follows the OpenAI /v1/chat/completions request schema. The model field is auto-filled by LocalLlmService if left empty.

ChatResponse

pub struct ChatResponse {
    pub id: String,
    pub object: String,
    pub created: u64,
    pub model: String,
    pub choices: Vec<ChatChoice>,
    pub usage: Option<Usage>,
}

Helper method:

// Extract the text of the first choice
let text: Option<&str> = response.first_content();
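
In terms of the DTO fields documented below, first_content behaves roughly like this hand-rolled accessor (a sketch, not the crate's actual implementation):

// Borrow the content of the first choice, if any choice was returned.
let text: Option<&str> = response
    .choices
    .first()
    .map(|choice| choice.message.content.as_str());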

ChatChoice

pub struct ChatChoice {
    pub index: u32,
    pub message: ChatMessage,
    pub finish_reason: Option<String>,
}

Usage

pub struct Usage {
    pub prompt_tokens: u32,
    pub completion_tokens: u32,
    pub total_tokens: u32,
}
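
Token accounting is optional in ChatResponse, so callers typically guard on it. For example:

// usage may be None when the server omits token statistics.
if let Some(usage) = &response.usage {
    println!(
        "prompt: {} tokens, completion: {} tokens, total: {} tokens",
        usage.prompt_tokens, usage.completion_tokens, usage.total_tokens
    );
}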

LocalLlmService

Crate: magicaf-local-llm

HTTP client that calls any server exposing an OpenAI-compatible /v1/chat/completions endpoint.

Constructor

impl LocalLlmService {
    pub fn new(config: LlmConfig) -> Result<Self>
    pub fn model_name(&self) -> &str
}
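
Once constructed, the client reports the model it will target by default. A small illustrative snippet; config stands in for an LlmConfig like the one in the example below:

// config: an LlmConfig (see the example below).
let llm = LocalLlmService::new(config)?;
println!("chat requests will default to {}", llm.model_name());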

Compatible Servers

| Server    | Command / Notes                                                       |
|-----------|-----------------------------------------------------------------------|
| vLLM      | python -m vllm.entrypoints.openai.api_server --model ... --port 8000 |
| llama.cpp | ./llama-server -m model.gguf --port 8000                              |
| TGI       | Text Generation Inference with OpenAI-compatible mode                 |
| LocalAI   | Drop-in OpenAI replacement                                            |
| Ollama    | With OpenAI compatibility layer enabled                               |
| Custom    | Any HTTP server mirroring the OpenAI Chat Completions schema          |

Example

use magicaf_core::config::LlmConfig;
use magicaf_core::llm::{ChatMessage, ChatRequest, GenerationConfig}; // DTOs documented above; module path assumed to match the trait's
use magicaf_local_llm::LocalLlmService;

let llm = LocalLlmService::new(LlmConfig {
    base_url: "http://localhost:8000/v1".into(),
    model_name: "mistral-7b".into(),
    api_key: None,
    timeout_secs: 120,
})?;

// Structured chat
let response = llm.chat(ChatRequest {
    model: String::new(), // Auto-filled from config
    messages: vec![
        ChatMessage::system("You are helpful."),
        ChatMessage::user("Explain RAG."),
    ],
    temperature: Some(0.3),
    top_p: None,
    max_tokens: Some(1024),
    stop: None,
}).await?;

println!("{}", response.first_content().unwrap_or(""));

// Simple generation
let text = llm.generate("Explain RAG in one sentence.", GenerationConfig::default()).await?;
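
LocalLlmService is the HTTP-backed implementation of LlmService, so the client can also be handed to code that only depends on the trait. A sketch of that wiring; the summarize helper and the Result alias path are assumptions for illustration:

use std::sync::Arc;
use magicaf_core::llm::{GenerationConfig, LlmService};
use magicaf_core::Result; // assumed location of the crate's Result alias

// Hypothetical caller that knows nothing about the concrete HTTP client.
async fn summarize(llm: Arc<dyn LlmService>, prompt: &str) -> Result<String> {
    llm.generate(prompt, GenerationConfig::default()).await
}

// llm is the LocalLlmService constructed above.
let shared: Arc<dyn LlmService> = Arc::new(llm);
let summary = summarize(shared.clone(), "Explain RAG in one sentence.").await?;
println!("{summary}");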