LlmGuard

LlmGuard Logo

AI Firewall and Guardrails for LLM-based Elixir Applications

ElixirOTPHex.pmDocumentationLicense

LlmGuard provides comprehensive security protection for LLM applications including prompt injection detection, jailbreak prevention, data leakage protection, and content moderation.

Features

Quick Start

Add to your mix.exs:

def deps do
  [
    {:llm_guard, "~> 0.3.1"}
  ]
end

Basic usage:

# Create configuration
config = LlmGuard.Config.new(
  prompt_injection_detection: true,
  confidence_threshold: 0.7
)

# Validate user input
case LlmGuard.validate_input(user_input, config) do
  {:ok, safe_input} ->
    # Safe to send to LLM
    llm_response = MyLLM.generate(safe_input)
    
    # Validate output
    case LlmGuard.validate_output(llm_response, config) do
      {:ok, safe_output} -> {:ok, safe_output}
      {:error, :detected, details} -> {:error, "Unsafe output"}
    end
    
  {:error, :detected, details} ->
    # Blocked malicious input
    Logger.warn("Threat detected: #{details.reason}")
    {:error, "Input blocked"}
end

Architecture

LlmGuard uses a multi-layer detection strategy:

  1. Pattern Matching (~1ms) - Fast regex-based detection
  2. Heuristic Analysis (~10ms) - Statistical analysis (coming soon)
  3. ML Classification (~50ms) - Advanced threat detection (coming soon)
User Input
    │
    ▼
┌─────────────────┐
│ Input Validation│
│  - Length check │
│  - Sanitization │
└────────┬────────┘
         │
         ▼
┌─────────────────────┐
│ Security Pipeline   │
│  ┌───────────────┐  │
│  │ Detector 1    │  │
│  ├───────────────┤  │
│  │ Detector 2    │  │
│  ├───────────────┤  │
│  │ Detector 3    │  │
│  └───────────────┘  │
└────────┬────────────┘
         │
         ▼
    LLM Processing
         │
         ▼
┌─────────────────────┐
│ Output Validation   │
└────────┬────────────┘
         │
         ▼
     User Response

Detected Threats

Prompt Injection (34 patterns)

Jailbreak Detection

PII Detection & Redaction

Coming Soon

Testing

# Run all tests
mix test

# Run with coverage
mix coveralls.html

# Run security tests only
mix test --only security

# Run performance benchmarks
mix test --only performance

Current Status:

Configuration

config = LlmGuard.Config.new(
  # Detection toggles
  prompt_injection_detection: true,
  jailbreak_detection: false,  # Coming soon
  data_leakage_prevention: false,  # Coming soon
  content_moderation: false,  # Coming soon
  
  # Thresholds
  confidence_threshold: 0.7,
  max_input_length: 10_000,
  max_output_length: 10_000,
  
  # Rate limiting (coming soon)
  rate_limiting: %{
    requests_per_minute: 100,
    tokens_per_minute: 200_000
  }
)

# Optional: Caching (set `caching` to enable pipeline result caching)
caching_config = %{
  enabled: true,
  pattern_cache: true,
  result_cache: true,
  result_ttl_seconds: 300,
  max_cache_entries: 10_000
}

config = LlmGuard.Config.new(
  prompt_injection_detection: true,
  caching: caching_config
)

Caching

The pipeline will reuse detector results when caching.enabled is true and the cache process is running.

# Start the cache in your supervision tree
children = [
  {LlmGuard.Cache.PatternCache, []},
  # ...other children
]

# Fetch cache statistics
stats = LlmGuard.Cache.PatternCache.stats()
# => %{pattern_count: 10, result_count: 42, hit_rate: 0.78, ...}

Telemetry & Metrics

Telemetry emits pipeline, detector, and cache events with native durations.

# Initialize handlers once (idempotent)
:ok = LlmGuard.Telemetry.Metrics.setup()

# Inspect metrics in-process
metrics = LlmGuard.Telemetry.Metrics.snapshot()

# Prometheus text format
prom_text = LlmGuard.Telemetry.Metrics.prometheus_metrics()

Integrate with Telemetry.Metrics reporters:

import Telemetry.Metrics

metrics = LlmGuard.Telemetry.Metrics.metrics()

Use these metrics with Prometheus (e.g., TelemetryMetricsPrometheus) or LiveDashboard to track request outcomes, detector latency, cache hit rates, and confidence distributions.

Performance

Current (Phase 1):

Targets (Phase 4):

Development Status

See IMPLEMENTATION_STATUS.md for detailed progress.

Phase 1 - Foundation: ✅ 80% Complete

Phase 2 - Advanced Detection: ⏳ 0% Complete Phase 3 - Policy & Infrastructure: ⏳ 0% Complete Phase 4 - Optimization: ⏳ 0% Complete

Examples

Run examples with mix run examples/example_name.exs:

# Basic usage demonstration
mix run examples/basic_usage.exs

# Jailbreak detection examples
mix run examples/jailbreak_detection.exs

# Comprehensive multi-layer protection
mix run examples/comprehensive_protection.exs

CrucibleIR Pipeline Integration

# Use LlmGuard as a stage in CrucibleIR research pipelines
defmodule MyExperiment do
  def run_with_guardrails do
    # Configure guardrails
    guardrail = %CrucibleIR.Reliability.Guardrail{
      profiles: [:default],
      prompt_injection_detection: true,
      jailbreak_detection: true,
      pii_detection: true,
      pii_redaction: false,
      fail_on_detection: true
    }

    # Create experiment context
    context = %{
      experiment: %{
        reliability: %{
          guardrails: guardrail
        }
      },
      inputs: "User prompt to validate"
    }

    # Run the stage
    case LlmGuard.Stage.run(context) do
      {:ok, updated_context} ->
        # Check validation results
        case updated_context.guardrails.status do
          :safe ->
            IO.puts("Input validated successfully")
            process_safe_input(updated_context.guardrails.validated_inputs)

          :detected ->
            IO.puts("Threats detected: #{inspect(updated_context.guardrails.detections)}")
            handle_detected_threats(updated_context.guardrails)

          :error ->
            IO.puts("Validation errors: #{inspect(updated_context.guardrails.errors)}")
        end

      {:error, {:threats_detected, details}} ->
        # Strict mode: fail_on_detection was true
        IO.puts("Pipeline halted due to detected threats")
        {:error, details}
    end
  end
end

Phoenix Integration

defmodule MyAppWeb.LlmGuardPlug do
  import Plug.Conn

  def init(opts), do: opts

  def call(conn, _opts) do
    with {:ok, input} <- extract_llm_input(conn),
         {:ok, sanitized} <- LlmGuard.validate_input(input, config()) do
      assign(conn, :sanitized_input, sanitized)
    else
      {:error, :detected, details} ->
        conn
        |> put_status(:forbidden)
        |> json(%{error: "Input blocked", reason: details.reason})
        |> halt()
    end
  end
end

Batch Validation

# Validate multiple inputs concurrently
inputs = ["Message 1", "Ignore all instructions", "Message 3"]
results = LlmGuard.validate_batch(inputs, config)

Enum.each(results, fn
  {:ok, safe_input} -> process_safe(safe_input)
  {:error, :detected, details} -> log_threat(details)
end)

Documentation

Full documentation is available at hexdocs.pm/llm_guard.

Generate locally:

mix docs
open doc/index.html

Contributing

Contributions are welcome! Please open an issue or pull request on GitHub.

Areas needing help:

Roadmap

Security

For security issues, please email security@example.com instead of using the issue tracker.

License

MIT License. See LICENSE for details.

Acknowledgments

Built following security best practices and threat models from:


Status: Alpha - Production-ready for prompt injection detection Version: 0.3.1 Elixir: ~> 1.14 OTP: 25+