Zvex

Elixir bindings for zvec, an in-process vector database. Zvex provides type-safe, idiomatic Elixir access to zvec's vector indexing and similarity search through Zig-based NIF bindings.

Features

Installation

Add zvex to your dependencies in mix.exs:

def deps do
  [
    {:zvex, "~> 0.1.0"}
  ]
end

zvex ships precompiled NIFs for the targets listed below. When you run mix deps.compile, the artifact matching your host is downloaded from this repository's GitHub Releases and verified via SHA-256 against the checksum file packaged with the Hex release. No Zig, CMake, or C/C++ toolchain is required when a precompiled artifact matches the host.

Supported precompiled targets

Building from source

Any other Unix target (FreeBSD, x86_64-macOS, Alpine/musl, …) requires a source build. To force a source build on a target that has a precompiled artifact, run ZVEX_BUILD=1 mix compile. Source builds need:

Windows is not supported.

Environment variables

Variable Effect
ZVEX_BUILD=1 Skip the precompiled download and build the NIF locally via Zigler.
ZVEC_REPO Override the upstream zvec git URL used for source fetches.
ZVEC_TAG Override the zvec git tag/ref fetched when building from source.

Quick Start

# Initialize the library
Zvex.initialize!()

# Define a collection schema
schema =
  Zvex.Collection.Schema.new("my_collection")
  |> Zvex.Collection.Schema.add_field("embedding", :vector_fp32, dimension: 128, index: :hnsw)
  |> Zvex.Collection.Schema.add_field("title", :string, index: :invert)

# Create and open a collection
{:ok, collection} = Zvex.Collection.create("/tmp/my_collection", schema)

# Build and insert a document
doc =
  Zvex.Document.new()
  |> Zvex.Document.put_pk(1)
  |> Zvex.Document.put("embedding", Zvex.Vector.from_list(:fp32, List.duplicate(0.5, 128)))
  |> Zvex.Document.put("title", "Hello world")

:ok = Zvex.Collection.insert(collection, [doc])

# Query nearest neighbors
results =
  Zvex.Query.new()
  |> Zvex.Query.field("embedding")
  |> Zvex.Query.vector(Zvex.Vector.from_list(:fp32, List.duplicate(0.5, 128)))
  |> Zvex.Query.top_k(10)
  |> Zvex.Query.hnsw(ef: 100)
  |> Zvex.Query.output_fields(["title"])
  |> Zvex.Query.execute!(collection)

# Clean up
Zvex.Collection.close!(collection)
Zvex.Collection.drop!("/tmp/my_collection")
Zvex.shutdown!()

API Overview

Initialization

# Default configuration
Zvex.initialize!()

# Custom configuration
config =
  Zvex.Config.new()
  |> Zvex.Config.memory_limit(1_073_741_824)
  |> Zvex.Config.query_threads(4)
  |> Zvex.Config.optimize_threads(2)
  |> Zvex.Config.log(:console, level: :info)

Zvex.initialize!(config)

Schema Definition

Schemas define the structure and indexing of a collection.

schema =
  Zvex.Collection.Schema.new("products")
  |> Zvex.Collection.Schema.add_field("embedding", :vector_fp32,
    dimension: 768,
    index: :hnsw,
    metric: :cosine,
    m: 16,
    ef_construction: 200
  )
  |> Zvex.Collection.Schema.add_field("name", :string, index: :invert)
  |> Zvex.Collection.Schema.add_field("price", :double)
  |> Zvex.Collection.Schema.add_field("tags", :array_string)

Data Types

Category Types
Dense vectors vector_fp32, vector_fp16, vector_fp64, vector_int4, vector_int8, vector_int16, vector_binary32, vector_binary64
Sparse vectors sparse_vector_fp16, sparse_vector_fp32
Scalars string, int32, int64, uint32, uint64, float, double, bool, binary
Arrays array_string, array_int32, array_int64, array_uint32, array_uint64, array_float, array_double, array_bool, array_binary

Index Types

Index Use Case Key Options
:hnsw Approximate nearest neighbor search :metric, :m, :ef_construction, :quantize
:ivf Large-scale partitioned search :metric, :n_list, :n_iters, :use_soar, :quantize
:flat Exact brute-force search :metric, :quantize
:invert Scalar field filtering :enable_range_opt, :enable_wildcard

Collections

# Create
{:ok, collection} = Zvex.Collection.create("/path/to/data", schema)

# Open existing
{:ok, collection} = Zvex.Collection.open("/path/to/data", schema)

# Open options
{:ok, collection} = Zvex.Collection.open("/path/to/data", schema,
  mmap: true,
  read_only: true,
  max_buffer_size: 67_108_864
)

# Maintenance
Zvex.Collection.flush(collection)
Zvex.Collection.optimize(collection)

# Introspection
{:ok, stats} = Zvex.Collection.stats(collection)
# => %Zvex.Collection.Stats{doc_count: 1000, indexes: [%{name: "embedding", completeness: 1.0}]}

# DDL operations
Zvex.Collection.create_index(collection, "new_field", index_params)
Zvex.Collection.drop_index(collection, "old_field")
Zvex.Collection.add_column(collection, "metadata", :string, "default_value")
Zvex.Collection.drop_column(collection, "metadata")

Documents

# Build documents
doc =
  Zvex.Document.new()
  |> Zvex.Document.put_pk(42)
  |> Zvex.Document.put("embedding", Zvex.Vector.from_list(:fp32, embedding_data))
  |> Zvex.Document.put("name", "Product A")
  |> Zvex.Document.put("price", 29.99)

# Build from a map (requires schema for type resolution)
doc = Zvex.Document.from_map(%{"pk" => 42, "name" => "Product A", "price" => 29.99}, schema)

# CRUD
Zvex.Collection.insert(collection, [doc1, doc2])
Zvex.Collection.update(collection, [updated_doc])
Zvex.Collection.upsert(collection, [doc])
Zvex.Collection.delete(collection, [42, 43])
Zvex.Collection.delete_by_filter(collection, "name = 'Product A'")

{:ok, docs} = Zvex.Collection.fetch(collection, [42])

Vectors

# Dense vectors
vec = Zvex.Vector.from_list(:fp32, [1.0, 2.0, 3.0])
vec = Zvex.Vector.from_binary(:fp16, binary_data)
list = Zvex.Vector.to_list(vec)
dim = Zvex.Vector.dimension(vec)

# Sparse vectors
vec = Zvex.Vector.from_sparse(:sparse_fp32, [0, 5, 10], [1.0, 0.5, 0.3])
{indices, values} = Zvex.Vector.to_sparse(vec)
nnz = Zvex.Vector.nnz(vec)

Queries

results =
  Zvex.Query.new()
  |> Zvex.Query.field("embedding")
  |> Zvex.Query.vector(query_vector)
  |> Zvex.Query.top_k(10)
  |> Zvex.Query.filter("price < 50.0")
  |> Zvex.Query.output_fields(["name", "price"])
  |> Zvex.Query.include_vector(true)
  |> Zvex.Query.hnsw(ef: 200)
  |> Zvex.Query.execute!(collection)

for result <- results do
  IO.puts("pk=#{result.pk} score=#{result.score} name=#{result.fields["name"]}")
end

Search algorithm options:

# HNSW - approximate, fast
|> Zvex.Query.hnsw(ef: 200)

# IVF - partitioned search
|> Zvex.Query.ivf(n_probe: 16)

# Flat - exact brute-force
|> Zvex.Query.flat()

Error Handling

All fallible functions come in two forms: fun/n returns {:ok, result} or {:error, error}, and fun!/n raises on failure.

Errors are structured via Splode into classes:

| Class | Errors |
| --- | --- |
| Invalid | Argument, FailedPrecondition |
| NotFound | NotFound |
| Conflict | AlreadyExists |
| Unavailable | PermissionDenied, ResourceExhausted, Unavailable, NotSupported |
| Internal | InternalError |
| Unknown | Unknown |

Benchmarks

Run benchmarks with mix aliases:

mix bench.vector       # Vector packing/unpacking
mix bench.document     # Document creation and serialization
mix bench.collection   # Insert, upsert, delete, fetch
mix bench.query        # Vector search performance
mix bench.all          # Everything

Development

# Run tests
mix test

# Run quality checks
mix check

# Generate docs
mix docs

License

See LICENSE for details.