Razmetka
Priority-dispatch sentence classifier with pluggable ML fallback for Russian NLP.
Part of the natasha-ex ecosystem. Extends yargy's defmatch bag-of-features matchers with ordered dispatch and ML classifier fallback.
- Grammar-first: bag-of-features rules checked in priority order
- ML fallback: pluggable classifier (Razmetka.Classifier behaviour) for unmatched sentences
- Zero coupling: works with FRIDA, fastText, or any embedding model
- Reuses yargy's terminal predicates (lemma, gram, token, etc.)
Installation
def deps do
[
{:razmetka, "~> 0.1"}
]
end
Usage
defmodule MyApp.SentenceClassifier do
use Yargy.Grammar
use Razmetka
# Bag-of-features matchers (from yargy)
defmatch(:demand, any_token(all([lemma(~w[требовать просить взыскать]), gram("VERB")])))
defmatch(:norm_framing, any_token(lemma(~w[соответствие согласно основание])))
defmatch(:evidence, all_of([
any_token(lemma(~w[подтверждаться подтвердить])),
any_token(lemma(~w[акт квитанция чек выписка]))
]))
# Priority dispatch with standard Elixir boolean expressions
defclassify classifier: MyApp.FridaClassifier, default: :fact do
:demand -> demand?()
:norm -> norm_framing?()
:evidence -> evidence?()
end
end
Classify text
MyApp.SentenceClassifier.classify_text("Истец требует возмещения убытков")
#=> {:demand, %{confidence: :grammar}}
MyApp.SentenceClassifier.classify_text("Товар был поставлен 20 октября")
#=> {:fact, %{confidence: :classifier, score: 0.72}}
Pre-tokenized input
tokens = Yargy.Pipeline.morph_tokenize("Истец требует возмещения")
MyApp.SentenceClassifier.classify(tokens, "Истец требует возмещения")
#=> {:demand, %{confidence: :grammar}}
Pluggable classifiers
Implement the Razmetka.Classifier behaviour:
defmodule MyApp.FridaClassifier do
@behaviour Razmetka.Classifier
@impl true
def classify(text, _opts) do
clf = MyApp.ClassifierServer.get()
{type, score} = MyApp.NLP.Classifier.classify_one(clf, text)
{type, score}
end
end
The callback returns {type, score}, where score is a number from 0.0 to 1.0.
Razmetka compares the score against :threshold — if the score is below it,
the :default type is used instead of the classifier's type.
How it works
classify_text("В соответствии со ст. 309 ГК РФ...")
│
├─ tokenize + morph-tag (once, via yargy)
│
├─ Try :demand → no conjugated demand verb → skip
├─ Try :norm → has "соответствие" ✓ → MATCH
│ → {:norm, %{confidence: :grammar}}
│
└─ (never reaches :evidence or classifier)

classify_text("Товар был поставлен 20 октября.")
│
├─ tokenize + morph-tag
│
├─ Try :demand → skip
├─ Try :norm → skip
├─ Try :evidence → skip
├─ Classifier fallback → FRIDA → {:fact, 0.72}
│ → {:fact, %{confidence: :classifier, score: 0.72}}
Compound conditions
Use standard Elixir and, or, not, and parentheses.
Two variables are in scope: tokens and text.
defmodule MyApp.LegalClassifier do
use Yargy.Grammar
use Razmetka
defmatch(:title_base, any_token(lemma("претензия")))
defmatch(:pretrial, any_token(lemma("досудебный")))
defmatch(:short, max_words(5))
defmatch(:demand_verb, any_token(all([lemma(~w[требовать просить]), gram("VERB")])))
defmatch(:norm_framing, any_token(lemma(~w[соответствие согласно])))
def has_law_ref?(_, text), do: String.contains?(text, "ст.")
defclassify default: :unknown do
:procedural_title -> title_base?() and (pretrial?() or short?())
:norm -> has_law_ref?(tokens, text) and norm_framing?()
:demand -> demand_verb?()
:not_demand -> not demand_verb?()
end
end
Without classifier
defmodule MyApp.SimpleClassifier do
use Yargy.Grammar
use Razmetka
defmatch(:greeting, any_token(lemma("привет")))
defclassify default: :unknown do
:greeting -> greeting?()
end
end
MyApp.SimpleClassifier.classify_text("Привет мир")
#=> {:greeting, %{confidence: :grammar}}
MyApp.SimpleClassifier.classify_text("Какой-то текст")
#=> {:unknown, %{confidence: :low}}
License
MIT © Danila Poyarkov