Documentation
¶
Overview ¶
Package heuristic provides rule-based evaluation metrics that don't require LLM calls.
String Metrics ¶
Basic string comparison metrics:
- Equals: Exact string match
- Contains: Substring presence
- StartsWith, EndsWith: Prefix/suffix matching
- ContainsAny, ContainsAll: Multiple value matching
- NotEmpty: Non-empty output check
- LengthBetween, WordCount: Length constraints
Parsing Metrics ¶
Format validation metrics:
- IsJSON, IsJSONObject, IsJSONArray: JSON validation
- JSONHasKeys, JSONSchemaValid: JSON structure validation
- IsXML: XML validation
- IsNumber, IsBoolean: Type validation
Pattern Metrics ¶
Regular expression and format validation:
- RegexMatch, RegexNotMatch: Pattern matching
- EmailFormat, URLFormat: Common formats
- PhoneFormat, DateFormat, UUIDFormat: Specialized formats
Similarity Metrics ¶
Text similarity metrics:
- LevenshteinSimilarity: Edit distance based
- JaccardSimilarity: Set-based overlap
- CosineSimilarity: Word vector similarity
- BLEU: N-gram precision (machine translation style)
- ROUGE: Simplified ROUGE-L score based on longest common subsequence
- FuzzyMatch: Combined similarity score
Usage Example ¶
metrics := []evaluation.Metric{
	heuristic.NewEquals(false),
	heuristic.NewContains(false),
	heuristic.NewIsJSON(),
	heuristic.NewLevenshteinSimilarity(false),
}
engine := evaluation.NewEngine(metrics)
input := evaluation.NewMetricInput("prompt", "response").WithExpected("expected")
result := engine.EvaluateOne(ctx, input)
Index ¶
- type BLEU
- type Contains
- type ContainsAll
- type ContainsAny
- type CosineSimilarity
- type DateFormat
- type EmailFormat
- type EndsWith
- type Equals
- type ExtractJSON
- type FuzzyMatch
- type IsBoolean
- type IsJSON
- type IsJSONArray
- type IsJSONObject
- type IsNumber
- type IsXML
- type JSONHasKeys
- type JSONSchemaValid
- type JaccardSimilarity
- type LengthBetween
- type LevenshteinSimilarity
- type NoOffensiveLanguage
- type NotEmpty
- type PhoneFormat
- type ROUGE
- type RegexFindAll
- type RegexMatch
- type RegexNotMatch
- type SemanticSimilarity
- type StartsWith
- type URLFormat
- type UUIDFormat
- type WordCount
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
This section is empty.
Types ¶
type BLEU ¶
type BLEU struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
BLEU calculates a simplified BLEU (Bilingual Evaluation Understudy) score. This is a simplified implementation focusing on n-gram precision.
func (*BLEU) Score ¶
func (m *BLEU) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score calculates the BLEU score between output and expected.
type Contains ¶
type Contains struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
Contains checks if the output contains the expected value.
func NewContains ¶
NewContains creates a new Contains metric.
func (*Contains) Score ¶
func (m *Contains) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output contains expected.
type ContainsAll ¶
type ContainsAll struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
ContainsAll checks if the output contains all of the specified values.
func NewContainsAll ¶
func NewContainsAll(values []string, caseSensitive bool) *ContainsAll
NewContainsAll creates a new ContainsAll metric.
func (*ContainsAll) Score ¶
func (m *ContainsAll) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output contains all of the values.
type ContainsAny ¶
type ContainsAny struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
ContainsAny checks if the output contains any of the specified values.
func NewContainsAny ¶
func NewContainsAny(values []string, caseSensitive bool) *ContainsAny
NewContainsAny creates a new ContainsAny metric.
func (*ContainsAny) Score ¶
func (m *ContainsAny) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output contains any of the values.
type CosineSimilarity ¶
type CosineSimilarity struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
CosineSimilarity calculates word-based cosine similarity.
func NewCosineSimilarity ¶
func NewCosineSimilarity(caseSensitive bool) *CosineSimilarity
NewCosineSimilarity creates a new CosineSimilarity metric.
func (*CosineSimilarity) Score ¶
func (m *CosineSimilarity) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score calculates the cosine similarity between output and expected.
type DateFormat ¶
type DateFormat struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
DateFormat checks if the output matches a date format.
func NewDateFormat ¶
func NewDateFormat() *DateFormat
NewDateFormat creates a new DateFormat metric that uses the ISO format by default.
func NewDateFormatWithPattern ¶
func NewDateFormatWithPattern(pattern string) (*DateFormat, error)
NewDateFormatWithPattern creates a new DateFormat metric with a custom pattern.
func (*DateFormat) Score ¶
func (m *DateFormat) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output matches a date format.
type EmailFormat ¶
type EmailFormat struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
EmailFormat checks if the output contains a valid email format.
func NewEmailFormat ¶
func NewEmailFormat() *EmailFormat
NewEmailFormat creates a new EmailFormat metric.
func (*EmailFormat) Score ¶
func (m *EmailFormat) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output is a valid email format.
type EndsWith ¶
type EndsWith struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
EndsWith checks if the output ends with the expected value.
func NewEndsWith ¶
NewEndsWith creates a new EndsWith metric.
func (*EndsWith) Score ¶
func (m *EndsWith) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output ends with expected.
type Equals ¶
type Equals struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
Equals checks if the output exactly matches the expected value.
func (*Equals) Score ¶
func (m *Equals) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output equals expected.
type ExtractJSON ¶
type ExtractJSON struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
ExtractJSON extracts JSON from markdown code blocks or raw text.
func NewExtractJSON ¶
func NewExtractJSON(inner evaluation.Metric) *ExtractJSON
NewExtractJSON creates a new ExtractJSON metric that extracts JSON before evaluation.
func (*ExtractJSON) Score ¶
func (m *ExtractJSON) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score extracts the JSON and passes it to the inner metric.
type FuzzyMatch ¶
type FuzzyMatch struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
FuzzyMatch calculates a fuzzy matching score using multiple similarity metrics.
func NewFuzzyMatch ¶
func NewFuzzyMatch(threshold float64, caseSensitive bool) *FuzzyMatch
NewFuzzyMatch creates a new FuzzyMatch metric.
func (*FuzzyMatch) Score ¶
func (m *FuzzyMatch) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score calculates a combined fuzzy match score.
type IsBoolean ¶
type IsBoolean struct {
evaluation.BaseMetric
}
IsBoolean checks if the output is a valid boolean (true/false/yes/no).
func (*IsBoolean) Score ¶
func (m *IsBoolean) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output is a valid boolean.
type IsJSON ¶
type IsJSON struct {
evaluation.BaseMetric
}
IsJSON checks if the output is valid JSON.
func (*IsJSON) Score ¶
func (m *IsJSON) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output is valid JSON.
type IsJSONArray ¶
type IsJSONArray struct {
evaluation.BaseMetric
}
IsJSONArray checks if the output is a valid JSON array.
func NewIsJSONArray ¶
func NewIsJSONArray() *IsJSONArray
NewIsJSONArray creates a new IsJSONArray metric.
func (*IsJSONArray) Score ¶
func (m *IsJSONArray) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output is a valid JSON array.
type IsJSONObject ¶
type IsJSONObject struct {
evaluation.BaseMetric
}
IsJSONObject checks if the output is a valid JSON object.
func NewIsJSONObject ¶
func NewIsJSONObject() *IsJSONObject
NewIsJSONObject creates a new IsJSONObject metric.
func (*IsJSONObject) Score ¶
func (m *IsJSONObject) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output is a valid JSON object.
type IsNumber ¶
type IsNumber struct {
evaluation.BaseMetric
}
IsNumber checks if the output is a valid number.
func (*IsNumber) Score ¶
func (m *IsNumber) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output is a valid number.
type IsXML ¶
type IsXML struct {
evaluation.BaseMetric
}
IsXML checks if the output is valid XML.
func (*IsXML) Score ¶
func (m *IsXML) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output is valid XML.
type JSONHasKeys ¶
type JSONHasKeys struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
JSONHasKeys checks if the JSON output has the specified keys.
func NewJSONHasKeys ¶
func NewJSONHasKeys(keys []string) *JSONHasKeys
NewJSONHasKeys creates a new JSONHasKeys metric.
func (*JSONHasKeys) Score ¶
func (m *JSONHasKeys) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if JSON output has all required keys.
type JSONSchemaValid ¶
type JSONSchemaValid struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
JSONSchemaValid checks if the JSON output matches a simple schema.
func NewJSONSchemaValid ¶
func NewJSONSchemaValid(required map[string]string) *JSONSchemaValid
NewJSONSchemaValid creates a new JSONSchemaValid metric.
func (*JSONSchemaValid) Score ¶
func (m *JSONSchemaValid) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if JSON output matches the schema.
type JaccardSimilarity ¶
type JaccardSimilarity struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
JaccardSimilarity calculates the Jaccard similarity coefficient.
func NewJaccardSimilarity ¶
func NewJaccardSimilarity(caseSensitive, useWords bool) *JaccardSimilarity
NewJaccardSimilarity creates a new JaccardSimilarity metric.
func (*JaccardSimilarity) Score ¶
func (m *JaccardSimilarity) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score calculates the Jaccard similarity between output and expected.
type LengthBetween ¶
type LengthBetween struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
LengthBetween checks if the output length is within a range.
func NewLengthBetween ¶
func NewLengthBetween(min, max int) *LengthBetween
NewLengthBetween creates a new LengthBetween metric.
func (*LengthBetween) Score ¶
func (m *LengthBetween) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output length is within range.
type LevenshteinSimilarity ¶
type LevenshteinSimilarity struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
LevenshteinSimilarity calculates similarity based on Levenshtein distance.
func NewLevenshteinSimilarity ¶
func NewLevenshteinSimilarity(caseSensitive bool) *LevenshteinSimilarity
NewLevenshteinSimilarity creates a new LevenshteinSimilarity metric.
func (*LevenshteinSimilarity) Score ¶
func (m *LevenshteinSimilarity) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score calculates the Levenshtein similarity between output and expected.
type NoOffensiveLanguage ¶
type NoOffensiveLanguage struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
NoOffensiveLanguage checks for offensive language patterns.
func NewNoOffensiveLanguage ¶
func NewNoOffensiveLanguage(patterns []string) *NoOffensiveLanguage
NewNoOffensiveLanguage creates a new NoOffensiveLanguage metric with custom patterns.
func (*NoOffensiveLanguage) Score ¶
func (m *NoOffensiveLanguage) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output contains offensive language.
type NotEmpty ¶
type NotEmpty struct {
evaluation.BaseMetric
}
NotEmpty checks if the output is not empty.
func (*NotEmpty) Score ¶
func (m *NotEmpty) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output is not empty.
type PhoneFormat ¶
type PhoneFormat struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
PhoneFormat checks if the output contains a phone number format.
func NewPhoneFormat ¶
func NewPhoneFormat() *PhoneFormat
NewPhoneFormat creates a new PhoneFormat metric.
func (*PhoneFormat) Score ¶
func (m *PhoneFormat) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output is a valid phone format.
type ROUGE ¶
type ROUGE struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
ROUGE calculates a simplified ROUGE-L score based on longest common subsequence.
func (*ROUGE) Score ¶
func (m *ROUGE) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score calculates the ROUGE-L score between output and expected.
type RegexFindAll ¶
type RegexFindAll struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
RegexFindAll counts how many times a pattern matches.
func NewRegexFindAll ¶
func NewRegexFindAll(pattern string, minMatches, maxMatches int) (*RegexFindAll, error)
NewRegexFindAll creates a new RegexFindAll metric.
func (*RegexFindAll) Score ¶
func (m *RegexFindAll) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates the number of pattern matches.
type RegexMatch ¶
type RegexMatch struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
RegexMatch checks if the output matches a regular expression.
func MustRegexMatch ¶
func MustRegexMatch(pattern string) *RegexMatch
MustRegexMatch creates a new RegexMatch metric and panics if the pattern is invalid.
func NewRegexMatch ¶
func NewRegexMatch(pattern string) (*RegexMatch, error)
NewRegexMatch creates a new RegexMatch metric.
func (*RegexMatch) Score ¶
func (m *RegexMatch) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output matches the regex pattern.
type RegexNotMatch ¶
type RegexNotMatch struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
RegexNotMatch checks if the output does NOT match a regular expression.
func MustRegexNotMatch ¶
func MustRegexNotMatch(pattern string) *RegexNotMatch
MustRegexNotMatch creates a new RegexNotMatch metric and panics if the pattern is invalid.
func NewRegexNotMatch ¶
func NewRegexNotMatch(pattern string) (*RegexNotMatch, error)
NewRegexNotMatch creates a new RegexNotMatch metric.
func (*RegexNotMatch) Score ¶
func (m *RegexNotMatch) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output does not match the regex pattern.
type SemanticSimilarity ¶
type SemanticSimilarity struct {
evaluation.BaseMetric
}
SemanticSimilarity is a placeholder for embedding-based semantic similarity. In a full implementation, this would use an embedding model.
func NewSemanticSimilarity ¶
func NewSemanticSimilarity() *SemanticSimilarity
NewSemanticSimilarity creates a new SemanticSimilarity metric. Note: This requires an embedding provider to be useful.
func (*SemanticSimilarity) Score ¶
func (m *SemanticSimilarity) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score falls back to cosine similarity for now. Override this with embedding-based similarity when available.
type StartsWith ¶
type StartsWith struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
StartsWith checks if the output starts with the expected value.
func NewStartsWith ¶
func NewStartsWith(caseSensitive bool) *StartsWith
NewStartsWith creates a new StartsWith metric.
func (*StartsWith) Score ¶
func (m *StartsWith) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output starts with expected.
type URLFormat ¶
type URLFormat struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
URLFormat checks if the output contains a valid URL format.
func (*URLFormat) Score ¶
func (m *URLFormat) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output is a valid URL format.
type UUIDFormat ¶
type UUIDFormat struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
UUIDFormat checks if the output is a valid UUID format.
func NewUUIDFormat ¶
func NewUUIDFormat() *UUIDFormat
NewUUIDFormat creates a new UUIDFormat metric.
func (*UUIDFormat) Score ¶
func (m *UUIDFormat) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output is a valid UUID format.
type WordCount ¶
type WordCount struct {
evaluation.BaseMetric
// contains filtered or unexported fields
}
WordCount checks if the output word count is within a range.
func NewWordCount ¶
NewWordCount creates a new WordCount metric.
func (*WordCount) Score ¶
func (m *WordCount) Score(ctx context.Context, input evaluation.MetricInput) *evaluation.ScoreResult
Score evaluates if output word count is within range.