60 lines
1.1 KiB
Plaintext
60 lines
1.1 KiB
Plaintext
|
# Bayes classifier: OSB tokenizer, statistics stored in Redis, with separate
# statistics per recipient domain (see `per_user` below).
classifier "bayes" {
  tokenizer {
    name = "osb";
  }

  # Token storage: Redis reachable at host `redis`, port 6379.
  backend = "redis";
  servers = "redis:6379";

  # Thresholds (semantics per the rspamd statistics documentation):
  # minimum number of tokens in a message, and minimum number of learned
  # messages, before the classifier is applied.
  min_tokens = 11;
  min_learns = 200;
  autolearn = true;

  # Lua callback that selects the statistics key for a message.
  # The heredoc terminator must stay at the very start of its line.
  per_user = <<EOD
-- Returns the domain of the first recipient reported by
-- task:get_recipients(1), or nil when there is no recipient or the
-- recipient has no domain.
return function(task)
  local rcpt = task:get_recipients(1)

  -- Keep the entry local (it used to leak into the global environment) and
  -- tolerate an empty recipient list instead of indexing nil.
  local first_rcpt = rcpt and rcpt[1]

  if first_rcpt and first_rcpt['domain'] then
    return first_rcpt['domain']
  end

  return nil
end
EOD

  statfile {
    symbol = "BAYES_HAM";
    spam = false;
  }
  statfile {
    symbol = "BAYES_SPAM";
    spam = true;
  }

  # Lua callback consulted before a message is learned.
  learn_condition =<<EOD
-- Returns true when learning may proceed. Returns false plus an explanatory
-- message when the stored 'bayes_prob' value already places the message in
-- the class being learned (>= 0.95 for spam, <= 0.05 for ham).
-- `is_unlearn` is accepted but not consulted here.
return function(task, is_spam, is_unlearn)
  local prob = task:get_mempool():get_variable('bayes_prob', 'double')

  if prob then
    local in_class = false
    local cl
    if is_spam then
      cl = 'spam'
      in_class = prob >= 0.95
    else
      cl = 'ham'
      in_class = prob <= 0.05
    end

    if in_class then
      -- Report the distance from the neutral 0.5 point, scaled to 0..100%.
      return false,string.format('already in class %s; probability %.2f%%',
        cl, math.abs((prob - 0.5) * 200.0))
    end
  end

  -- No stored probability, or the message is not yet confidently classified.
  return true
end
EOD
}
|