2017-03-02 18:23:23 +08:00
|
|
|
# Bayes classifier named "per_domain_bayes" using the redis backend.
# Statistics are split per recipient domain via the per_user callback below.
classifier "bayes" {
  tokenizer {
    name = "osb";
  }

  backend = "redis";
  name = "per_domain_bayes";
  # NOTE(review): presumably the minimum token count / learn count required
  # before the classifier is applied - confirm against the rspamd
  # statistics documentation.
  min_tokens = 11;
  min_learns = 20;
  new_schema = true;
  expiry = 50d;

  # Lua callback selecting the per-user statistics key for a task.
  # Returns the 'domain' field of the first recipient, or nil when the task
  # has no recipients or the first recipient has no domain.
  # The heredoc terminator must stay at the very start of its line.
  per_user = <<EOD
return function(task)
  -- NOTE(review): the argument 1 presumably selects the SMTP envelope
  -- recipients - confirm against the rspamd task API.
  local rcpt = task:get_recipients(1)

  if rcpt then
    -- Must be local: a bare assignment would create a global shared by every
    -- invocation of this callback in the same Lua state.
    local one_rcpt = rcpt[1]
    -- Guard against an empty recipient list so we fall through to nil
    -- instead of raising "attempt to index a nil value".
    if one_rcpt and one_rcpt['domain'] then
      return one_rcpt['domain']
    end
  end

  return nil
end
EOD

  statfile {
    symbol = "BAYES_HAM";
    spam = false;
  }

  statfile {
    symbol = "BAYES_SPAM";
    spam = true;
  }

  # Lua callback deciding whether a message may be learned.
  # Returns true to allow learning, or false plus an explanatory string when
  # the stored 'bayes_prob' already places the message in the requested class
  # (>= 0.95 for spam, <= 0.05 for ham). is_unlearn is accepted but not
  # consulted.
  learn_condition =<<EOD
return function(task, is_spam, is_unlearn)
  local prob = task:get_mempool():get_variable('bayes_prob', 'double')

  -- Without a stored probability there is nothing to compare against,
  -- so learning is allowed.
  if prob then
    local in_class = false
    local cl
    if is_spam then
      cl = 'spam'
      in_class = prob >= 0.95
    else
      cl = 'ham'
      in_class = prob <= 0.05
    end

    if in_class then
      -- Distance from 0.5 scaled onto 0..100 for the human-readable message.
      return false,string.format('already in class %s; probability %.2f%%',
        cl, math.abs((prob - 0.5) * 200.0))
    end
  end

  return true
end
EOD
}
|