-- 444 lines, 12 KiB, Lua
local fun = require "fun"
|
|
local rspamd_logger = require "rspamd_logger"
|
|
local util = require "rspamd_util"
|
|
local lua_util = require "lua_util"
|
|
local rspamd_regexp = require "rspamd_regexp"
|
|
local ucl = require "ucl"
|
|
|
|
-- Shared state of the converter:
--   complicated - raw config lines that could not be converted; they are
--                 written out unchanged to auto_sa.conf
--   rules       - converted rules grouped by rule type (header rules are
--                 grouped once more by header name)
--   scores      - symbol name -> score collected from `score` lines
local complicated, rules, scores = {}, {}, {}
|
|
|
|
--- Re-join the tail of a tokenised line into a single string.
-- @param words array of string tokens
-- @param start number of leading tokens to drop
-- @return the remaining tokens joined by single spaces
local function words_to_re(words, start)
  local remaining = fun.totable(fun.drop_n(start, words))

  return table.concat(remaining, " ")
end
|
|
|
|
--- Collect every match of a Lua pattern found in a string.
-- @param str string to scan
-- @param delim Lua pattern describing a single token; when omitted, runs of
--   non-whitespace characters are used
-- @return array with the matched tokens in order of appearance
local function split(str, delim)
  local pattern = delim
  if not pattern then
    pattern = '[^%s]+'
  end

  local tokens = {}
  for piece in string.gmatch(str, pattern) do
    tokens[#tokens + 1] = piece
  end

  return tokens
end
|
|
|
|
--- Parse the header selector of a SpamAssassin `header` rule.
-- The selector is a `|`-separated list of header names, each optionally
-- followed by `:`-separated modifiers (`addr`, `name`, `raw`, `case`).
-- The outcome is stored in `cur_rule`:
--   * `cur_rule.header`   - array with one parameter table per accepted header
--   * `cur_rule.ordinary` - true only when the selector is exactly one plain
--     header without modifiers (and is not `ALL`, `MESSAGEID` or `ToCc`)
-- @param hline header selector string
-- @param cur_rule rule table that is being built; modified in place
local function handle_header_def(hline, cur_rule)
  -- Build a callback that parses an address list and returns the given field
  -- (`addr` or `name`) of every parsed element that has it
  local function addr_field_extractor(field)
    return function(str)
      local addr_parsed = util.parse_addr(str)
      local ret = {}

      if addr_parsed then
        for _, elt in ipairs(addr_parsed) do
          if elt[field] then
            table.insert(ret, elt[field])
          end
        end
      end

      return ret
    end
  end

  -- Now check for modifiers inside header's name
  local hdrs = split(hline, '[^|]+')
  local hdr_params = {}
  -- Check if an re is an ordinary re
  local ordinary = true

  for _, h in ipairs(hdrs) do
    if h == 'ALL' or h == 'ALL:raw' then
      ordinary = false
    else
      local args = split(h, '[^:]+')
      -- A fresh table per alternative: sharing one table between iterations
      -- would make every entry of hdr_params alias the last header parsed
      local cur_param = {
        strong = false,
        raw = false,
        header = args[1],
      }

      if args[2] then
        -- We have some ops that are required for the header, so it's not ordinary
        ordinary = false
      end

      fun.each(function(func)
        if func == 'addr' or func == 'name' then
          cur_param['function'] = addr_field_extractor(func)
        elseif func == 'raw' then
          cur_param['raw'] = true
        elseif func == 'case' then
          cur_param['strong'] = true
        else
          rspamd_logger.warnx(rspamd_config, 'Function %1 is not supported in %2',
            func, cur_rule['symbol'])
        end
      end, fun.tail(args))

      -- Some header rules require splitting to check of multiple headers
      if cur_param['header'] == 'MESSAGEID' then
        -- Special case for spamassassin
        ordinary = false
      elseif cur_param['header'] == 'ToCc' then
        ordinary = false
      else
        table.insert(hdr_params, cur_param)
      end
    end
  end

  -- Assigned after the loop so that a selector without any alternative still
  -- yields a well-defined (non-ordinary) result instead of stale fields
  cur_rule['ordinary'] = ordinary and #hdr_params == 1
  cur_rule['header'] = hdr_params
end
|
|
|
|
--- Parse one SpamAssassin configuration file.
-- Simple `header`, `body`, `rawbody`, `full` and `uri` regexp rules are stored
-- in the `rules` table, `score` lines fill the `scores` table, and every line
-- that cannot be converted is appended to `complicated`.
-- @param f object providing a `lines()` iterator over the config lines
local function process_sa_conf(f)
  local cur_rule = {}
  local valid_rule = false

  -- Start collecting a new rule from a clean state
  local function reset_cur_rule()
    cur_rule = {}
    valid_rule = false
  end

  -- Store the rule collected so far in `rules`; the state is always reset so a
  -- rejected rule cannot leak its fields into the next one
  local function insert_cur_rule()
    local rtype = cur_rule.type
    local symbol = cur_rule['symbol']
    local hdr_name

    if rtype == 'header' then
      local first = cur_rule.header and cur_rule.header[1]
      hdr_name = first and first.header
    end

    -- Validate before touching `rules` so bad rules leave no empty groups
    if not rtype or not symbol or (rtype == 'header' and not hdr_name) then
      rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', cur_rule)
      reset_cur_rule()
      return
    end

    local target = rules[rtype]
    if not target then
      target = {}
      rules[rtype] = target
    end

    if rtype == 'header' then
      -- Header rules are grouped per header name
      if not target[hdr_name] then
        target[hdr_name] = {}
      end
      target = target[hdr_name]
    end

    target[symbol] = cur_rule
    reset_cur_rule()
  end

  -- Keep an unconvertible rule definition line verbatim and forget whatever
  -- was collected for it (only used once the previous rule has been flushed)
  local function defer_rule(line)
    table.insert(complicated, line)
    reset_cur_rule()
  end

  local function parse_score(words)
    if #words == 3 then
      -- score rule <x>
      return tonumber(words[3])
    elseif #words == 6 then
      -- score rule <x1> <x2> <x3> <x4>
      -- we assume here that bayes and network are enabled and select <x4>
      return tonumber(words[6])
    else
      rspamd_logger.errx(rspamd_config, 'invalid score for %1', words[2])
    end

    return 0
  end

  -- Common handling of `body`, `rawbody` and `full` rules:
  -- <keyword> SYMBOL /regexp/
  -- Returns true when the rule has been accepted as a simple regexp rule
  local function parse_content_rule(words, rtype, raw)
    cur_rule['symbol'] = words[2]

    local first = words[3] and string.sub(words[3], 1, 1)
    if first ~= '/' and first ~= 'm' then
      -- might be function
      return false
    end

    cur_rule['type'] = rtype
    cur_rule['re_expr'] = words_to_re(words, 2)
    cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
    if raw then
      cur_rule['raw'] = true
    end

    if not cur_rule['re'] then
      rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
        cur_rule['re_expr'], cur_rule['symbol'])
      return false
    end

    valid_rule = true
    return true
  end

  local skip_to_endif = false
  local if_nested = 0

  local function process_line(raw_line)
    local l = lua_util.rspamd_str_trim(raw_line)
    -- Replace bla=~/re/ with bla =~ /re/ (#2372)
    l = (l:gsub('([^%s])%s*([=!]~)%s*([^%s])', '%1 %2 %3'))

    if string.len(l) == 0 or string.sub(l, 1, 1) == '#' then
      return
    end

    -- Unbalanced if/endif
    if if_nested < 0 then if_nested = 0 end

    if skip_to_endif then
      if string.match(l, '^endif') then
        if_nested = if_nested - 1

        if if_nested == 0 then
          skip_to_endif = false
        end
      elseif string.match(l, '^if') then
        if_nested = if_nested + 1
      elseif string.match(l, '^else') then
        -- Else counterpart for if
        skip_to_endif = false
      end

      table.insert(complicated, l)
      return
    end

    -- Conditional directives are stored once and must not reach the keyword
    -- dispatch below, which would append the very same line a second time
    if string.match(l, '^if') then
      -- ifplugin, if !plugin(...) and any unknown if are handled alike
      skip_to_endif = true
      if_nested = if_nested + 1
      table.insert(complicated, l)
      return
    elseif string.match(l, '^else') then
      -- Else counterpart for if
      skip_to_endif = true
      table.insert(complicated, l)
      return
    elseif string.match(l, '^endif') then
      if_nested = if_nested - 1
      table.insert(complicated, l)
      return
    end

    -- Skip comments
    local words = fun.totable(fun.take_while(
      function(w) return string.sub(w, 1, 1) ~= '#' end,
      fun.filter(function(w)
        return w ~= "" end,
      fun.iter(split(l)))))

    if words[1] == "header" then
      -- header SYMBOL Header ~= /regexp/
      if valid_rule then
        insert_cur_rule()
      end

      if words[4] ~= '=~' then
        -- Negated (!~) matches and non-regexp header rules are not converted
        defer_rule(l)
        return
      end

      cur_rule['type'] = 'header'
      cur_rule['symbol'] = words[2]
      cur_rule['re_expr'] = words_to_re(words, 4)

      if string.find(cur_rule['re_expr'], '%s+%[if%-unset:') then
        defer_rule(l)
        return
      end

      cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])

      if not cur_rule['re'] then
        rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
          cur_rule['re_expr'], cur_rule['symbol'])
        defer_rule(l)
        return
      end

      handle_header_def(words[3], cur_rule)
      if not cur_rule['ordinary'] then
        defer_rule(l)
        return
      end

      valid_rule = true
    elseif words[1] == "body" then
      -- body SYMBOL /regexp/
      if valid_rule then
        insert_cur_rule()
      end

      if not parse_content_rule(words, 'sabody', false) then
        defer_rule(l)
        return
      end
    elseif words[1] == "rawbody" then
      -- rawbody SYMBOL /regexp/
      if valid_rule then
        insert_cur_rule()
      end

      if not parse_content_rule(words, 'sarawbody', false) then
        defer_rule(l)
        return
      end
    elseif words[1] == "full" then
      -- full SYMBOL /regexp/
      if valid_rule then
        insert_cur_rule()
      end

      if not parse_content_rule(words, 'message', true) then
        defer_rule(l)
        return
      end
    elseif words[1] == "uri" then
      -- uri SYMBOL /regexp/
      if valid_rule then
        insert_cur_rule()
      end

      cur_rule['type'] = 'uri'
      cur_rule['symbol'] = words[2]
      cur_rule['re_expr'] = words_to_re(words, 2)
      cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])

      if cur_rule['re'] and cur_rule['symbol'] then
        valid_rule = true
      else
        defer_rule(l)
        return
      end
    elseif words[1] == "meta" then
      -- meta SYMBOL expression
      if valid_rule then
        insert_cur_rule()
      end

      defer_rule(l)
      return
    elseif words[1] == "describe" and valid_rule then
      cur_rule['description'] = words_to_re(words, 2)
    elseif words[1] == "score" then
      if not words[2] then
        -- A bare `score` line has no symbol to index the scores table with
        table.insert(complicated, l)
        return
      end

      scores[words[2]] = parse_score(words)
    else
      -- Unknown keyword: keep the line, the rule being collected stays intact
      table.insert(complicated, l)
      return
    end
  end

  for l in f:lines() do
    process_line(l)
  end

  -- Flush the last rule of the file
  if valid_rule then
    insert_cur_rule()
  end
end
|
|
|
|
-- Process every SpamAssassin config file given on the command line
for _, matched in ipairs(arg) do
  local f = io.open(matched, "r")

  if f then
    rspamd_logger.messagex(rspamd_config, 'loading SA rules from %s', matched)
    process_sa_conf(f)
    -- Release the handle right away instead of waiting for the collector
    f:close()
  else
    rspamd_logger.errx(rspamd_config, "cannot open %1", matched)
  end
end
|
|
|
|
-- Generated multimap rule definitions keyed by lower-cased map name
local multimap_conf = {}

--- Write the regexp map file for one group of converted rules and register
-- the matching multimap rule in `multimap_conf`.
-- Each map line has the form `/<regexp>/ <SYMBOL>:<score>`; symbols without
-- a known score get 0.
-- @param what rule type: 'sabody', 'sarawbody', 'full' (or 'message'),
--   'uri' or 'header'
-- @param syms table mapping symbol name -> rule table (the `re` field is used)
-- @param hdr header name, required when `what` is 'header'
local function handle_rule(what, syms, hdr)
  local mtype
  local filter
  local fname
  local header
  local sym = what:upper()

  if what == 'sabody' then
    mtype = 'content'
    fname = 'body_re.map'
    filter = 'oneline'
  elseif what == 'sarawbody' then
    fname = 'raw_body_re.map'
    mtype = 'content'
    filter = 'rawtext'
  elseif what == 'full' or what == 'message' then
    -- `full` rules are stored under the 'message' type by the parser; treat
    -- both names alike so that these rules are not rejected as unknown
    sym = 'FULL'
    fname = 'full_re.map'
    mtype = 'content'
    filter = 'full'
  elseif what == 'uri' then
    fname = 'uri_re.map'
    mtype = 'url'
    filter = 'full'
  elseif what == 'header' then
    fname = ('hdr_' .. hdr .. '_re.map'):lower()
    mtype = 'header'
    header = hdr
    sym = sym .. '_' .. hdr:upper()
  else
    rspamd_logger.errx('unknown type: %s', what)
    return
  end

  local re_file, open_err = io.open(fname, 'w')
  if not re_file then
    rspamd_logger.errx('cannot open %s for writing: %s', fname, open_err)
    return
  end

  local conf = {
    type = mtype,
    filter = filter,
    symbol = 'SA_MAP_AUTO_' .. sym,
    regexp = true,
    map = fname,
    header = header,
    symbols = {}
  }

  -- pairs() has no defined order; sort the symbols so that repeated runs
  -- produce identical output
  local names = {}
  for k in pairs(syms) do
    names[#names + 1] = k
  end
  table.sort(names)

  for _, k in ipairs(names) do
    local score = scores[k] or 0.0

    re_file:write(string.format('/%s/ %s:%f\n', tostring(syms[k].re), k, score))
    table.insert(conf.symbols, k)
  end

  re_file:close()

  multimap_conf[sym:lower()] = conf
  rspamd_logger.messagex('stored %s regexp in %s', sym:lower(), fname)
end
|
|
|
|
-- Emit one map per rule type; header rules get one map per header name
for rtype, group in pairs(rules) do
  if rtype ~= 'header' then
    handle_rule(rtype, group)
  else
    for hname, hrules in pairs(group) do
      handle_rule(rtype, hrules, hname)
    end
  end
end
|
|
|
|
-- Serialise the generated multimap rules as UCL into auto_multimap.conf
local out = ucl.to_format(multimap_conf, 'ucl')
local mmap_conf, mmap_err = io.open('auto_multimap.conf', 'w')
if not mmap_conf then
  -- Abort with a readable reason instead of failing on a nil handle
  error(string.format('cannot open auto_multimap.conf for writing: %s',
    tostring(mmap_err)))
end
mmap_conf:write(out)
mmap_conf:close()
rspamd_logger.messagex('stored multimap conf in %s', 'auto_multimap.conf')
|
|
|
|
-- Dump every line that could not be converted into auto_sa.conf
local sa_remain, sa_err = io.open('auto_sa.conf', 'w')
if not sa_remain then
  -- Abort with a readable reason instead of failing on a nil handle
  error(string.format('cannot open auto_sa.conf for writing: %s',
    tostring(sa_err)))
end
for _, l in ipairs(complicated) do
  -- Whitespace-only lines carry no information
  if not string.match(l, '^%s+$') then
    sa_remain:write(l)
    sa_remain:write('\n')
  end
end
sa_remain:close()
rspamd_logger.messagex('stored sa remains conf in %s', 'auto_sa.conf')
|