Added fix for #4534

- in case of startup crash and automtic restart, ntopng would reload lists resetting errors. This caused the storm of updates
- updated the URL to https://snort.org/downloads/ip-block-list
- in case of failure we retry the URL only once (it used to be twice) after one hour.
This commit is contained in:
Luca Deri 2020-10-15 17:14:46 +02:00
parent b270dd1dd7
commit 77ab1e3a8a
3 changed files with 95 additions and 71 deletions

View file

@ -13,14 +13,21 @@ local alert_consts = require "alert_consts"
-- ##############################################
-- NOTE: metadata and status are handled as separate keys.
-- Metadata can only be updated by the gui, whereas status can only be
-- updated by housekeeping. This avoid concurrency issues.
local METADATA_KEY = "ntopng.prefs.category_lists.metadata"
local STATUS_KEY = "ntopng.prefs.category_lists.status"
local trace_level = TRACE_INFO -- TRACE_NORMAL
local CUSTOM_CATEGORY_MINING = 99
local CUSTOM_CATEGORY_MALWARE = 100
local CUSTOM_CATEGORY_MINING = 99
local CUSTOM_CATEGORY_MALWARE = 100
local CUSTOM_CATEGORY_ADVERTISEMENT = 101
local DEFAULT_UPDATE_INTERVAL = 86400
local MAX_LIST_ERRORS = 3
local DEFAULT_UPDATE_INTERVAL = 86400
local MAX_LIST_ERRORS = 2
local MIN_DOWNLOAD_INTERVAL = 3600
-- IP addresses have very litte impact on memory/load time.
-- 150k IP addresses rules can be loaded in 2 seconds
@ -61,8 +68,8 @@ local BUILTIN_LISTS = {
format = "ip",
enabled = true,
update_interval = DEFAULT_UPDATE_INTERVAL,
}, ["Cisco Talos Intelligence"] = {
url = "https://talosintelligence.com/documents/ip-blacklist",
}, ["Snort IP Blacklist"] = {
url = "https://snort.org/documents/ip-blacklist",
category = CUSTOM_CATEGORY_MALWARE,
format = "ip",
enabled = true,
@ -79,12 +86,6 @@ local BUILTIN_LISTS = {
format = "ip",
enabled = true,
update_interval = DEFAULT_UPDATE_INTERVAL,
}, ["MalwareDomainList Hosts"] = {
url = "https://www.malwaredomainlist.com/hostslist/hosts.txt",
category = CUSTOM_CATEGORY_MALWARE,
format = "hosts",
enabled = false,
update_interval = DEFAULT_UPDATE_INTERVAL,
}, ["Anti-WebMiner"] = {
url = "https://raw.githubusercontent.com/greatis/Anti-WebMiner/master/hosts",
category = CUSTOM_CATEGORY_MINING,
@ -126,13 +127,6 @@ local BUILTIN_LISTS = {
-- ##############################################
-- NOTE: metadata and status are handled as separate keys.
-- Metadata can only be updated by the gui, whereas status can only be
-- updated by housekeeping. This avoid concurrency issues.
local METADATA_KEY = "ntopng.prefs.category_lists.metadata"
local STATUS_KEY = "ntopng.prefs.category_lists.status"
local function loadListsFromRedis()
local lists_metadata = ntop.getPref(METADATA_KEY)
local lists_status = ntop.getPref(STATUS_KEY)
@ -167,13 +161,13 @@ end
-- @brief save the lists stats and other status to redis.
-- @note see saveListsMetadataToRedis for user preferences information
local function saveListsStatusToRedis(lists)
local function saveListsStatusToRedis(lists, caller)
local status = {}
for list_name, list in pairs(lists or {}) do
status[list_name] = list.status
end
ntop.setPref(STATUS_KEY, json.encode(status))
end
@ -253,7 +247,7 @@ end
-- ##############################################
-- Force a single list reload
function lists_utils.updateList(list_name)
function lists_utils.updateList(list_name)
ntop.setCache("ntopng.cache.category_lists.update." .. list_name, "1")
lists_utils.downloadLists()
end
@ -281,19 +275,20 @@ local function getNextListUpdate(list)
if(list.status.last_error and (list.status.num_errors < MAX_LIST_ERRORS)) then
-- When the download fails, retry next hour
interval = 3600
interval = MIN_DOWNLOAD_INTERVAL
else
interval = list.update_interval
end
local next_update
-- align if possible
if interval == 3600 then
next_update = ntop.roundTime(list.status.last_update, 3600, false)
elseif interval == 86400 then
next_update = ntop.roundTime(list.status.last_update, 86400, true --[[ UTC align ]])
else
if(interval < MIN_DOWNLOAD_INTERVAL) then interval = MIN_DOWNLOAD_INTERVAL end
next_update = list.status.last_update + interval
end
@ -302,15 +297,36 @@ end
-- Returns true if the given list should be updated
function shouldUpdate(list_name, list, now)
local list_file = getListCacheFile(list_name, false)
local next_update = getNextListUpdate(list)
local list_file
local next_update
if(list.enabled == false) then
return(false)
end
list_file = getListCacheFile(list_name, false)
next_update = getNextListUpdate(list, now)
-- note: num_errors is used to avoid retying downloading the same list again when
-- the file does not exist
return(list.enabled and
((now >= next_update) or
(not ntop.exists(list_file) and (list.status.num_errors < MAX_LIST_ERRORS)) or
(ntop.getCache("ntopng.cache.category_lists.update." .. list_name) == "1")))
if(false) then
tprint('---------------')
tprint(list_file)
tprint('-')
tprint(ntop.getCache("ntopng.cache.category_lists.update." .. list_name))
tprint('-')
tprint(list)
tprint('---------------')
tprint(((now >= next_update) or
(not ntop.exists(list_file) and (list.status.num_errors < MAX_LIST_ERRORS)) or
(ntop.getCache("ntopng.cache.category_lists.update." .. list_name) == "1")))
return(false)
else
-- note: num_errors is used to avoid retying downloading the same list again when
-- the file does not exist
return(((now >= next_update) or
(not ntop.exists(list_file) and (list.status.num_errors < MAX_LIST_ERRORS)) or
(ntop.getCache("ntopng.cache.category_lists.update." .. list_name) == "1")))
end
end
-- ##############################################
@ -331,10 +347,11 @@ local function checkListsUpdate(timeout)
for list_name, list in pairsByKeys(lists) do
local list_file = getListCacheFile(list_name, false)
if shouldUpdate(list_name, list, now) then
if(shouldUpdate(list_name, list, now)) then
local temp_fname = getListCacheFile(list_name, true)
traceError(trace_level, TRACE_CONSOLE, string.format("Updating list '%s'...", list_name))
local msg = string.format("Updating list '%s' [%s]...", list_name, list.url)
traceError(trace_level, TRACE_INFO, string.format("Updating list '%s'...", list_name))
local started_at = os.time()
local res = ntop.httpFetch(list.url, temp_fname, timeout)
@ -352,6 +369,8 @@ local function checkListsUpdate(timeout)
list_name
)
)
msg = msg .. "OK"
else
-- failure
local respcode = 0
@ -384,8 +403,12 @@ local function checkListsUpdate(timeout)
last_error
)
)
msg = msg .. "ERROR ["..last_error.."]"
end
traceError(TRACE_NORMAL, TRACE_CONSOLE, msg)
now = os.time()
-- set last_update even on failure to avoid blocking on the same list again
list.status.last_update = now
@ -400,7 +423,7 @@ local function checkListsUpdate(timeout)
end
-- update lists state
saveListsStatusToRedis(lists)
saveListsStatusToRedis(lists, "checkListsUpdate")
if(not all_processed) then
-- Still in progress, do not mark as finished yet
@ -650,7 +673,7 @@ local function reloadListsNow()
end
-- update lists state
saveListsStatusToRedis(lists)
saveListsStatusToRedis(lists, "reloadListsNow")
-- Load user-customized categories
for category_id, hosts in pairs(user_custom_categories) do
@ -669,9 +692,10 @@ local function reloadListsNow()
-- Calculate stats
stats.duration = (os.time() - stats.begin)
traceError(trace_level, TRACE_CONSOLE, string.format("Lists (%u hosts, %u IPs, %u JA3) loaded in %d seconds",
stats.num_hosts, stats.num_ips, stats.num_ja3, stats.duration))
traceError(TRACE_NORMAL, TRACE_CONSOLE,
string.format("Category Lists (%u hosts, %u IPs, %u JA3) loaded in %d sec",
stats.num_hosts, stats.num_ips, stats.num_ja3, stats.duration))
-- Save the stats
ntop.setCache("ntopng.cache.category_lists.load_stats", json.encode(stats))
@ -692,21 +716,6 @@ end
-- ##############################################
-- @brief Clears the lists download errors
function lists_utils.clearErrors()
local lists = lists_utils.getCategoryLists()
for _, list in pairs(lists) do
if(list.status ~= nil) then
list.status.num_errors = 0
end
end
saveListsStatusToRedis(lists)
end
-- ##############################################
-- This is run in housekeeping.lua
function lists_utils.checkReloadLists()
local forced_reload = (ntop.getCache("ntopng.cache.reload_lists_utils") == "1")
@ -744,4 +753,16 @@ end
-- ##############################################
function lists_utils.startup()
traceError(TRACE_NORMAL, TRACE_CONSOLE, "Refreshing category lists...")
lists_utils.downloadLists()
lists_utils.reloadLists()
-- Need to do the actual reload also here as otherwise some
-- flows may be misdetected until housekeeping.lua is executed
lists_utils.checkReloadLists()
end
-- ##############################################
return lists_utils