Alerts API cleanup and JSON migration

This commit is contained in:
emanuele-f 2019-07-29 15:02:14 +02:00
parent 3ca04b95ec
commit c183a577be
42 changed files with 1353 additions and 880 deletions

View file

@ -111,120 +111,6 @@ end
-- ##############################################
--! @brief Creates an alert object
--! @param metadata the information about the alert type and severity
--! @return an alert object on success, nil on error
--! @note on success the object is also registered into known_alerts
function alerts_api:newAlert(metadata)
  if(metadata == nil) then
    alertErrorTraceback("alerts_api:newAlert() missing argument")
    return(nil)
  end

  local obj = table.clone(metadata)

  -- A textual periodicity is mapped to its value via str_2_periodicity
  if type(obj.periodicity) == "string" then
    if(str_2_periodicity[obj.periodicity]) then
      obj.periodicity = str_2_periodicity[obj.periodicity]
    else
      alertErrorTraceback("unknown periodicity '".. obj.periodicity .."'")
      return(nil)
    end
  end

  -- Report every missing mandatory field, then bail out: going on with a
  -- non-string value would raise while building the error messages below
  local valid = true

  if(type(obj.entity) ~= "string") then
    alertErrorTraceback("'entity' string required")
    valid = false
  end

  if(type(obj.type) ~= "string") then
    alertErrorTraceback("'type' string required")
    valid = false
  end

  if(type(obj.severity) ~= "string") then
    alertErrorTraceback("'severity' string required")
    valid = false
  end

  if(not valid) then
    return(nil)
  end

  obj.entity_type_id = alertEntity(obj.entity)
  obj.type_id = alertType(obj.type)
  obj.severity_id = alertSeverity(obj.severity)
  obj.periodicity = obj.periodicity or 0

  -- The textual names must resolve to numeric identifiers
  if(type(obj.entity_type_id) ~= "number") then
    alertErrorTraceback("unknown entity_type '".. obj.entity .."'")
    valid = false
  end

  if(type(obj.type_id) ~= "number") then
    alertErrorTraceback("unknown alert_type '".. obj.type .."'")
    valid = false
  end

  if(type(obj.severity_id) ~= "number") then
    alertErrorTraceback("unknown severity '".. obj.severity .."'")
    valid = false
  end

  if(not valid) then
    -- Do not register a half-initialized object into known_alerts
    return(nil)
  end

  local alert_id = makeAlertId(obj.type_id, obj.subtype, obj.periodicity, obj.entity_type_id)
  known_alerts[alert_id] = obj

  setmetatable(obj, self)
  self.__index = self

  return(obj)
end
-- ##############################################
-- TODO change in "store"
--! @brief Triggers a new alert or refreshes an existing one (if already engaged)
--! @param entity_value the string representing the entity of the alert (e.g. "192.168.1.1")
--! @param alert_message the message (string) or json (table) to store
--! @param when (optional) the time when the trigger event occurs
--! @return true on success, false otherwise
--! @note false is also returned, without storing anything, when the alert is disabled for the entity
function alerts_api:trigger(entity_value, alert_message, when)
  local ifid = interface.getId()
  local msg = alert_message

  when = when or os.time()

  -- Tables are serialized to JSON, any other value is passed through as is
  if(type(alert_message) == "table") then
    msg = json.encode(alert_message)
  end

  if alerts_api.isEntityAlertDisabled(ifid, self.entity_type_id, entity_value, self.type_id) then
    -- Disabled alerts are only counted (with -1 as granularity), never stored
    incDisabledAlertsCount(ifid, -1, self.entity_type_id, entity_value, self.type_id)
    return(false)
  end

  local rv = interface.storeAlert(when, when, self.periodicity,
    self.type_id, self.subtype or "", self.severity_id,
    self.entity_type_id, entity_value, msg)

  if(self.entity == "host") then
    -- NOTE: for engaged alerts this operation is performed during trigger in C
    interface.incTotalHostAlerts(entity_value, self.type_id)
  end

  if(rv) then
    -- Notify the endpoints only when the alert has actually been stored
    local message = {
      ifid = ifid,
      entity_type = self.entity_type_id,
      entity_value = entity_value,
      type = self.type_id,
      severity = self.severity_id,
      message = msg,
      tstamp = when,
      action = "store",
    }

    alert_endpoints.dispatchNotification(message, json.encode(message))
  end

  return(rv)
end
-- ##############################################
--! @brief Returns the alert object matching a raw alert record
--! @param metadata a record providing the alert_type, alert_subtype, alert_periodicity, alert_entity and alert_severity fields
--! @return the object already registered in known_alerts for the record, otherwise the result of alerts_api:newAlert
function alerts_api.parseAlert(metadata)
  local key = makeAlertId(metadata.alert_type, metadata.alert_subtype, metadata.alert_periodicity, metadata.alert_entity)
  local cached = known_alerts[key]

  if cached then
    return(cached)
  end

  -- Not seen before: build the descriptor from the raw identifiers
  local descriptor = {
    entity = alertEntityRaw(metadata.alert_entity),
    type = alertTypeRaw(metadata.alert_type),
    severity = alertSeverityRaw(metadata.alert_severity),
    periodicity = tonumber(metadata.alert_periodicity),
    subtype = metadata.alert_subtype,
  }

  return(alerts_api:newAlert(descriptor))
end
-- ##############################################
-- TODO unify alerts and metadata/notifications format
function alerts_api.parseNotification(metadata)
local alert_id = makeAlertId(alertType(metadata.type), metadata.alert_subtype, metadata.alert_periodicity, alertEntity(metadata.entity_type))
@ -335,55 +221,112 @@ end
-- ##############################################
-- TODO: remove the "new_" prefix and unify with other alerts
--! @brief Stores a single alert (or event) into the alerts database
--! @param entity_info data returned by one of the entity_info building functions
--! @param type_info data returned by one of the type_info building functions
--! @param when (optional) the time of the alert; defaults to the current time
--! @return true if the alert was successfully stored, false otherwise
--! @note false is also returned, without storing anything, when the alert is disabled for the entity
function alerts_api.store(entity_info, type_info, when)
  local ifid = interface.getId()
  local granularity = type_info.alert_granularity
  -- The granularity is optional here: fall back to 0 seconds / id -1
  local granularity_sec = granularity and granularity.granularity_seconds or 0
  local granularity_id = granularity and granularity.granularity_id or -1
  local alert_json = json.encode(type_info.alert_type_params)
  local subtype = type_info.alert_subtype or ""

  when = when or os.time()

  if alerts_api.isEntityAlertDisabled(ifid, entity_info.alert_entity.entity_id, entity_info.alert_entity_val, type_info.alert_type.alert_id) then
    -- Disabled alerts are only counted, never stored
    incDisabledAlertsCount(ifid, granularity_id, entity_info.alert_entity.entity_id, entity_info.alert_entity_val, type_info.alert_type.alert_id)
    return(false)
  end

  local rv = interface.storeAlert(when, when, granularity_sec,
    type_info.alert_type.alert_id, subtype, type_info.alert_severity.severity_id,
    entity_info.alert_entity.entity_id, entity_info.alert_entity_val, alert_json)

  if(entity_info.alert_entity.entity_id == alertEntity("host")) then
    -- NOTE: for engaged alerts this operation is performed during trigger in C
    interface.incTotalHostAlerts(entity_info.alert_entity_val, type_info.alert_type.alert_id)
  end

  if(rv) then
    -- Notify the endpoints only when the alert has actually been stored
    local message = {
      ifid = ifid,
      granularity = granularity_sec,
      entity_type = entity_info.alert_entity.entity_id,
      entity_value = entity_info.alert_entity_val,
      type = type_info.alert_type.alert_id,
      severity = type_info.alert_severity.severity_id,
      message = alert_json,
      subtype = subtype,
      tstamp = when,
      action = "store",
    }

    alert_endpoints.dispatchNotification(message, json.encode(message))
  end

  return(rv)
end
-- ##############################################
--! @brief Trigger an alert of given type on the entity
--! @param entity_info data returned by one of the entity_info building functions
--! @param type_info data returned by one of the type_info building functions
--! @param when (optional) the time when the release event occurs
--! @return true if the alert was triggered, false otherwise
--! @note The actual trigger is performed asynchronously
--! @return true on success, false otherwise
function alerts_api.new_trigger(entity_info, type_info, when)
--! @note false is also returned if an existing alert is found and refreshed
function alerts_api.trigger(entity_info, type_info, when)
when = when or os.time()
local ifid = interface.getId()
local granularity_sec = type_info.alert_granularity and type_info.alert_granularity.granularity_seconds or 0
local granularity_id = type_info.alert_granularity and type_info.alert_granularity.granularity_id or nil
if(type_info.alert_granularity == nil) then
alertErrorTraceback("Missing mandatory granularity")
return(false)
end
local granularity_sec = type_info.alert_granularity.granularity_seconds
local granularity_id = type_info.alert_granularity.granularity_id
local subtype = type_info.alert_subtype or ""
local alert_json = json.encode(type_info.alert_type_params)
local is_disabled = alerts_api.isEntityAlertDisabled(ifid, entity_info.alert_entity.entity_id, entity_info.alert_entity_val, type_info.alert_type.alert_id)
local triggered
local alert_key_name = get_alert_triggered_key(type_info)
if(granularity_id ~= nil) then
local triggered = true
local alert_key_name = get_alert_triggered_key(type_info)
local params = {alert_key_name, granularity_id,
type_info.alert_type.severity.severity_id, type_info.alert_type.alert_id,
subtype, alert_json, is_disabled
}
local params = {alert_key_name, granularity_id,
type_info.alert_severity.severity_id, type_info.alert_type.alert_id,
subtype, alert_json, is_disabled
}
if((host.storeTriggeredAlert) and (entity_info.alert_entity.entity_id == alertEntity("host"))) then
triggered = host.storeTriggeredAlert(table.unpack(params))
elseif((interface.storeTriggeredAlert) and (entity_info.alert_entity.entity_id == alertEntity("interface"))) then
triggered = interface.storeTriggeredAlert(table.unpack(params))
elseif((network.storeTriggeredAlert) and (entity_info.alert_entity.entity_id == alertEntity("network"))) then
triggered = network.storeTriggeredAlert(table.unpack(params))
end
if(not triggered) then
if(do_trace) then print("[Don't Trigger alert (already triggered?) @ "..granularity_sec.."] "..
entity_info.alert_entity_val .."@"..type_info.alert_type.i18n_title..":".. subtype .. "\n") end
return(false)
elseif(is_disabled) then
if(do_trace) then print("[COUNT Disabled alert @ "..granularity_sec.."] "..
entity_info.alert_entity_val .."@"..type_info.alert_type.i18n_title..":".. subtype .. "\n") end
incDisabledAlertsCount(ifid, granularity_id, entity_info.alert_entity.entity_id, entity_info.alert_entity_val, type_info.alert_type.alert_id)
else
if(do_trace) then print("[TRIGGER alert @ "..granularity_sec.."] "..
entity_info.alert_entity_val .."@"..type_info.alert_type.i18n_title..":".. subtype .. "\n") end
end
if((host.storeTriggeredAlert) and (entity_info.alert_entity.entity_id == alertEntity("host"))) then
triggered = host.storeTriggeredAlert(table.unpack(params))
elseif((interface.storeTriggeredAlert) and (entity_info.alert_entity.entity_id == alertEntity("interface"))) then
triggered = interface.storeTriggeredAlert(table.unpack(params))
elseif((network.storeTriggeredAlert) and (entity_info.alert_entity.entity_id == alertEntity("network"))) then
triggered = network.storeTriggeredAlert(table.unpack(params))
else
alertErrorTraceback("Bad lua context for entity_type " .. entity_info.alert_entity.entity_id)
return(false)
end
local action = ternary((granularity_id ~= nil), "engaged", "stored")
if(not triggered) then
if(do_trace) then print("[Don't Trigger alert (already triggered?) @ "..granularity_sec.."] "..
entity_info.alert_entity_val .."@"..type_info.alert_type.i18n_title..":".. subtype .. "\n") end
return(false)
elseif(is_disabled) then
if(do_trace) then print("[COUNT Disabled alert @ "..granularity_sec.."] "..
entity_info.alert_entity_val .."@"..type_info.alert_type.i18n_title..":".. subtype .. "\n") end
incDisabledAlertsCount(ifid, granularity_id, entity_info.alert_entity.entity_id, entity_info.alert_entity_val, type_info.alert_type.alert_id)
return(false)
else
if(do_trace) then print("[TRIGGER alert @ "..granularity_sec.."] "..
entity_info.alert_entity_val .."@"..type_info.alert_type.i18n_title..":".. subtype .. "\n") end
end
local alert_event = {
ifid = ifid,
@ -391,11 +334,11 @@ function alerts_api.new_trigger(entity_info, type_info, when)
entity_type = entity_info.alert_entity.entity_id,
entity_value = entity_info.alert_entity_val,
type = type_info.alert_type.alert_id,
severity = type_info.alert_type.severity.severity_id,
severity = type_info.alert_severity.severity_id,
message = alert_json,
subtype = subtype,
tstamp = when,
action = action,
action = "engage",
}
return(enqueueAlertEvent(alert_event))
@ -443,7 +386,7 @@ function alerts_api.release(entity_info, type_info, when)
entity_type = entity_info.alert_entity.entity_id,
entity_value = entity_info.alert_entity_val,
type = type_info.alert_type.alert_id,
severity = type_info.alert_type.severity.severity_id,
severity = type_info.alert_severity.severity_id,
subtype = subtype,
tstamp = released.alert_tstamp,
tstamp_end = released.alert_tstamp_end,
@ -499,37 +442,421 @@ function alerts_api.networkAlertEntity(network_cidr)
}
end
-- ##############################################
-- type_info building functions
-- ##############################################
function alerts_api.thresholdCrossType(granularity, metric, value, operator, threshold)
local res = {
alert_type = alert_consts.alert_types.threshold_cross,
alert_subtype = string.format("%s_%s", granularity, metric),
alert_granularity = alert_consts.alerts_granularities[granularity],
alert_type_params = {
metric = metric, value = value,
operator = operator, threshold = threshold,
}
--! @brief Builds the entity_info describing an interface of an SNMP device
--! @param snmp_device the device identifier, formatted with %s
--! @param snmp_interface the interface index, formatted with %d
--! @return a table with the alert_entity and alert_entity_val fields
function alerts_api.snmpInterfaceEntity(snmp_device, snmp_interface)
  return {
    alert_entity = alert_consts.alert_entities.snmp_device,
    -- The entity value is "<device>_ifidx<interface index>"
    alert_entity_val = string.format("%s_ifidx%d", snmp_device, snmp_interface)
  }
end
-- ##############################################
function alerts_api.anomalyType(anomal_name, alert_type, value, threshold)
local res = {
alert_type = alert_type,
alert_subtype = anomal_name,
--! @brief Builds the entity_info describing a MAC address
--! @param mac the value stored as alert_entity_val
--! @return a table with the alert_entity and alert_entity_val fields
function alerts_api.macEntity(mac)
  local entity_info = {}

  entity_info.alert_entity = alert_consts.alert_entities.mac
  entity_info.alert_entity_val = mac

  return entity_info
end
-- ##############################################
--! @brief Builds the entity_info describing a user
--! @param user the value stored as alert_entity_val
--! @return a table with the alert_entity and alert_entity_val fields
function alerts_api.userEntity(user)
  local entity_info = {}

  entity_info.alert_entity = alert_consts.alert_entities.user
  entity_info.alert_entity_val = user

  return entity_info
end
-- ##############################################
--! @brief Builds the entity_info describing a process
--! @param process the value stored as alert_entity_val
--! @return a table with the alert_entity and alert_entity_val fields
function alerts_api.processEntity(process)
  local entity_info = {}

  entity_info.alert_entity = alert_consts.alert_entities.process
  entity_info.alert_entity_val = process

  return entity_info
end
-- ##############################################
--! @brief Builds the entity_info describing a host pool
--! @param pool_id the pool identifier; it is converted with tostring
--! @return a table with the alert_entity and alert_entity_val fields
function alerts_api.hostPoolEntity(pool_id)
  local entity_info = {}

  entity_info.alert_entity = alert_consts.alert_entities.host_pool
  entity_info.alert_entity_val = tostring(pool_id)

  return entity_info
end
-- ##############################################
--! @brief Builds the entity_info describing a periodic activity
--! @param activity_path the value stored as alert_entity_val
--! @return a table with the alert_entity and alert_entity_val fields
function alerts_api.periodicActivityEntity(activity_path)
  local entity_info = {}

  entity_info.alert_entity = alert_consts.alert_entities.periodic_activity
  entity_info.alert_entity_val = activity_path

  return entity_info
end
-- ##############################################
--! @brief Builds the entity_info describing a pinged host
--! @param host the value stored as alert_entity_val
--! @return a table with the alert_entity and alert_entity_val fields
function alerts_api.pingedHostEntity(host)
  local entity_info = {}

  entity_info.alert_entity = alert_consts.alert_entities.pinged_host
  entity_info.alert_entity_val = host

  return entity_info
end
-- ##############################################
--! @brief Builds the entity_info describing a category list
--! @param list_name the value stored as alert_entity_val
--! @return a table with the alert_entity and alert_entity_val fields
function alerts_api.categoryListsEntity(list_name)
  return {
    -- Must be the category_lists entity, not the influx_db one
    alert_entity = alert_consts.alert_entities.category_lists,
    alert_entity_val = list_name
  }
end
-- ##############################################
--! @brief Builds the entity_info describing an InfluxDB instance
--! @param dburl the value stored as alert_entity_val
--! @return a table with the alert_entity and alert_entity_val fields
function alerts_api.influxdbEntity(dburl)
  return {
    -- Must be the influx_db entity, not the category_lists one
    alert_entity = alert_consts.alert_entities.influx_db,
    alert_entity_val = dburl
  }
end
-- ##############################################
-- type_info building functions
-- ##############################################
--! @brief Builds the type_info for a threshold_cross alert (error severity)
--! @param granularity key into alert_consts.alerts_granularities, also used as subtype prefix
--! @param metric the name of the checked metric, also used as subtype suffix
--! @param value the value reported in the alert parameters
--! @param operator the comparison operator reported in the alert parameters
--! @param threshold the threshold reported in the alert parameters
--! @return the type_info table
function alerts_api.thresholdCrossType(granularity, metric, value, operator, threshold)
  local type_info = {
    alert_type = alert_consts.alert_types.threshold_cross,
  }

  -- The subtype is "<granularity>_<metric>"
  type_info.alert_subtype = string.format("%s_%s", granularity, metric)
  type_info.alert_granularity = alert_consts.alerts_granularities[granularity]
  type_info.alert_severity = alert_consts.alert_severities.error
  type_info.alert_type_params = {
    metric = metric,
    value = value,
    operator = operator,
    threshold = threshold,
  }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a ping_issues alert (warning severity, "min" granularity)
--! @param value the value reported in the alert parameters
--! @param threshold the threshold reported in the alert parameters
--! @param ip the address reported in the alert parameters
--! @return the type_info table
function alerts_api.pingIssuesType(value, threshold, ip)
  return({
    alert_type = alert_consts.alert_types.ping_issues,
    alert_severity = alert_consts.alert_severities.warning,
    alert_granularity = alert_consts.alerts_granularities.min,
    alert_type_params = {
      value = value, threshold = threshold, ip = ip,
    }
  })
end
-- ##############################################
--! @brief Builds the type_info for an alert_user_activity alert (info severity)
--! @param scope, name, params, remote_addr, status values copied verbatim into the alert parameters
--! @return the type_info table
function alerts_api.userActivityType(scope, name, params, remote_addr, status)
  local type_info = {
    alert_type = alert_consts.alert_types.alert_user_activity,
    alert_severity = alert_consts.alert_severities.info,
  }

  type_info.alert_type_params = {
    scope = scope,
    name = name,
    params = params,
    remote_addr = remote_addr,
    status = status,
  }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a login_failed alert (warning severity)
--! @return the type_info table, with empty alert parameters
function alerts_api.loginFailedType()
  local type_info = {}

  type_info.alert_type = alert_consts.alert_types.login_failed
  type_info.alert_severity = alert_consts.alert_severities.warning
  type_info.alert_type_params = {}

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a process_notification alert
--! @param event_type copied into the alert parameters
--! @param severity converted with alertSeverityRaw and used as key into alert_consts.alert_severities
--! @param msg_details copied into the alert parameters
--! @return the type_info table
function alerts_api.processNotificationType(event_type, severity, msg_details)
  local type_info = {
    alert_type = alert_consts.alert_types.process_notification,
  }

  type_info.alert_severity = alert_consts.alert_severities[alertSeverityRaw(severity)]
  type_info.alert_type_params = {
    msg_details = msg_details,
    event_type = event_type,
  }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a list_download_failed alert (error severity)
--! @param list_name stored as "name" in the alert parameters
--! @param last_error stored as "err" in the alert parameters
--! @return the type_info table
function alerts_api.listDownloadFailedType(list_name, last_error)
  local type_info = {
    alert_type = alert_consts.alert_types.list_download_failed,
    alert_severity = alert_consts.alert_severities.error,
  }

  type_info.alert_type_params = {
    name = list_name,
    err = last_error,
  }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for an influxdb_export_failure alert (error severity, "min" granularity)
--! @param influxdb_url stored as "influxdb" in the alert parameters
--! @return the type_info table
function alerts_api.influxdbDroppedPointsType(influxdb_url)
  local type_info = {}

  type_info.alert_type = alert_consts.alert_types.influxdb_export_failure
  type_info.alert_severity = alert_consts.alert_severities.error
  type_info.alert_granularity = alert_consts.alerts_granularities.min
  type_info.alert_type_params = { influxdb = influxdb_url }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a new_device alert (warning severity)
--! @param device_name stored as "device" in the alert parameters
--! @return the type_info table
function alerts_api.newDeviceType(device_name)
  local type_info = {}

  type_info.alert_type = alert_consts.alert_types.new_device
  type_info.alert_severity = alert_consts.alert_severities.warning
  type_info.alert_type_params = { device = device_name }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a device_connection alert (info severity)
--! @param device_name stored as "device" in the alert parameters
--! @return the type_info table
function alerts_api.deviceHasConnectedType(device_name)
  local type_info = {}

  type_info.alert_type = alert_consts.alert_types.device_connection
  type_info.alert_severity = alert_consts.alert_severities.info
  type_info.alert_type_params = { device = device_name }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a device_disconnection alert (info severity)
--! @param device_name stored as "device" in the alert parameters
--! @return the type_info table
function alerts_api.deviceHasDisconnectedType(device_name)
  local type_info = {}

  type_info.alert_type = alert_consts.alert_types.device_disconnection
  type_info.alert_severity = alert_consts.alert_severities.info
  type_info.alert_type_params = { device = device_name }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a quota_exceeded alert (warning severity)
--! @param pool the pool passed to host_pools_utils.getPoolName for the current interface
--! @param proto, value, quota values copied verbatim into the alert parameters
--! @param subtype the alert subtype
--! @return the type_info table
function alerts_api.poolQuotaExceededType(pool, proto, subtype, value, quota)
  local host_pools_utils = require("host_pools_utils")
  local type_info = {
    alert_type = alert_consts.alert_types.quota_exceeded,
    alert_subtype = subtype,
    alert_severity = alert_consts.alert_severities.warning,
  }

  -- The pool is reported by name rather than by the value received
  type_info.alert_type_params = {
    pool = host_pools_utils.getPoolName(interface.getId(), pool),
    proto = proto,
    value = value,
    quota = quota,
  }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a host_pool_connection alert (info severity)
--! @param pool the pool passed to host_pools_utils.getPoolName for the current interface
--! @return the type_info table
function alerts_api.poolConnectionType(pool)
  local host_pools_utils = require("host_pools_utils")
  local type_info = {
    alert_type = alert_consts.alert_types.host_pool_connection,
    alert_severity = alert_consts.alert_severities.info,
  }

  -- The pool is reported by name rather than by the value received
  type_info.alert_type_params = {
    pool = host_pools_utils.getPoolName(interface.getId(), pool),
  }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a host_pool_disconnection alert (info severity)
--! @param pool the pool passed to host_pools_utils.getPoolName for the current interface
--! @return the type_info table
function alerts_api.poolDisconnectionType(pool)
  local host_pools_utils = require("host_pools_utils")
  local type_info = {
    alert_type = alert_consts.alert_types.host_pool_disconnection,
    alert_severity = alert_consts.alert_severities.info,
  }

  -- The pool is reported by name rather than by the value received
  type_info.alert_type_params = {
    pool = host_pools_utils.getPoolName(interface.getId(), pool),
  }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a mac_ip_association_change alert (warning severity)
--! @param device, ip, old_mac, new_mac values copied verbatim into the alert parameters
--! @return the type_info table
function alerts_api.macIpAssociationChangeType(device, ip, old_mac, new_mac)
  local type_info = {
    alert_type = alert_consts.alert_types.mac_ip_association_change,
    alert_severity = alert_consts.alert_severities.warning,
  }

  type_info.alert_type_params = {
    device = device,
    ip = ip,
    old_mac = old_mac,
    new_mac = new_mac,
  }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a broadcast_domain_too_large alert (error severity)
--! @param src_mac, dst_mac, spa, tpa values copied verbatim into the alert parameters
--! @param vlan stored as "vlan_id" in the alert parameters
--! @return the type_info table
function alerts_api.broadcastDomainTooLargeType(src_mac, dst_mac, vlan, spa, tpa)
  local type_info = {
    alert_type = alert_consts.alert_types.broadcast_domain_too_large,
    alert_severity = alert_consts.alert_severities.error,
  }

  type_info.alert_type_params = {
    src_mac = src_mac,
    dst_mac = dst_mac,
    spa = spa,
    tpa = tpa,
    vlan_id = vlan,
  }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a nfq_flushed alert (error severity)
--! @param ifname, pct, tot, dropped values copied verbatim into the alert parameters
--! @return the type_info table
function alerts_api.nfqFlushedType(ifname, pct, tot, dropped)
  local type_info = {
    alert_type = alert_consts.alert_types.nfq_flushed,
    alert_severity = alert_consts.alert_severities.error,
  }

  type_info.alert_type_params = {
    ifname = ifname,
    pct = pct,
    tot = tot,
    dropped = dropped,
  }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a remote_to_remote alert (warning severity)
--! @param host_info passed to getResolvedAddress; the result is stored as "host"
--! @param mac copied into the alert parameters
--! @return the type_info table
function alerts_api.remoteToRemoteType(host_info, mac)
  local type_info = {
    alert_type = alert_consts.alert_types.remote_to_remote,
    alert_severity = alert_consts.alert_severities.warning,
  }

  type_info.alert_type_params = {
    host = getResolvedAddress(host_info),
    mac = mac,
  }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a slow_periodic_activity alert (warning severity)
--! @param duration_ms, max_duration_ms values copied verbatim into the alert parameters
--! @return the type_info table
function alerts_api.slowPeriodicActivityType(duration_ms, max_duration_ms)
  local type_info = {
    alert_type = alert_consts.alert_types.slow_periodic_activity,
    alert_severity = alert_consts.alert_severities.warning,
  }

  type_info.alert_type_params = {
    duration_ms = duration_ms,
    max_duration_ms = max_duration_ms,
  }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for an ip_outsite_dhcp_range alert (warning severity)
--! @param router_info passed to both hostinfo2hostkey ("router_info") and getResolvedAddress ("router_host")
--! @param mac, client_mac, sender_mac values copied verbatim into the alert parameters
--! @return the type_info table
function alerts_api.ipOutsideDHCPRangeType(router_info, mac, client_mac, sender_mac)
  local type_info = {
    alert_type = alert_consts.alert_types.ip_outsite_dhcp_range,
    alert_severity = alert_consts.alert_severities.warning,
  }

  local router_key = hostinfo2hostkey(router_info)
  local router_host = getResolvedAddress(router_info)

  type_info.alert_type_params = {
    router_info = router_key,
    mac = mac,
    client_mac = client_mac,
    sender_mac = sender_mac,
    router_host = router_host,
  }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a port_status_change alert (info severity)
--! @param device, interface, interface_name, status values copied verbatim into the alert parameters
--! @return the type_info table
--! @note the "interface" parameter shadows the global of the same name inside this function
function alerts_api.snmpInterfaceStatusChangeType(device, interface, interface_name, status)
  local type_info = {
    alert_type = alert_consts.alert_types.port_status_change,
    alert_severity = alert_consts.alert_severities.info,
  }

  type_info.alert_type_params = {
    device = device,
    interface = interface,
    interface_name = interface_name,
    status = status,
  }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a port_duplexstatus_change alert (warning severity)
--! @param device, interface, interface_name, status values copied verbatim into the alert parameters
--! @return the type_info table
--! @note the "interface" parameter shadows the global of the same name inside this function
function alerts_api.snmpInterfaceDuplexStatusChangeType(device, interface, interface_name, status)
  local type_info = {
    alert_type = alert_consts.alert_types.port_duplexstatus_change,
    alert_severity = alert_consts.alert_severities.warning,
  }

  type_info.alert_type_params = {
    device = device,
    interface = interface,
    interface_name = interface_name,
    status = status,
  }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a port_errors alert (info severity)
--! @param device, interface, interface_name values copied verbatim into the alert parameters
--! @return the type_info table
--! @note the "interface" parameter shadows the global of the same name inside this function
function alerts_api.snmpInterfaceErrorsType(device, interface, interface_name)
  local type_info = {
    alert_type = alert_consts.alert_types.port_errors,
    alert_severity = alert_consts.alert_severities.info,
  }

  type_info.alert_type_params = {
    device = device,
    interface = interface,
    interface_name = interface_name,
  }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a port_load_threshold_exceeded alert (warning severity)
--! @param device, interface, interface_name, interface_load, in_direction values copied verbatim into the alert parameters
--! @return the type_info table
--! @note the "interface" parameter shadows the global of the same name inside this function
function alerts_api.snmpPortLoadThresholdExceededType(device, interface, interface_name, interface_load, in_direction)
  local type_info = {
    alert_type = alert_consts.alert_types.port_load_threshold_exceeded,
    alert_severity = alert_consts.alert_severities.warning,
  }

  type_info.alert_type_params = {
    device = device,
    interface = interface,
    interface_name = interface_name,
    interface_load = interface_load,
    in_direction = in_direction,
  }

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a misconfigured_app alert (error severity, "min" granularity)
--! @param subtype the alert subtype
--! @return the type_info table, with empty alert parameters
function alerts_api.misconfiguredAppType(subtype)
  local type_info = {}

  type_info.alert_type = alert_consts.alert_types.misconfigured_app
  type_info.alert_subtype = subtype
  type_info.alert_severity = alert_consts.alert_severities.error
  type_info.alert_granularity = alert_consts.alerts_granularities.min
  type_info.alert_type_params = {}

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a too_many_drops alert (error severity, "min" granularity)
--! @return the type_info table, with empty alert parameters
function alerts_api.tooManyDropsType()
  local type_info = {}

  type_info.alert_type = alert_consts.alert_types.too_many_drops
  type_info.alert_severity = alert_consts.alert_severities.error
  type_info.alert_granularity = alert_consts.alerts_granularities.min
  type_info.alert_type_params = {}

  return(type_info)
end
-- ##############################################
--! @brief Builds the type_info for a slow_stats_update alert (warning severity, "min" granularity)
--! @return the type_info table, with empty alert parameters
function alerts_api.slowStatsUpdateType()
  local type_info = {}

  type_info.alert_type = alert_consts.alert_types.slow_stats_update
  type_info.alert_severity = alert_consts.alert_severities.warning
  type_info.alert_granularity = alert_consts.alerts_granularities.min
  type_info.alert_type_params = {}

  return(type_info)
end
-- ##############################################
@ -572,6 +899,12 @@ end
-- ##############################################
-- An alert check function which performs threshold checks of a value
-- against a configured threshold and generates a threshold_cross alert
-- if the value is above the threshold.
-- A check_module must implement:
-- get_threshold_value(granularity, entity_info)
-- A function which returns the current value to be compared against the threshold
function alerts_api.threshold_check_function(params)
local alarmed = false
local value = params.check_module.get_threshold_value(params.granularity, params.entity_info)
@ -587,7 +920,7 @@ function alerts_api.threshold_check_function(params)
end
if(alarmed) then
return(alerts_api.new_trigger(params.alert_entity, threshold_type))
return(alerts_api.trigger(params.alert_entity, threshold_type))
else
return(alerts_api.release(params.alert_entity, threshold_type))
end
@ -595,17 +928,18 @@ end
-- ##############################################
function alerts_api.check_anomaly(anomal_name, alert_type, alert_entity, entity_anomalies, anomal_config)
local anomaly = entity_anomalies[anomal_name] or {value = 0}
local value = anomaly.value
local anomaly_type = alerts_api.anomalyType(anomal_name, alert_type, value, anomal_config.threshold)
-- An alert check function which checks for anomalies.
-- The check_module key is the type of the anomaly to check.
-- The check_module must implement an anomaly_type(anomaly_key) function
-- which returns a type_info for the given anomaly.
function alerts_api.anomaly_check_function(params)
local anomal_key = params.check_module.key
local type_info = params.check_module.anomaly_type(anomal_key)
if(do_trace) then print("[Anomaly check] ".. alert_entity.alert_entity_val .." ["..anomal_name.."]\n") end
if(anomaly ~= nil) then
return(alerts_api.new_trigger(alert_entity, anomaly_type))
if params.entity_info.anomalies[anomal_key] then
return alerts_api.trigger(params.alert_entity, type_info)
else
return(alerts_api.release(alert_entity, threshold_type))
return alerts_api.release(params.alert_entity, type_info)
end
end