Refactor entity list handling

This commit is contained in:
Patrick Pacher 2020-04-20 11:36:34 +02:00
parent 42ccb3e39a
commit eeb358425d
No known key found for this signature in database
GPG key ID: E8CD2DA160925A6D
7 changed files with 167 additions and 217 deletions

View file

@ -16,6 +16,43 @@ import (
"golang.org/x/net/publicsuffix" "golang.org/x/net/publicsuffix"
) )
// ListMatch describes one entity that has been matched against
// filterlists, together with the lists it was found in.
type ListMatch struct {
	Entity        string
	ActiveLists   []string
	InactiveLists []string
}

// String renders the match as "<entity> in activated lists <ids>".
// When InactiveLists is not empty, " and in deactivated lists <ids>"
// is appended.
func (lm *ListMatch) String() string {
	var deactivated string
	if len(lm.InactiveLists) != 0 {
		deactivated = " and in deactivated lists " + strings.Join(lm.InactiveLists, ", ")
	}

	activated := strings.Join(lm.ActiveLists, ",")

	return fmt.Sprintf("%s in activated lists %s%s", lm.Entity, activated, deactivated)
}
// ListBlockReason is a list of list matches.
type ListBlockReason []ListMatch

// String joins the string form of every contained ListMatch with
// " and ". An empty reason yields an empty string.
func (br ListBlockReason) String() string {
	if len(br) == 0 {
		return ""
	}

	parts := make([]string, 0, len(br))
	for i := range br {
		parts = append(parts, br[i].String())
	}

	return strings.Join(parts, " and ")
}
// Entity describes a remote endpoint in many different ways. // Entity describes a remote endpoint in many different ways.
// It embeds a sync.Mutex but none of the endpoints own // It embeds a sync.Mutex but none of the endpoints own
// functions performs locking. The caller MUST ENSURE // functions performs locking. The caller MUST ENSURE
@ -60,8 +97,18 @@ type Entity struct {
location *geoip.Location location *geoip.Location
Lists []string // BlockedByLists holds list source IDs that
ListsMap filterlists.LookupMap // are used to block the entity.
BlockedByLists []string
// BlockedEntities holds a list of entities that
// have been blocked. Values can be used as a key
// for the ListOccurences map.
BlockedEntities []string
// ListOccurences is a map that matches an entity (Domain, IPs, ASN, Country, Sub-domain)
// to a list of sources where the entity has been observed in.
ListOccurences map[string][]string
// we only load each data above at most once // we only load each data above at most once
fetchLocationOnce sync.Once fetchLocationOnce sync.Once
@ -90,8 +137,11 @@ func (e *Entity) ResetLists() {
// TODO(ppacher): our actual goal is to reset the domain // TODO(ppacher): our actual goal is to reset the domain
// list right now so we could be more efficient by keeping // list right now so we could be more efficient by keeping
// the other lists around. // the other lists around.
e.Lists = nil
e.ListsMap = nil // FIXME
//e.Lists = nil
//e.ListsMap = nil
e.ListOccurences = nil
e.domainListLoaded = false e.domainListLoaded = false
e.ipListLoaded = false e.ipListLoaded = false
e.countryListLoaded = false e.countryListLoaded = false
@ -238,9 +288,19 @@ func (e *Entity) getLists() {
e.getCountryLists() e.getCountryLists()
} }
func (e *Entity) mergeList(list []string) { func (e *Entity) mergeList(key string, list []string) {
e.Lists = mergeStringList(e.Lists, list) if len(list) == 0 {
e.ListsMap = buildLookupMap(e.Lists) return
}
if e.ListOccurences == nil {
e.ListOccurences = make(map[string][]string)
}
e.ListOccurences[key] = mergeStringList(e.ListOccurences[key], list)
//e.Lists = mergeStringList(e.Lists, list)
//e.ListsMap = buildLookupMap(e.Lists)
} }
func (e *Entity) getDomainLists() { func (e *Entity) getDomainLists() {
@ -284,7 +344,7 @@ func (e *Entity) getDomainLists() {
return return
} }
e.mergeList(list) e.mergeList(d, list)
} }
e.domainListLoaded = true e.domainListLoaded = true
}) })
@ -328,7 +388,8 @@ func (e *Entity) getASNLists() {
log.Tracef("intel: loading ASN list for %d", asn) log.Tracef("intel: loading ASN list for %d", asn)
e.loadAsnListOnce.Do(func() { e.loadAsnListOnce.Do(func() {
list, err := filterlists.LookupASNString(fmt.Sprintf("%d", asn)) asnStr := fmt.Sprintf("%d", asn)
list, err := filterlists.LookupASNString(asnStr)
if err != nil { if err != nil {
log.Errorf("intel: failed to get ASN blocklist for %d: %s", asn, err) log.Errorf("intel: failed to get ASN blocklist for %d: %s", asn, err)
e.loadAsnListOnce = sync.Once{} e.loadAsnListOnce = sync.Once{}
@ -336,7 +397,7 @@ func (e *Entity) getASNLists() {
} }
e.asnListLoaded = true e.asnListLoaded = true
e.mergeList(list) e.mergeList(asnStr, list)
}) })
} }
@ -360,7 +421,7 @@ func (e *Entity) getCountryLists() {
} }
e.countryListLoaded = true e.countryListLoaded = true
e.mergeList(list) e.mergeList(country, list)
}) })
} }
@ -393,28 +454,71 @@ func (e *Entity) getIPLists() {
return return
} }
e.ipListLoaded = true e.ipListLoaded = true
e.mergeList(list) e.mergeList(ip.String(), list)
}) })
} }
// GetLists returns the filter list identifiers the entity matched and whether this data is set. // LoadLists searches all filterlists for all occurences of
func (e *Entity) GetLists() ([]string, bool) { // this entity.
func (e *Entity) LoadLists() bool {
e.getLists() e.getLists()
if e.Lists == nil { if e.ListOccurences == nil {
return nil, false return false
} }
return e.Lists, true return true
} }
// GetListsMap is like GetLists but returns a lookup map for list IDs. // MatchLists matches the entities lists against a slice
func (e *Entity) GetListsMap() (filterlists.LookupMap, bool) { // of source IDs and updates various entity properties
e.getLists() // like BlockedByLists, ListOccurences and BlockedEntitites.
func (e *Entity) MatchLists(lists []string) bool {
e.BlockedByLists = nil
e.BlockedEntities = nil
if e.ListsMap == nil { lm := makeMap(lists)
return nil, false for key, keyLists := range e.ListOccurences {
for _, keyListID := range keyLists {
if _, ok := lm[keyListID]; ok {
e.BlockedByLists = append(e.BlockedByLists, keyListID)
e.BlockedEntities = append(e.BlockedEntities, key)
} }
return e.ListsMap, true }
}
makeDistinct(e.BlockedByLists)
return len(e.BlockedByLists) > 0
}
// ListBlockReason returns the block reason for this entity.
func (e *Entity) ListBlockReason() ListBlockReason {
blockedBy := make([]ListMatch, len(e.BlockedEntities))
lm := makeMap(e.BlockedByLists)
for idx, blockedEntity := range e.BlockedEntities {
if entityLists, ok := e.ListOccurences[blockedEntity]; ok {
var activeLists []string
var inactiveLists []string
for _, l := range entityLists {
if _, ok := lm[l]; ok {
activeLists = append(activeLists, l)
} else {
inactiveLists = append(inactiveLists, l)
}
}
blockedBy[idx] = ListMatch{
Entity: blockedEntity,
ActiveLists: activeLists,
InactiveLists: inactiveLists,
}
}
}
return blockedBy
} }
func mergeStringList(a, b []string) []string { func mergeStringList(a, b []string) []string {
@ -434,21 +538,26 @@ func mergeStringList(a, b []string) []string {
return res return res
} }
// buildLookupMap converts l into a filterlists.LookupMap that holds
// one entry per distinct element of l.
func buildLookupMap(l []string) filterlists.LookupMap {
	lookup := make(filterlists.LookupMap, len(l))

	for idx := range l {
		lookup[l[idx]] = struct{}{}
	}

	return lookup
}
// makeDistinct returns a new slice that contains every value of slice
// exactly once, in order of first appearance. The input slice is not
// modified, so callers must use the returned slice. For an input
// without elements the result is nil.
func makeDistinct(slice []string) []string {
	m := make(map[string]struct{}, len(slice))
	var result []string

	for _, v := range slice {
		if _, ok := m[v]; ok {
			continue
		}

		m[v] = struct{}{}
		result = append(result, v)
	}

	return result
}
// makeMap converts slice into a set: the returned map has one key per
// distinct value of slice. The result is never nil, even for a nil or
// empty slice.
func makeMap(slice []string) map[string]struct{} {
	// The number of elements is an upper bound for the number of
	// keys, so size the map once up front.
	lm := make(map[string]struct{}, len(slice))
	for _, v := range slice {
		lm[v] = struct{}{}
	}

	return lm
}

View file

@ -1,25 +0,0 @@
package filterlists
import "strings"
// LookupMap is a helper type for matching a list of endpoint sources
// against a map.
type LookupMap map[string]struct{}

// Match collects, in input order, every source in `list` that is a key
// of lm and returns them joined by ", ". If no source is found the
// result is an empty string.
func (lm LookupMap) Match(list []string) string {
	found := make([]string, 0, len(list))

	for _, source := range list {
		if _, known := lm[source]; !known {
			continue
		}
		found = append(found, source)
	}

	// Joining zero elements already yields "".
	return strings.Join(found, ", ")
}

View file

@ -1,92 +0,0 @@
package filterlists
/*
func TestLookupASN(t *testing.T) {
lists, err := LookupASNString("123")
assert.NoError(t, err)
assert.Equal(t, []string{"TEST"}, lists)
lists, err = LookupASNString("does-not-exist")
assert.NoError(t, err)
assert.Empty(t, lists)
defer testMarkNotLoaded()()
lists, err = LookupASNString("123")
assert.NoError(t, err)
assert.Empty(t, lists)
}
func TestLookupCountry(t *testing.T) {
lists, err := LookupCountry("AT")
assert.NoError(t, err)
assert.Equal(t, []string{"TEST"}, lists)
lists, err = LookupCountry("does-not-exist")
assert.NoError(t, err)
assert.Empty(t, lists)
defer testMarkNotLoaded()()
lists, err = LookupCountry("AT")
assert.NoError(t, err)
assert.Empty(t, lists)
}
func TestLookupIP(t *testing.T) {
lists, err := LookupIP(net.IP{1, 1, 1, 1})
assert.NoError(t, err)
assert.Equal(t, []string{"TEST"}, lists)
lists, err = LookupIP(net.IP{127, 0, 0, 1})
assert.NoError(t, err)
assert.Empty(t, lists)
defer testMarkNotLoaded()()
lists, err = LookupIP(net.IP{1, 1, 1, 1})
assert.NoError(t, err)
assert.Empty(t, lists)
}
func TestLookupDomain(t *testing.T) {
lists, err := LookupDomain("example.com")
assert.NoError(t, err)
assert.Equal(t, []string{"TEST"}, lists)
lists, err = LookupDomain("does-not-exist")
assert.NoError(t, err)
assert.Empty(t, lists)
defer testMarkNotLoaded()()
lists, err = LookupDomain("example.com")
assert.NoError(t, err)
assert.Empty(t, lists)
}
// testMarkNotLoaded ensures that functions believe
// filterlists are not yet loaded. It returns a
// func that restores the previous state.
func testMarkNotLoaded() func() {
if isLoaded() {
filterListsLoaded = make(chan struct{})
return func() {
close(filterListsLoaded)
}
}
return func() {}
}
// testMarkLoaded is like testMarkNotLoaded but ensures
// isLoaded() returns true. It returns a function to restore
// the previous state.
func testMarkLoaded() func() {
if !isLoaded() {
close(filterListsLoaded)
return func() {
filterListsLoaded = make(chan struct{})
}
}
return func() {}
}
*/

View file

@ -1,40 +0,0 @@
package intel
// ListSet holds a set of list IDs.
type ListSet struct {
	match []string
}

// NewListSet returns a new ListSet with the given list IDs.
// The slice is stored as passed in; it is not copied.
func NewListSet(lists []string) *ListSet {
	// TODO: validate lists
	set := new(ListSet)
	set.match = lists

	return set
}

// Matches returns whether there is a match in the given list IDs,
// i.e. whether at least one of them is also held by the set.
func (ls *ListSet) Matches(lists []string) (matches bool) {
	for i := range lists {
		for j := range ls.match {
			if ls.match[j] == lists[i] {
				return true
			}
		}
	}

	return false
}

// MatchSet returns the matching list IDs in the order they appear in
// lists. An ID is appended once for every equal entry held by the set.
func (ls *ListSet) MatchSet(lists []string) (matched []string) {
	for _, candidate := range lists {
		for _, known := range ls.match {
			if known != candidate {
				continue
			}
			matched = append(matched, candidate)
		}
	}

	return matched
}

View file

@ -10,21 +10,19 @@ import (
type EndpointLists struct { type EndpointLists struct {
EndpointBase EndpointBase
ListSet *intel.ListSet ListSet []string
Lists string Lists string
Reason string Reason string
} }
// Matches checks whether the given entity matches this endpoint definition. // Matches checks whether the given entity matches this endpoint definition.
func (ep *EndpointLists) Matches(entity *intel.Entity) (result EPResult, reason string) { func (ep *EndpointLists) Matches(entity *intel.Entity) (result EPResult, reason string) {
lists, ok := entity.GetLists() entity.LoadLists()
if !ok {
return Undeterminable, "" if entity.MatchLists(ep.ListSet) {
} return ep.matchesPPP(entity), entity.ListBlockReason().String()
matched := ep.ListSet.MatchSet(lists)
if len(matched) > 0 {
return ep.matchesPPP(entity), ep.Reason
} }
return NoMatch, "" return NoMatch, ""
} }
@ -36,7 +34,7 @@ func parseTypeList(fields []string) (Endpoint, error) {
if strings.HasPrefix(fields[1], "L:") { if strings.HasPrefix(fields[1], "L:") {
lists := strings.Split(strings.TrimPrefix(fields[1], "L:"), ",") lists := strings.Split(strings.TrimPrefix(fields[1], "L:"), ",")
ep := &EndpointLists{ ep := &EndpointLists{
ListSet: intel.NewListSet(lists), ListSet: lists,
Lists: "L:" + strings.Join(lists, ","), Lists: "L:" + strings.Join(lists, ","),
Reason: "matched lists " + strings.Join(lists, ","), Reason: "matched lists " + strings.Join(lists, ","),
} }

View file

@ -11,7 +11,7 @@ import (
// Endpoint describes an Endpoint Matcher // Endpoint describes an Endpoint Matcher
type Endpoint interface { type Endpoint interface {
Matches(entity *intel.Entity) (result EPResult, reason string) Matches(entity *intel.Entity) (EPResult, string)
String() string String() string
} }

View file

@ -243,27 +243,27 @@ func (lp *LayeredProfile) MatchFilterLists(entity *intel.Entity) (endpoints.EPRe
entity.ResolveSubDomainLists(lp.FilterSubDomains()) entity.ResolveSubDomainLists(lp.FilterSubDomains())
entity.EnableCNAMECheck(lp.FilterCNAMEs()) entity.EnableCNAMECheck(lp.FilterCNAMEs())
lookupMap, hasLists := entity.GetListsMap()
if !hasLists {
return endpoints.NoMatch, ""
}
for _, layer := range lp.layers { for _, layer := range lp.layers {
if reason := lookupMap.Match(layer.filterListIDs); reason != "" { // search for the first layer that has filterListIDs set
return endpoints.Denied, reason if len(layer.filterListIDs) > 0 {
entity.LoadLists()
if entity.MatchLists(layer.filterListIDs) {
return endpoints.Denied, entity.ListBlockReason().String()
} }
// only check the first layer that has filter list
// IDs defined.
if len(layer.filterListIDs) > 0 {
return endpoints.NoMatch, "" return endpoints.NoMatch, ""
} }
} }
cfgLock.RLock() cfgLock.RLock()
defer cfgLock.RUnlock() defer cfgLock.RUnlock()
if reason := lookupMap.Match(cfgFilterLists); reason != "" { if len(cfgFilterLists) > 0 {
return endpoints.Denied, reason entity.LoadLists()
if entity.MatchLists(cfgFilterLists) {
return endpoints.Denied, entity.ListBlockReason().String()
}
} }
return endpoints.NoMatch, "" return endpoints.NoMatch, ""