mirror of
https://github.com/vel21ripn/nDPI.git
synced 2026-05-02 00:40:17 +00:00
Domain Classification Improvements (#2396)
* Added size_t ndpi_compress_str(const char * in, size_t len, char * out, size_t bufsize) and size_t ndpi_decompress_str(const char * in, size_t len, char * out, size_t bufsize), which are used to compress short strings such as domain names. This code is based on https://github.com/Ed-von-Schleck/shoco
* Major code rewrite for ndpi_hash and ndpi_domain_classify
* Improvements to make sure custom categories are loaded and enabled
* Fixed string encoding
* Extended SalesForce/Cloudflare domains list
This commit is contained in:
parent
108b8331d5
commit
ad117bfaab
65 changed files with 1194 additions and 612 deletions
|
|
@ -1,7 +1,7 @@
|
|||
/*
|
||||
* ndpiReader.c
|
||||
*
|
||||
* Copyright (C) 2011-23 - ntop.org
|
||||
* Copyright (C) 2011-24 - ntop.org
|
||||
*
|
||||
* nDPI is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as published by
|
||||
|
|
@ -377,21 +377,21 @@ void ndpiCheckHostStringMatch(char *testChar) {
|
|||
testChar, strlen(testChar), &match);
|
||||
|
||||
if(testRes) {
|
||||
memset( &detected_protocol, 0, sizeof(ndpi_protocol) );
|
||||
memset(&detected_protocol, 0, sizeof(ndpi_protocol) );
|
||||
|
||||
detected_protocol.app_protocol = match.protocol_id;
|
||||
detected_protocol.master_protocol = 0;
|
||||
detected_protocol.category = match.protocol_category;
|
||||
|
||||
ndpi_protocol2name( ndpi_str, detected_protocol, appBufStr,
|
||||
sizeof(appBufStr));
|
||||
ndpi_protocol2name(ndpi_str, detected_protocol, appBufStr,
|
||||
sizeof(appBufStr));
|
||||
|
||||
printf("Match Found for string [%s] -> P(%d) B(%d) C(%d) => %s %s %s\n",
|
||||
testChar, match.protocol_id, match.protocol_breed,
|
||||
match.protocol_category,
|
||||
appBufStr,
|
||||
ndpi_get_proto_breed_name( match.protocol_breed ),
|
||||
ndpi_category_get_name( ndpi_str, match.protocol_category));
|
||||
ndpi_get_proto_breed_name(match.protocol_breed ),
|
||||
ndpi_category_get_name(ndpi_str, match.protocol_category));
|
||||
} else
|
||||
printf("Match NOT Found for string: %s\n\n", testChar );
|
||||
|
||||
|
|
@ -675,17 +675,17 @@ static void help(u_int long_help) {
|
|||
max_num_reported_top_payloads, max_num_tcp_dissected_pkts, max_num_udp_dissected_pkts);
|
||||
|
||||
NDPI_PROTOCOL_BITMASK all;
|
||||
struct ndpi_detection_module_struct *ndpi_info_mod = ndpi_init_detection_module(NULL);
|
||||
struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL);
|
||||
NDPI_BITMASK_SET_ALL(all);
|
||||
ndpi_set_protocol_detection_bitmask2(ndpi_info_mod, &all);
|
||||
ndpi_set_protocol_detection_bitmask2(ndpi_str, &all);
|
||||
|
||||
if(_protoFilePath != NULL)
|
||||
ndpi_load_protocols_file(ndpi_info_mod, _protoFilePath);
|
||||
ndpi_load_protocols_file(ndpi_str, _protoFilePath);
|
||||
|
||||
ndpi_finalize_initialization(ndpi_info_mod);
|
||||
ndpi_finalize_initialization(ndpi_str);
|
||||
|
||||
printf("\nProtocols configuration parameters:\n");
|
||||
ndpi_dump_config(ndpi_info_mod, stdout);
|
||||
ndpi_dump_config(ndpi_str, stdout);
|
||||
|
||||
#ifndef WIN32
|
||||
printf("\nExcap (wireshark) options:\n"
|
||||
|
|
@ -712,13 +712,13 @@ static void help(u_int long_help) {
|
|||
"Id", "Userd-id", "Protocol", "Layer_4", "Nw_Proto", "Breed", "Category");
|
||||
num_threads = 1;
|
||||
|
||||
ndpi_dump_protocols(ndpi_info_mod, stdout);
|
||||
ndpi_dump_protocols(ndpi_str, stdout);
|
||||
|
||||
printf("\n\nnDPI supported risks:\n");
|
||||
ndpi_dump_risks_score(stdout);
|
||||
}
|
||||
|
||||
ndpi_exit_detection_module(ndpi_info_mod);
|
||||
ndpi_exit_detection_module(ndpi_str);
|
||||
|
||||
exit(!long_help);
|
||||
}
|
||||
|
|
@ -839,10 +839,10 @@ void extcap_config() {
|
|||
ndpi_proto_defaults_t *proto_defaults;
|
||||
#endif
|
||||
|
||||
struct ndpi_detection_module_struct *ndpi_info_mod = ndpi_init_detection_module(NULL);
|
||||
struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL);
|
||||
#if 0
|
||||
ndpi_num_supported_protocols = ndpi_get_ndpi_num_supported_protocols(ndpi_info_mod);
|
||||
proto_defaults = ndpi_get_proto_defaults(ndpi_info_mod);
|
||||
ndpi_num_supported_protocols = ndpi_get_ndpi_num_supported_protocols(ndpi_str);
|
||||
proto_defaults = ndpi_get_proto_defaults(ndpi_str);
|
||||
#endif
|
||||
|
||||
/* -i <interface> */
|
||||
|
|
@ -876,7 +876,7 @@ void extcap_config() {
|
|||
ndpi_free(protos);
|
||||
#endif
|
||||
|
||||
ndpi_exit_detection_module(ndpi_info_mod);
|
||||
ndpi_exit_detection_module(ndpi_str);
|
||||
|
||||
extcap_exit = 1;
|
||||
}
|
||||
|
|
@ -1347,10 +1347,10 @@ static void parseOptions(int argc, char **argv) {
|
|||
|
||||
case '9':
|
||||
{
|
||||
struct ndpi_detection_module_struct *ndpi_info_mod = ndpi_init_detection_module(NULL);
|
||||
extcap_packet_filter = ndpi_get_proto_by_name(ndpi_info_mod, optarg);
|
||||
struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL);
|
||||
extcap_packet_filter = ndpi_get_proto_by_name(ndpi_str, optarg);
|
||||
if(extcap_packet_filter == NDPI_PROTOCOL_UNKNOWN) extcap_packet_filter = atoi(optarg);
|
||||
ndpi_exit_detection_module(ndpi_info_mod);
|
||||
ndpi_exit_detection_module(ndpi_str);
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
@ -1524,7 +1524,7 @@ void print_bin(FILE *fout, const char *label, struct ndpi_bin *b) {
|
|||
static void print_ndpi_address_port(FILE *out, const char *label, ndpi_address_port *ap) {
|
||||
if(ap->port != 0) {
|
||||
char buf[INET6_ADDRSTRLEN];
|
||||
|
||||
|
||||
if(ap->is_ipv6) {
|
||||
inet_ntop(AF_INET6, &ap->address, buf, sizeof(buf));
|
||||
fprintf(out, "[%s: [%s]:%u]", label, buf, ap->port);
|
||||
|
|
@ -1892,7 +1892,7 @@ static void printFlow(u_int32_t id, struct ndpi_flow_info *flow, u_int16_t threa
|
|||
print_ndpi_address_port(out, "Relayed IP/Port", &flow->stun.relayed_address);
|
||||
print_ndpi_address_port(out, "Rsp Origin IP/Port", &flow->stun.response_origin);
|
||||
print_ndpi_address_port(out, "Other IP/Port", &flow->stun.other_address);
|
||||
|
||||
|
||||
if(flow->http.url[0] != '\0') {
|
||||
ndpi_risk_enum risk = ndpi_validate_url(flow->http.url);
|
||||
|
||||
|
|
@ -3883,7 +3883,7 @@ static void printResults(u_int64_t processing_time_usec, u_int64_t setup_time_us
|
|||
if(!quiet_mode) {
|
||||
printf("\nnDPI Memory statistics:\n");
|
||||
printf("\tnDPI Memory (once): %-13s\n", formatBytes(ndpi_get_ndpi_detection_module_size(), buf, sizeof(buf)));
|
||||
printf("\tFlow Memory (per flow): %-13s\n", formatBytes( ndpi_detection_get_sizeof_ndpi_flow_struct(), buf, sizeof(buf)));
|
||||
printf("\tFlow Memory (per flow): %-13s\n", formatBytes(ndpi_detection_get_sizeof_ndpi_flow_struct(), buf, sizeof(buf)));
|
||||
printf("\tActual Memory: %-13s\n", formatBytes(current_ndpi_memory, buf, sizeof(buf)));
|
||||
printf("\tPeak Memory: %-13s\n", formatBytes(max_ndpi_memory, buf, sizeof(buf)));
|
||||
printf("\tSetup Time: %lu msec\n", (unsigned long)(setup_time_usec/1000));
|
||||
|
|
@ -5191,21 +5191,21 @@ void rsiUnitTest() {
|
|||
void hashUnitTest() {
|
||||
ndpi_str_hash *h;
|
||||
char * const dict[] = { "hello", "world", NULL };
|
||||
int i;
|
||||
u_int16_t i;
|
||||
|
||||
assert(ndpi_hash_init(&h) == 0);
|
||||
assert(h == NULL);
|
||||
|
||||
for(i=0; dict[i] != NULL; i++) {
|
||||
u_int8_t l = strlen(dict[i]);
|
||||
int * v;
|
||||
u_int16_t v;
|
||||
|
||||
assert(ndpi_hash_add_entry(&h, dict[i], l, &i) == 0);
|
||||
assert(ndpi_hash_find_entry(h, dict[i], l, (void **)&v) == 0);
|
||||
assert(v == (void *)&i && *v == i);
|
||||
assert(ndpi_hash_add_entry(&h, dict[i], l, i) == 0);
|
||||
assert(ndpi_hash_find_entry(h, dict[i], l, &v) == 0);
|
||||
assert(v == i);
|
||||
}
|
||||
|
||||
ndpi_hash_free(&h, NULL);
|
||||
ndpi_hash_free(&h);
|
||||
assert(h == NULL);
|
||||
}
|
||||
|
||||
|
|
@ -5797,31 +5797,74 @@ void loadStressTest() {
|
|||
|
||||
/* *********************************************** */
|
||||
|
||||
void domainsUnitTest() {
|
||||
void encodeDomainsUnitTest() {
|
||||
NDPI_PROTOCOL_BITMASK all;
|
||||
struct ndpi_detection_module_struct *ndpi_info_mod = ndpi_init_detection_module(NULL);
|
||||
struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL);
|
||||
const char *lists_path = "../lists/public_suffix_list.dat";
|
||||
struct stat st;
|
||||
|
||||
if(stat(lists_path, &st) == 0) {
|
||||
u_int16_t suffix_id;
|
||||
char out[256];
|
||||
char *str;
|
||||
ndpi_protocol_category_t id;
|
||||
|
||||
NDPI_BITMASK_SET_ALL(all);
|
||||
ndpi_set_protocol_detection_bitmask2(ndpi_info_mod, &all);
|
||||
ndpi_set_protocol_detection_bitmask2(ndpi_str, &all);
|
||||
|
||||
assert(ndpi_load_domain_suffixes(ndpi_info_mod, (char*)lists_path) == 0);
|
||||
assert(ndpi_load_domain_suffixes(ndpi_str, (char*)lists_path) == 0);
|
||||
|
||||
assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "www.chosei.chiba.jp"), "chosei.chiba.jp") == 0);
|
||||
assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "www.unipi.it"), "it") == 0);
|
||||
assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "mail.apple.com"), "com") == 0);
|
||||
assert(strcmp(ndpi_get_host_domain_suffix(ndpi_info_mod, "www.bbc.co.uk"), "co.uk") == 0);
|
||||
ndpi_get_host_domain_suffix(ndpi_str, "lcb.it", &suffix_id); assert(suffix_id == 1117);
|
||||
ndpi_get_host_domain_suffix(ndpi_str, "www.ntop.org", &suffix_id); assert(suffix_id == 4503);
|
||||
ndpi_get_host_domain_suffix(ndpi_str, "www.bbc.co.uk", &suffix_id); assert(suffix_id == 5242);
|
||||
|
||||
assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "www.chosei.chiba.jp"), "www.chosei.chiba.jp") == 0);
|
||||
assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "www.unipi.it"), "unipi.it") == 0);
|
||||
assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "mail.apple.com"), "apple.com") == 0);
|
||||
assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "www.bbc.co.uk"), "bbc.co.uk") == 0);
|
||||
assert(strcmp(ndpi_get_host_domain(ndpi_info_mod, "zy1ssnfwwl.execute-api.eu-north-1.amazonaws.com"), "amazonaws.com") == 0);
|
||||
str = (char*)"www.ntop.org"; assert(ndpi_encode_domain(ndpi_str, str, out, sizeof(out)) == 8);
|
||||
str = (char*)"www.bbc.co.uk"; assert(ndpi_encode_domain(ndpi_str, str, out, sizeof(out)) == 8);
|
||||
|
||||
assert(ndpi_load_categories_dir(ndpi_str, "../lists"));
|
||||
assert(ndpi_load_categories_file(ndpi_str, "./categories.txt", "categories.txt"));
|
||||
|
||||
str = (char*)"2001:db8:1::1"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 100);
|
||||
str = (char*)"www.internetbadguys.com"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 100);
|
||||
str = (char*)"0grand-casino.com"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 107);
|
||||
str = (char*)"222.0grand-casino.com"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 107);
|
||||
str = (char*)"10bet.com"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 107);
|
||||
str = (char*)"www.ntop.org"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == -1); assert(id == 0);
|
||||
str = (char*)"www.andrewpope.com"; assert(ndpi_get_custom_category_match(ndpi_str, str, strlen(str), &id) == 0); assert(id == 100);
|
||||
}
|
||||
|
||||
ndpi_exit_detection_module(ndpi_info_mod);
|
||||
ndpi_exit_detection_module(ndpi_str);
|
||||
}
|
||||
|
||||
/* *********************************************** */
|
||||
|
||||
void domainsUnitTest() {
|
||||
NDPI_PROTOCOL_BITMASK all;
|
||||
struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL);
|
||||
const char *lists_path = "../lists/public_suffix_list.dat";
|
||||
struct stat st;
|
||||
|
||||
if(stat(lists_path, &st) == 0) {
|
||||
u_int16_t suffix_id;
|
||||
|
||||
NDPI_BITMASK_SET_ALL(all);
|
||||
ndpi_set_protocol_detection_bitmask2(ndpi_str, &all);
|
||||
|
||||
assert(ndpi_load_domain_suffixes(ndpi_str, (char*)lists_path) == 0);
|
||||
|
||||
assert(strcmp(ndpi_get_host_domain_suffix(ndpi_str, "www.chosei.chiba.jp", &suffix_id), "chosei.chiba.jp") == 0);
|
||||
assert(strcmp(ndpi_get_host_domain_suffix(ndpi_str, "www.unipi.it", &suffix_id), "it") == 0);
|
||||
assert(strcmp(ndpi_get_host_domain_suffix(ndpi_str, "mail.apple.com", &suffix_id), "com") == 0);
|
||||
assert(strcmp(ndpi_get_host_domain_suffix(ndpi_str, "www.bbc.co.uk", &suffix_id), "co.uk") == 0);
|
||||
|
||||
assert(strcmp(ndpi_get_host_domain(ndpi_str, "www.chosei.chiba.jp"), "www.chosei.chiba.jp") == 0);
|
||||
assert(strcmp(ndpi_get_host_domain(ndpi_str, "www.unipi.it"), "unipi.it") == 0);
|
||||
assert(strcmp(ndpi_get_host_domain(ndpi_str, "mail.apple.com"), "apple.com") == 0);
|
||||
assert(strcmp(ndpi_get_host_domain(ndpi_str, "www.bbc.co.uk"), "bbc.co.uk") == 0);
|
||||
assert(strcmp(ndpi_get_host_domain(ndpi_str, "zy1ssnfwwl.execute-api.eu-north-1.amazonaws.com"), "amazonaws.com") == 0);
|
||||
}
|
||||
|
||||
ndpi_exit_detection_module(ndpi_str);
|
||||
}
|
||||
|
||||
/* *********************************************** */
|
||||
|
|
@ -5829,27 +5872,29 @@ void domainsUnitTest() {
|
|||
void domainSearchUnitTest() {
|
||||
ndpi_domain_classify *sc = ndpi_domain_classify_alloc();
|
||||
char *domain = "ntop.org";
|
||||
u_int8_t class_id;
|
||||
u_int16_t class_id;
|
||||
struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL);
|
||||
|
||||
assert(ndpi_str);
|
||||
assert(sc);
|
||||
|
||||
ndpi_domain_classify_add(sc, NDPI_PROTOCOL_NTOP, ".ntop.org");
|
||||
ndpi_domain_classify_add(sc, NDPI_PROTOCOL_NTOP, domain);
|
||||
assert(ndpi_domain_classify_contains(sc, &class_id, domain));
|
||||
ndpi_domain_classify_add(ndpi_str, sc, NDPI_PROTOCOL_NTOP, ".ntop.org");
|
||||
ndpi_domain_classify_add(ndpi_str, sc, NDPI_PROTOCOL_NTOP, domain);
|
||||
assert(ndpi_domain_classify_hostname(ndpi_str, sc, &class_id, domain));
|
||||
|
||||
ndpi_domain_classify_add(sc, NDPI_PROTOCOL_CATEGORY_GAMBLING, "123vc.club");
|
||||
assert(ndpi_domain_classify_contains(sc, &class_id, "123vc.club"));
|
||||
ndpi_domain_classify_add(ndpi_str, sc, NDPI_PROTOCOL_CATEGORY_GAMBLING, "123vc.club");
|
||||
assert(ndpi_domain_classify_hostname(ndpi_str, sc, &class_id, "123vc.club"));
|
||||
assert(class_id == NDPI_PROTOCOL_CATEGORY_GAMBLING);
|
||||
|
||||
/* Subdomain check */
|
||||
assert(ndpi_domain_classify_contains(sc, &class_id, "blog.ntop.org"));
|
||||
assert(ndpi_domain_classify_hostname(ndpi_str, sc, &class_id, "blog.ntop.org"));
|
||||
assert(class_id == NDPI_PROTOCOL_NTOP);
|
||||
|
||||
#ifdef DEBUG_TRACE
|
||||
struct stat st;
|
||||
|
||||
if(stat(fname, &st) == 0) {
|
||||
u_int32_t s = ndpi_domain_classify_size(sc);
|
||||
u_int32_t s = ndpi_domain_classify_size(ndpi_str, sc);
|
||||
|
||||
printf("Size: %u [%.1f %% of the original filename size]\n",
|
||||
s, (float)(s * 100) / (float)st.st_size);
|
||||
|
|
@ -5857,20 +5902,26 @@ void domainSearchUnitTest() {
|
|||
#endif
|
||||
|
||||
ndpi_domain_classify_free(sc);
|
||||
ndpi_exit_detection_module(ndpi_str);
|
||||
}
|
||||
|
||||
/* *********************************************** */
|
||||
|
||||
void domainSearchUnitTest2() {
|
||||
struct ndpi_detection_module_struct *ndpi_str = ndpi_init_detection_module(NULL);
|
||||
ndpi_domain_classify *c = ndpi_domain_classify_alloc();
|
||||
u_int8_t class_id = 9;
|
||||
u_int16_t class_id = 9;
|
||||
|
||||
ndpi_domain_classify_add(c, class_id, "ntop.org");
|
||||
ndpi_domain_classify_add(c, class_id, "apple.com");
|
||||
assert(ndpi_str);
|
||||
assert(c);
|
||||
|
||||
assert(!ndpi_domain_classify_contains(c, &class_id, "ntop.com"));
|
||||
ndpi_domain_classify_add(ndpi_str, c, class_id, "ntop.org");
|
||||
ndpi_domain_classify_add(ndpi_str, c, class_id, "apple.com");
|
||||
|
||||
assert(!ndpi_domain_classify_hostname(ndpi_str, c, &class_id, "ntop.com"));
|
||||
|
||||
ndpi_domain_classify_free(c);
|
||||
ndpi_exit_detection_module(ndpi_str);
|
||||
}
|
||||
|
||||
/* *********************************************** */
|
||||
|
|
@ -5913,6 +5964,7 @@ int main(int argc, char **argv) {
|
|||
exit(0);
|
||||
#endif
|
||||
|
||||
encodeDomainsUnitTest();
|
||||
loadStressTest();
|
||||
domainsUnitTest();
|
||||
outlierUnitTest();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue