diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 56c4f91f0..b12592d35 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -241,3 +241,75 @@ jobs: msys2 -c './tests/do.sh' msys2 -c './tests/do-unit.sh' msys2 -c './tests/do-dga.sh' + + test-usdt: + name: USDT probes (ubuntu-latest) + runs-on: ubuntu-latest + env: + CFLAGS: -Werror -g -O2 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + - name: Install Prerequisites + run: | + sudo apt-get update + sudo apt-get install autoconf automake libtool pkg-config gettext libjson-c-dev flex bison libpcap-dev + sudo apt-get install systemtap-sdt-dev bpftrace + - name: Configure nDPI with USDT + run: | + ./autogen.sh && ./configure --enable-option-checking=fatal --enable-debug-messages --enable-usdt-probes + - name: Build nDPI + run: | + make -j all + make -C example + - name: Verify USDT probes are embedded (readelf) + run: | + echo "=== Probes in libndpi.so ===" + readelf -n src/lib/libndpi.so | grep -A4 stapsdt + echo "=== Probes in ndpiReader ===" + readelf -n example/ndpiReader | grep -A4 stapsdt + # Verify both probes are present + readelf -n example/ndpiReader | grep -q 'flow_classified' + readelf -n example/ndpiReader | grep -q 'hostname_set' + echo "All expected USDT probes found." + - name: List probes via bpftrace + run: | + sudo bpftrace -l "usdt:./example/ndpiReader:ndpi:*" | tee /tmp/probe_list.txt + grep -q 'flow_classified' /tmp/probe_list.txt + grep -q 'hostname_set' /tmp/probe_list.txt + echo "bpftrace can see all probes." 
+ - name: Test flow_classified probe with ndpiReader + run: | + sudo bpftrace -e ' + usdt:./example/ndpiReader:ndpi:flow_classified { + @flows = count(); + @proto_master[arg0] = count(); + @confidence[arg2] = count(); + @category[arg3] = count(); + }' -c './example/ndpiReader -q -i tests/pcap/http.pcapng' 2>&1 | tee /tmp/bpf_classified.txt + # Verify we actually traced some flows + grep -q '@flows:' /tmp/bpf_classified.txt + echo "flow_classified probe: OK" + - name: Test hostname_set probe with ndpiReader + run: | + sudo bpftrace -e ' + usdt:./example/ndpiReader:ndpi:hostname_set { + @hostnames = count(); + @top[str(arg0)] = count(); + }' -c './example/ndpiReader -q -i tests/pcap/tls_certificate_too_long.pcap' 2>&1 | tee /tmp/bpf_hostname.txt + # Verify we actually traced some hostnames + grep -q '@hostnames:' /tmp/bpf_hostname.txt + echo "hostname_set probe: OK" + - name: Test both probes simultaneously + run: | + sudo bpftrace -e ' + usdt:./example/ndpiReader:ndpi:hostname_set { + @hostnames = count(); + } + usdt:./example/ndpiReader:ndpi:flow_classified { + @classified = count(); + }' -c './example/ndpiReader -q -i tests/pcap/dns.pcap' 2>&1 | tee /tmp/bpf_both.txt + grep -q '@classified:' /tmp/bpf_both.txt + grep -q '@hostnames:' /tmp/bpf_both.txt + echo "Both probes fired successfully." 
diff --git a/Makefile.am b/Makefile.am index 099aa380e..f703288c7 100644 --- a/Makefile.am +++ b/Makefile.am @@ -26,6 +26,7 @@ EXTRA_DIST = README.md README.fuzzer.md CHANGELOG.md CONTRIBUTING.md \ doc/configuration_parameters.rst \ doc/library_initialization.rst \ doc/monitoring.rst \ + doc/usdt.rst \ python/DEV_GUIDE.md python/dev_requirements.txt python/ndpi_example.py python/ndpi/__init__.py \ python/ndpi/ndpi_build.py python/ndpi/ndpi.py python/README.md \ python/requirements.txt python/setup.py python/tests.py \ diff --git a/configure.ac b/configure.ac index 86bbaf0ab..08ba138b2 100644 --- a/configure.ac +++ b/configure.ac @@ -31,6 +31,15 @@ AC_ARG_ENABLE(plugin-support, AS_HELP_STRING([--disable-plugin-support], [Disabl AC_ARG_ENABLE(global-context-support, AS_HELP_STRING([--disable-global-context-support], [Disable support for global context. No external dependency on libpthread])) AC_ARG_ENABLE(memory-track-origins, AS_HELP_STRING([--disable-memory-track-origins], [Don't add -fsanitize-memory-track-origins flag when compiling with MASAN support. Useful for faster CI])) AC_ARG_ENABLE(old-croaring, AS_HELP_STRING([--enable-old-croaring], [Always use old croaring version, instead of try to auto-detect if v4 works. 
Useful with old compilers]),[enable_oldcroaring=$enableval],[enable_oldcroaring=no]) +AC_ARG_ENABLE(usdt-probes, AS_HELP_STRING([--enable-usdt-probes], [Enable USDT/DTrace static tracing probes]),[enable_usdt_probes=$enableval],[enable_usdt_probes=no]) + +AS_IF([test "x$enable_usdt_probes" = "xyes"], [ + AC_CHECK_HEADER([sys/sdt.h], [ + AC_DEFINE([HAVE_USDT], [1], [Define if USDT probes are enabled]) + ], [ + AC_MSG_ERROR([--enable-usdt-probes requires sys/sdt.h (install systemtap-sdt-dev)]) + ]) +]) #These two variables are not supposed to be set/changed by the user: #he should use standard CFLAGS/LDFLAGS, instead @@ -762,6 +771,12 @@ AS_IF([test "x${CUSTOM_NDPI}" = "x-DCUSTOM_NDPI_PROTOCOLS"], [SUMMARY="${SUMMARY} Custom protocols: no"]) +AS_IF([test "x${enable_usdt_probes}" = "xyes"], + [SUMMARY="${SUMMARY} + USDT probes: enabled"], + [SUMMARY="${SUMMARY} + USDT probes: disabled"]) + SUMMARY="${SUMMARY} Optional Dependencies:" diff --git a/doc/index.rst b/doc/index.rst index 44c8ebaa7..6ce42ca23 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -20,6 +20,7 @@ nDPI is an open source DPI (Deep Packet Inspection) toolkit for traffic analysis monitoring protocols flow_risks + usdt .. toctree:: :caption: Other Products diff --git a/doc/usdt.rst b/doc/usdt.rst new file mode 100644 index 000000000..8488df3ad --- /dev/null +++ b/doc/usdt.rst @@ -0,0 +1,239 @@ +USDT Probes +=========== + +nDPI supports USDT (User-level Statically Defined Tracing) +probes for zero-overhead dynamic tracing in production. USDT probes compile to a single NOP +instruction and have no runtime cost when not actively being traced. External tools like +``bpftrace``, ``perf``, and ``SystemTap`` can attach to these probes at runtime without restarting +the application. + +Building with USDT Support +-------------------------- + +Install the required header (Linux): + +.. 
code-block:: bash + + # Debian/Ubuntu + sudo apt-get install systemtap-sdt-dev + + # RHEL/CentOS/Fedora + sudo dnf install systemtap-sdt-devel + +Then configure nDPI with USDT enabled: + +.. code-block:: bash + + ./autogen.sh + ./configure --enable-usdt-probes + make + +.. note:: + + On macOS, ``sys/sdt.h`` is provided by the system. When nDPI is configured without + ``--enable-usdt-probes`` (the default), the probes compile to no-ops and have zero impact; with the option set, ``configure`` aborts if ``sys/sdt.h`` is missing. + +Available Probes +---------------- + +.. list-table:: + :header-rows: 1 + :widths: 20 40 40 + + * - Probe Name + - Arguments + - Description + * - ``flow_classified`` + - | ``arg0``: master protocol ID (``u16``) + | ``arg1``: application protocol ID (``u16``) + | ``arg2``: confidence level (``enum``) + | ``arg3``: category (``enum``) + - Fires exactly once per flow when classification is finalized. + Covers all exit paths: successful detection, giveup, max-packets, + nBPF match, and extra-dissector completion. + * - ``hostname_set`` + - | ``arg0``: hostname string (``char *``) + | ``arg1``: master protocol ID (``u16``) + | ``arg2``: application protocol ID (``u16``) + - Fires when a hostname/SNI is extracted from a flow. + Covers all protocols that resolve hostnames: TLS (SNI), DNS, + HTTP (Host header), QUIC, NetBIOS, DHCP, STUN, and others. + +bpftrace Examples +----------------- + +List available probes: + +.. code-block:: bash + + bpftrace -l "usdt:./src/lib/libndpi.so:ndpi:*" + +flow_classified Examples +^^^^^^^^^^^^^^^^^^^^^^^^ + +Real-time protocol classification log (the ``usdt::ndpi:...`` examples in this document omit the target, so run them with ``-p <PID>`` or put the binary/library path after ``usdt:``): + +.. code-block:: bash + + bpftrace -e 'usdt::ndpi:flow_classified { + printf("master=%d app=%d confidence=%d category=%d\n", + arg0, arg1, arg2, arg3); + }' + +Protocol distribution histogram: + +.. code-block:: bash + + bpftrace -e 'usdt::ndpi:flow_classified { + @proto_master[arg0] = count(); + }' + +Confidence level breakdown: + +.. code-block:: bash + + bpftrace -e 'usdt::ndpi:flow_classified { + @confidence[arg2] = count(); + }' + +Category distribution: + +.. 
code-block:: bash + + bpftrace -e 'usdt::ndpi:flow_classified { + @category[arg3] = count(); + }' + +Count unknown/unclassified flows: + +.. code-block:: bash + + bpftrace -e 'usdt::ndpi:flow_classified /arg0 == 0/ { + @unknown = count(); + }' + +Flow classification rate (flows/sec): + +.. code-block:: bash + + bpftrace -e 'usdt::ndpi:flow_classified { + @ = count(); + } interval:s:1 { print(@); clear(@); }' + +Filter by specific protocol (e.g., TLS = 91): + +.. code-block:: bash + + bpftrace -e 'usdt::ndpi:flow_classified /arg0 == 91/ { + @tls[arg1] = count(); + }' + +Flows classified as SocialNetwork (category 6): + +.. code-block:: bash + + bpftrace -e 'usdt::ndpi:flow_classified /arg3 == 6/ { + @social[arg0, arg1] = count(); + }' + +hostname_set Examples +^^^^^^^^^^^^^^^^^^^^^ + +Real-time hostname log: + +.. code-block:: bash + + bpftrace -e 'usdt::ndpi:hostname_set { + printf("%s (master=%d app=%d)\n", str(arg0), arg1, arg2); + }' + +Top hostnames by flow count: + +.. code-block:: bash + + bpftrace -e 'usdt::ndpi:hostname_set { + @top[str(arg0)] = count(); + }' + +Monitor a specific domain (e.g., all ``*.google.com`` traffic): + +.. code-block:: bash + + bpftrace -e 'usdt::ndpi:hostname_set /strcontains(str(arg0), "google.com")/ { + @google[str(arg0)] = count(); + }' + +Hostnames resolved via DNS only (DNS = 5): + +.. code-block:: bash + + bpftrace -e 'usdt::ndpi:hostname_set /arg1 == 5/ { + @dns[str(arg0)] = count(); + }' + +TLS SNI extraction in real time (TLS = 91): + +.. code-block:: bash + + bpftrace -e 'usdt::ndpi:hostname_set /arg1 == 91/ { + printf("TLS SNI: %s\n", str(arg0)); + }' + +Hostnames with their application protocol breakdown: + +.. code-block:: bash + + bpftrace -e 'usdt::ndpi:hostname_set { + @host_app[str(arg0), arg2] = count(); + }' + +Hostname resolution rate (hostnames/sec): + +.. 
code-block:: bash + + bpftrace -e 'usdt::ndpi:hostname_set { + @ = count(); + } interval:s:1 { print(@); clear(@); }' + +Watch for potential DGA activity (bursts of DNS hostname events; this counts events, not distinct names): + +.. code-block:: bash + + bpftrace -e 'usdt::ndpi:hostname_set /arg1 == 5/ { + @dns_events = count(); + } interval:s:10 { + printf("DNS hostname events in last 10s: %d\n", @dns_events); + clear(@dns_events); + }' + +Correlate hostnames with protocol classification (combine both probes): + +.. code-block:: bash + + bpftrace -e ' + usdt::ndpi:hostname_set { @host[tid] = str(arg0); } + usdt::ndpi:flow_classified /@host[tid] != ""/ { + printf("host=%s master=%d app=%d conf=%d cat=%d\n", + @host[tid], arg0, arg1, arg2, arg3); + delete(@host[tid]); + }' + +perf Example +------------ + +Record probe hits with ``perf``: + +.. code-block:: bash + + perf probe -x ./src/lib/libndpi.so sdt_ndpi:flow_classified + perf record -e sdt_ndpi:flow_classified -p $(pidof ndpiReader) -- sleep 10 + perf report + +Overhead +-------- + +- **When not tracing:** zero overhead. Probes compile to a single NOP instruction. +- **When actively tracing:** approximately 2-5 microseconds per probe hit, depending on + the tracing tool and the complexity of the attached script. +- Both probes fire on per-flow events (classification, hostname extraction) rather than per packet, so even under active tracing the + overhead is negligible for typical traffic volumes. 
diff --git a/src/include/ndpi_private.h b/src/include/ndpi_private.h index 4983e968d..3f0f6710d 100644 --- a/src/include/ndpi_private.h +++ b/src/include/ndpi_private.h @@ -36,6 +36,8 @@ extern "C" { #define _NDPI_CONFIG_H_ #endif +#include "ndpi_usdt.h" + /* NDPI_NODE */ typedef struct node_t { char *key; diff --git a/src/include/ndpi_usdt.h b/src/include/ndpi_usdt.h new file mode 100644 index 000000000..3663c6ae3 --- /dev/null +++ b/src/include/ndpi_usdt.h @@ -0,0 +1,38 @@ +/* + * + * Copyright (C) 2011-26 - ntop.org + * + * nDPI is free software: you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * nDPI is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with nDPI. If not, see <http://www.gnu.org/licenses/>. 
+ * + */ + +#ifndef __NDPI_USDT_H__ +#define __NDPI_USDT_H__ + +#ifdef HAVE_USDT /* defined by configure when --enable-usdt-probes finds sys/sdt.h */ + #include <sys/sdt.h> + #define NDPI_DTRACE0(name) DTRACE_PROBE(ndpi, name) + #define NDPI_DTRACE1(name, a) DTRACE_PROBE1(ndpi, name, a) + #define NDPI_DTRACE2(name, a, b) DTRACE_PROBE2(ndpi, name, a, b) + #define NDPI_DTRACE3(name, a, b, c) DTRACE_PROBE3(ndpi, name, a, b, c) + #define NDPI_DTRACE4(name, a, b, c, d) DTRACE_PROBE4(ndpi, name, a, b, c, d) +#else /* probes disabled: every macro expands to a no-op expression and its arguments are not evaluated */ + #define NDPI_DTRACE0(name) ((void)0) + #define NDPI_DTRACE1(name, a) ((void)0) + #define NDPI_DTRACE2(name, a, b) ((void)0) + #define NDPI_DTRACE3(name, a, b, c) ((void)0) + #define NDPI_DTRACE4(name, a, b, c, d) ((void)0) +#endif + +#endif /* __NDPI_USDT_H__ */ diff --git a/src/lib/ndpi_main.c b/src/lib/ndpi_main.c index f27f56e44..6d688d05f 100644 --- a/src/lib/ndpi_main.c +++ b/src/lib/ndpi_main.c @@ -9547,6 +9547,12 @@ static void internal_giveup(struct ndpi_detection_module_struct *ndpi_struct, "nDPI protocol does not match the server IP address"); } + NDPI_DTRACE4(flow_classified, + flow->detected_protocol_stack[0], /* proto_master; NOTE(review): confirm stack[0] is the master and not the application protocol */ + flow->detected_protocol_stack[1], /* proto_app */ + flow->confidence, + flow->category); + if(flow->state == NDPI_STATE_CLASSIFIED) { NDPI_LOG_ERR(ndpi_struct, "Already classified!\n"); /* We shoudn't be here ...*/ } @@ -13127,6 +13133,11 @@ char *ndpi_hostname_sni_set(struct ndpi_flow_struct *flow, } } + NDPI_DTRACE3(hostname_set, + dst, /* hostname string */ + flow->detected_protocol_stack[0], /* proto_master; NOTE(review): confirm stack[0] is the master and not the application protocol */ + flow->detected_protocol_stack[1]); /* proto_app */ + return dst; }