Add USDT static tracing probes for zero-overhead runtime observability (#3130)

Add opt-in USDT (User-level Statically Defined Tracing) support via
--enable-usdt-probes. When enabled, probes compile to single NOP
instructions with zero runtime cost; external tools like bpftrace and
perf can attach dynamically without restarting the application.

Two probes are added:
- flow_classified: fires once per flow at classification finalization,
  exposing master/app protocol, confidence, and category
- hostname_set: fires when a hostname/SNI is extracted, exposing the
  hostname string and master/app protocol

Includes CI job that builds with USDT, verifies probes via readelf, and
runs live bpftrace tracing against ndpiReader on test pcaps.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Ivan Nardi 2026-03-08 20:10:48 +01:00 committed by GitHub
parent d4ea2be1b7
commit e637cdd108
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 379 additions and 0 deletions

View file

@ -241,3 +241,75 @@ jobs:
msys2 -c './tests/do.sh'
msys2 -c './tests/do-unit.sh'
msys2 -c './tests/do-dga.sh'
test-usdt:
# CI job: build nDPI with --enable-usdt-probes, check that both probes are
# embedded in the binaries, then trace ndpiReader with bpftrace on test pcaps.
name: USDT probes (ubuntu-latest)
runs-on: ubuntu-latest
env:
CFLAGS: -Werror -g -O2
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Install Prerequisites
run: |
sudo apt-get update
sudo apt-get install autoconf automake libtool pkg-config gettext libjson-c-dev flex bison libpcap-dev
# systemtap-sdt-dev supplies sys/sdt.h, which configure requires when
# --enable-usdt-probes is given; bpftrace is used by the tracing steps below.
sudo apt-get install systemtap-sdt-dev bpftrace
- name: Configure nDPI with USDT
run: |
./autogen.sh && ./configure --enable-option-checking=fatal --enable-debug-messages --enable-usdt-probes
- name: Build nDPI
run: |
make -j all
make -C example
# Static check: dump the ELF notes of the library and of ndpiReader and look
# for stapsdt entries naming both probes.
- name: Verify USDT probes are embedded (readelf)
run: |
echo "=== Probes in libndpi.so ==="
readelf -n src/lib/libndpi.so | grep -A4 stapsdt
echo "=== Probes in ndpiReader ==="
readelf -n example/ndpiReader | grep -A4 stapsdt
# Verify both probes are present
readelf -n example/ndpiReader | grep -q 'flow_classified'
readelf -n example/ndpiReader | grep -q 'hostname_set'
echo "All expected USDT probes found."
# Same check from the tracer's point of view: both probe names must appear
# in bpftrace's probe listing for the ndpiReader binary.
- name: List probes via bpftrace
run: |
sudo bpftrace -l "usdt:./example/ndpiReader:ndpi:*" | tee /tmp/probe_list.txt
grep -q 'flow_classified' /tmp/probe_list.txt
grep -q 'hostname_set' /tmp/probe_list.txt
echo "bpftrace can see all probes."
- name: Test flow_classified probe with ndpiReader
run: |
# ndpiReader is launched by bpftrace via -c; stdout and stderr are merged
# and saved so the grep below can look for the @flows map in the output.
sudo bpftrace -e '
usdt:./example/ndpiReader:ndpi:flow_classified {
@flows = count();
@proto_master[arg0] = count();
@confidence[arg2] = count();
@category[arg3] = count();
}' -c './example/ndpiReader -q -i tests/pcap/http.pcapng' 2>&1 | tee /tmp/bpf_classified.txt
# Verify we actually traced some flows
grep -q '@flows:' /tmp/bpf_classified.txt
echo "flow_classified probe: OK"
- name: Test hostname_set probe with ndpiReader
run: |
# arg0 of hostname_set is read as a string with str() and used as a map key.
sudo bpftrace -e '
usdt:./example/ndpiReader:ndpi:hostname_set {
@hostnames = count();
@top[str(arg0)] = count();
}' -c './example/ndpiReader -q -i tests/pcap/tls_certificate_too_long.pcap' 2>&1 | tee /tmp/bpf_hostname.txt
# Verify we actually traced some hostnames
grep -q '@hostnames:' /tmp/bpf_hostname.txt
echo "hostname_set probe: OK"
- name: Test both probes simultaneously
run: |
# One bpftrace program attaches to both probes; each map must appear in the
# captured output for the step to pass.
sudo bpftrace -e '
usdt:./example/ndpiReader:ndpi:hostname_set {
@hostnames = count();
}
usdt:./example/ndpiReader:ndpi:flow_classified {
@classified = count();
}' -c './example/ndpiReader -q -i tests/pcap/dns.pcap' 2>&1 | tee /tmp/bpf_both.txt
grep -q '@classified:' /tmp/bpf_both.txt
grep -q '@hostnames:' /tmp/bpf_both.txt
echo "Both probes fired successfully."

View file

@ -26,6 +26,7 @@ EXTRA_DIST = README.md README.fuzzer.md CHANGELOG.md CONTRIBUTING.md \
doc/configuration_parameters.rst \
doc/library_initialization.rst \
doc/monitoring.rst \
doc/usdt.rst \
python/DEV_GUIDE.md python/dev_requirements.txt python/ndpi_example.py python/ndpi/__init__.py \
python/ndpi/ndpi_build.py python/ndpi/ndpi.py python/README.md \
python/requirements.txt python/setup.py python/tests.py \

View file

@ -31,6 +31,15 @@ AC_ARG_ENABLE(plugin-support, AS_HELP_STRING([--disable-plugin-support], [Disabl
AC_ARG_ENABLE(global-context-support, AS_HELP_STRING([--disable-global-context-support], [Disable support for global context. No external dependency on libpthread]))
AC_ARG_ENABLE(memory-track-origins, AS_HELP_STRING([--disable-memory-track-origins], [Don't add -fsanitize-memory-track-origins flag when compiling with MASAN support. Useful for faster CI]))
AC_ARG_ENABLE(old-croaring, AS_HELP_STRING([--enable-old-croaring], [Always use old croaring version, instead of try to auto-detect if v4 works. Useful with old compilers]),[enable_oldcroaring=$enableval],[enable_oldcroaring=no])
# Opt-in USDT probes: off unless --enable-usdt-probes is passed.
# When requested, sys/sdt.h must be found: HAVE_USDT is then defined,
# otherwise configure stops with an error.
AC_ARG_ENABLE(usdt-probes, AS_HELP_STRING([--enable-usdt-probes], [Enable USDT/DTrace static tracing probes]),[enable_usdt_probes=$enableval],[enable_usdt_probes=no])
AS_IF([test "x$enable_usdt_probes" = "xyes"], [
AC_CHECK_HEADER([sys/sdt.h], [
AC_DEFINE([HAVE_USDT], [1], [Define if USDT probes are enabled])
], [
AC_MSG_ERROR([--enable-usdt-probes requires sys/sdt.h (install systemtap-sdt-dev)])
])
])
#These two variables are not supposed to be set/changed by the user:
#he should use standard CFLAGS/LDFLAGS, instead
@ -762,6 +771,12 @@ AS_IF([test "x${CUSTOM_NDPI}" = "x-DCUSTOM_NDPI_PROTOCOLS"],
[SUMMARY="${SUMMARY}
Custom protocols: no"])
# Append the USDT probes state (enabled or disabled) to the configure summary.
AS_IF([test "x${enable_usdt_probes}" = "xyes"],
[SUMMARY="${SUMMARY}
USDT probes: enabled"],
[SUMMARY="${SUMMARY}
USDT probes: disabled"])
SUMMARY="${SUMMARY}
Optional Dependencies:"

View file

@ -20,6 +20,7 @@ nDPI is an open source DPI (Deep Packet Inspection) toolkit for traffic analysis
monitoring
protocols
flow_risks
usdt
.. toctree::
:caption: Other Products

239
doc/usdt.rst Normal file
View file

@ -0,0 +1,239 @@
USDT Probes
===========
nDPI supports `USDT <https://lwn.net/Articles/753601/>`_ (User-level Statically Defined Tracing)
probes for zero-overhead dynamic tracing in production. USDT probes compile to a single NOP
instruction and have no runtime cost when not actively being traced. External tools like
``bpftrace``, ``perf``, and ``SystemTap`` can attach to these probes at runtime without restarting
the application.
Building with USDT Support
--------------------------
Install the required header (Linux):
.. code-block:: bash
# Debian/Ubuntu
sudo apt-get install systemtap-sdt-dev
# RHEL/CentOS/Fedora
sudo dnf install systemtap-sdt-devel
Then configure nDPI with USDT enabled:
.. code-block:: bash
./autogen.sh
./configure --enable-usdt-probes
make
.. note::
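On macOS, ``sys/sdt.h`` is provided by the system. If the header cannot be found,
``./configure --enable-usdt-probes`` stops with an error. When nDPI is built without
``--enable-usdt-probes`` (the default), the probe macros expand to no-ops and have zero impact.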
Available Probes
----------------
.. list-table::
:header-rows: 1
:widths: 20 40 40
* - Probe Name
- Arguments
- Description
* - ``flow_classified``
- | ``arg0``: master protocol ID (``u16``)
| ``arg1``: application protocol ID (``u16``)
| ``arg2``: confidence level (``enum``)
| ``arg3``: category (``enum``)
- Fires exactly once per flow when classification is finalized.
Covers all exit paths: successful detection, giveup, max-packets,
nBPF match, and extra-dissector completion.
* - ``hostname_set``
- | ``arg0``: hostname string (``char *``)
| ``arg1``: master protocol ID (``u16``)
| ``arg2``: application protocol ID (``u16``)
- Fires when a hostname/SNI is extracted from a flow.
Covers all protocols that resolve hostnames: TLS (SNI), DNS,
HTTP (Host header), QUIC, NetBIOS, DHCP, STUN, and others.
bpftrace Examples
-----------------
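Most examples below write the probe as ``usdt::ndpi:<probe>``, leaving the binary path empty.
In that form bpftrace needs a target, e.g. attach to a running process with ``-p <pid>``;
alternatively, write the path explicitly, as in ``usdt:./example/ndpiReader:ndpi:flow_classified``.
List available probes: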
.. code-block:: bash
bpftrace -l "usdt:./src/lib/libndpi.so:ndpi:*"
flow_classified Examples
^^^^^^^^^^^^^^^^^^^^^^^^
Real-time protocol classification log:
.. code-block:: bash
bpftrace -e 'usdt::ndpi:flow_classified {
printf("master=%d app=%d confidence=%d category=%d\n",
arg0, arg1, arg2, arg3);
}'
Protocol distribution histogram:
.. code-block:: bash
bpftrace -e 'usdt::ndpi:flow_classified {
@proto_master[arg0] = count();
}'
Confidence level breakdown:
.. code-block:: bash
bpftrace -e 'usdt::ndpi:flow_classified {
@confidence[arg2] = count();
}'
Category distribution:
.. code-block:: bash
bpftrace -e 'usdt::ndpi:flow_classified {
@category[arg3] = count();
}'
Count unknown/unclassified flows:
.. code-block:: bash
bpftrace -e 'usdt::ndpi:flow_classified /arg0 == 0/ {
@unknown = count();
}'
Flow classification rate (flows/sec):
.. code-block:: bash
bpftrace -e 'usdt::ndpi:flow_classified {
@ = count();
} interval:s:1 { print(@); clear(@); }'
Filter by specific protocol (e.g., TLS = 91):
.. code-block:: bash
bpftrace -e 'usdt::ndpi:flow_classified /arg0 == 91/ {
@tls[arg1] = count();
}'
Flows classified as SocialNetwork (category 6):
.. code-block:: bash
bpftrace -e 'usdt::ndpi:flow_classified /arg3 == 6/ {
@social[arg0, arg1] = count();
}'
hostname_set Examples
^^^^^^^^^^^^^^^^^^^^^
Real-time hostname log:
.. code-block:: bash
bpftrace -e 'usdt::ndpi:hostname_set {
printf("%s (master=%d app=%d)\n", str(arg0), arg1, arg2);
}'
Top hostnames by flow count:
.. code-block:: bash
bpftrace -e 'usdt::ndpi:hostname_set {
@top[str(arg0)] = count();
}'
Monitor hostnames containing a given string (e.g., ``google.com``; this is a substring match, so it is broader than a strict ``*.google.com`` suffix match):
.. code-block:: bash
bpftrace -e 'usdt::ndpi:hostname_set /strcontains(str(arg0), "google.com")/ {
@google[str(arg0)] = count();
}'
Hostnames resolved via DNS only (DNS = 5):
.. code-block:: bash
bpftrace -e 'usdt::ndpi:hostname_set /arg1 == 5/ {
@dns[str(arg0)] = count();
}'
TLS SNI extraction in real time (TLS = 91):
.. code-block:: bash
bpftrace -e 'usdt::ndpi:hostname_set /arg1 == 91/ {
printf("TLS SNI: %s\n", str(arg0));
}'
Hostnames with their application protocol breakdown:
.. code-block:: bash
bpftrace -e 'usdt::ndpi:hostname_set {
@host_app[str(arg0), arg2] = count();
}'
Hostname resolution rate (hostnames/sec):
.. code-block:: bash
bpftrace -e 'usdt::ndpi:hostname_set {
@ = count();
} interval:s:1 { print(@); clear(@); }'
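Watch the DNS hostname rate as a coarse indicator of potential DGA activity (this counts ``hostname_set`` events with DNS as master protocol in each 10-second window; it does not deduplicate hostnames):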
.. code-block:: bash
bpftrace -e 'usdt::ndpi:hostname_set /arg1 == 5/ {
@unique_dns = count();
} interval:s:10 {
printf("Unique DNS hostnames in last 10s: %d\n", @unique_dns);
clear(@unique_dns);
}'
Correlate hostnames with protocol classification (combine both probes):
.. code-block:: bash
bpftrace -e '
usdt::ndpi:hostname_set { @host[tid] = str(arg0); }
usdt::ndpi:flow_classified /@host[tid] != ""/ {
printf("host=%s master=%d app=%d conf=%d cat=%d\n",
@host[tid], arg0, arg1, arg2, arg3);
delete(@host[tid]);
}'
perf Example
------------
Record probe hits with ``perf``:
.. code-block:: bash
perf probe -x ./src/lib/libndpi.so sdt_ndpi:flow_classified
perf record -e sdt_ndpi:flow_classified -p $(pidof ndpiReader) -- sleep 10
perf report
Overhead
--------
- **When not tracing:** zero overhead. Probes compile to a single NOP instruction.
- **When actively tracing:** approximately 2-5 microseconds per probe hit, depending on
the tracing tool and the complexity of the attached script.
- Both probes fire once per flow (not per packet), so even under active tracing the
overhead is negligible for typical traffic volumes.

View file

@ -36,6 +36,8 @@ extern "C" {
#define _NDPI_CONFIG_H_
#endif
#include "ndpi_usdt.h"
/* NDPI_NODE */
typedef struct node_t {
char *key;

38
src/include/ndpi_usdt.h Normal file
View file

@ -0,0 +1,38 @@
/*
*
* Copyright (C) 2011-26 - ntop.org
*
* nDPI is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* nDPI is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with nDPI. If not, see <http://www.gnu.org/licenses/>.
*
*/
#ifndef __NDPI_USDT_H__
#define __NDPI_USDT_H__

/*
 * Static tracing (USDT) probe wrappers.
 *
 * NDPI_DTRACE<N>(name, ...) marks a probe called "name" in the "ndpi"
 * provider, carrying N arguments.
 *
 *  - HAVE_USDT not defined: every wrapper expands to ((void)0). The probe
 *    name and its arguments are dropped by the preprocessor, so argument
 *    expressions are never evaluated.
 *  - HAVE_USDT defined: every wrapper forwards to the DTRACE_PROBE<N>
 *    macro of the same arity from <sys/sdt.h>.
 */

#if !defined(HAVE_USDT)

/* Probes disabled: wrappers are empty expressions. */
#define NDPI_DTRACE0(name)                         ((void)0)
#define NDPI_DTRACE1(name, arg0)                   ((void)0)
#define NDPI_DTRACE2(name, arg0, arg1)             ((void)0)
#define NDPI_DTRACE3(name, arg0, arg1, arg2)       ((void)0)
#define NDPI_DTRACE4(name, arg0, arg1, arg2, arg3) ((void)0)

#else /* HAVE_USDT */

#include <sys/sdt.h>

/* Probes enabled: forward to <sys/sdt.h> under the "ndpi" provider. */
#define NDPI_DTRACE0(name)                         DTRACE_PROBE(ndpi, name)
#define NDPI_DTRACE1(name, arg0)                   DTRACE_PROBE1(ndpi, name, arg0)
#define NDPI_DTRACE2(name, arg0, arg1)             DTRACE_PROBE2(ndpi, name, arg0, arg1)
#define NDPI_DTRACE3(name, arg0, arg1, arg2)       DTRACE_PROBE3(ndpi, name, arg0, arg1, arg2)
#define NDPI_DTRACE4(name, arg0, arg1, arg2, arg3) DTRACE_PROBE4(ndpi, name, arg0, arg1, arg2, arg3)

#endif /* HAVE_USDT */

#endif /* __NDPI_USDT_H__ */

View file

@ -9547,6 +9547,12 @@ static void internal_giveup(struct ndpi_detection_module_struct *ndpi_struct,
"nDPI protocol does not match the server IP address");
}
NDPI_DTRACE4(flow_classified,
flow->detected_protocol_stack[0], /* proto_master */
flow->detected_protocol_stack[1], /* proto_app */
flow->confidence,
flow->category);
if(flow->state == NDPI_STATE_CLASSIFIED) {
NDPI_LOG_ERR(ndpi_struct, "Already classified!\n"); /* We shoudn't be here ...*/
}
@ -13127,6 +13133,11 @@ char *ndpi_hostname_sni_set(struct ndpi_flow_struct *flow,
}
}
NDPI_DTRACE3(hostname_set,
dst, /* hostname string */
flow->detected_protocol_stack[0], /* proto_master */
flow->detected_protocol_stack[1]); /* proto_app */
return dst;
}