<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>XDP Archives - Linuxcent</title>
	<atom:link href="https://linuxcent.com/tag/xdp/feed/" rel="self" type="application/rss+xml" />
	<link>https://linuxcent.com/tag/xdp/</link>
	<description>Infrastructure security, from the kernel up.</description>
	<lastBuildDate>Tue, 21 Apr 2026 15:14:03 +0000</lastBuildDate>
	<language>en-US</language>
	<sy:updatePeriod>
	hourly	</sy:updatePeriod>
	<sy:updateFrequency>
	1	</sy:updateFrequency>
	<generator>https://wordpress.org/?v=6.9.4</generator>

<image>
	<url>https://linuxcent.com/wp-content/uploads/2026/04/favicon-512x512-1-150x150.png</url>
	<title>XDP Archives - Linuxcent</title>
	<link>https://linuxcent.com/tag/xdp/</link>
	<width>32</width>
	<height>32</height>
</image> 
<site xmlns="com-wordpress:feed-additions:1">211632295</site>	<item>
		<title>XDP — Packets Processed Before the Kernel Knows They Arrived</title>
		<link>https://linuxcent.com/ebpf-xdp-kubernetes-networking/</link>
					<comments>https://linuxcent.com/ebpf-xdp-kubernetes-networking/#respond</comments>
		
		<dc:creator><![CDATA[Vamshi Krishna Santhapuri]]></dc:creator>
		<pubDate>Tue, 21 Apr 2026 14:53:14 +0000</pubDate>
				<category><![CDATA[eBPF]]></category>
		<category><![CDATA[Cilium]]></category>
		<category><![CDATA[DDoS mitigation]]></category>
		<category><![CDATA[Kubernetes]]></category>
		<category><![CDATA[Linux Networking]]></category>
		<category><![CDATA[SRE]]></category>
		<category><![CDATA[XDP]]></category>
		<guid isPermaLink="false">https://linuxcent.com/ebpf-xdp-kubernetes-networking/</guid>

					<description><![CDATA[<p><span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 10</span> <span class="rt-label rt-postfix">minutes</span></span>XDP processes packets before the Linux kernel allocates a single byte of memory. How Cilium uses XDP for service load balancing and how it differs from iptables.</p>
<p>The post <a href="https://linuxcent.com/ebpf-xdp-kubernetes-networking/">XDP — Packets Processed Before the Kernel Knows They Arrived</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></description>
										<content:encoded><![CDATA[<span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 10</span> <span class="rt-label rt-postfix">minutes</span></span><style>
pre{position:relative;background:#1e1e1e;color:#d4d4d4;
    padding:16px 16px 16px 20px;border-radius:6px;overflow-x:auto;
    font-family:'JetBrains Mono','Fira Code','Cascadia Code',Consolas,'Courier New',monospace;
    font-size:.88em;line-height:1.6;border-left:4px solid #555}
code{background:#f4f4f4;padding:2px 5px;border-radius:3px;font-size:.9em}
pre code{background:transparent;padding:0;color:inherit}
pre[data-lang="bash"],pre[data-lang="sh"],
pre[data-lang="shell"],pre[data-lang="zsh"]{border-left-color:#4ec9b0}
pre[data-lang="yaml"],pre[data-lang="json"],
pre[data-lang="toml"],pre[data-lang="xml"]{border-left-color:#569cd6}
pre[data-lang="python"],pre[data-lang="go"],pre[data-lang="rust"],
pre[data-lang="java"],pre[data-lang="c"],pre[data-lang="cpp"]{border-left-color:#c586c0}
pre[data-lang="text"],pre[data-lang="output"],
pre[data-lang="console"]{border-left-color:#888}
.lc-copy-btn{position:absolute;top:8px;right:8px;background:#2d2d2d;color:#ccc;
    border:1px solid #444;border-radius:4px;padding:3px 9px;font-size:.75em;
    font-family:system-ui,sans-serif;cursor:pointer;opacity:0;
    transition:opacity .15s,background .15s;line-height:1.6}
pre:hover .lc-copy-btn{opacity:1}
.lc-copy-btn:hover{background:#3a3a3a;color:#fff}
.lc-copy-btn.copied{color:#4ec9b0;border-color:#4ec9b0}
.lc-lang-badge{position:absolute;top:8px;left:20px;font-family:system-ui,sans-serif;
    font-size:.7em;color:#666;text-transform:uppercase;letter-spacing:.04em;
    line-height:1;pointer-events:none;opacity:0;transition:opacity .15s}
pre:hover .lc-lang-badge{opacity:1}
table{border-collapse:collapse;width:100%;margin:16px 0}
th,td{border:1px solid #ddd;padding:10px 14px;text-align:left}
th{background:#f0f0f0;font-weight:600}
tr:nth-child(even){background:#fafafa}
</style>
<p><script>
(function(){
  if(window.__lcCodeEnhanced)return;
  window.__lcCodeEnhanced=true;
  function enhance(){
    document.querySelectorAll('pre').forEach(function(pre){
      var code=pre.querySelector('code');
      var lang='';
      if(code){var m=(code.className||'').match(/language-(\S+)/);if(m)lang=m[1].toLowerCase();}
      if(lang)pre.setAttribute('data-lang',lang);
      if(lang){var badge=document.createElement('span');badge.className='lc-lang-badge';badge.textContent=lang;pre.insertBefore(badge,pre.firstChild);}
      var btn=document.createElement('button');
      btn.className='lc-copy-btn';btn.textContent='Copy';btn.setAttribute('aria-label','Copy code to clipboard');
      pre.appendChild(btn);
      btn.addEventListener('click',function(){
        var text=code?code.innerText:pre.innerText;
        if(navigator.clipboard&&window.isSecureContext){
          navigator.clipboard.writeText(text).then(function(){ok(btn);}).catch(function(){fb(text,btn);});
        }else{fb(text,btn);}
      });
    });
  }
  function ok(btn){btn.textContent='Copied!';btn.classList.add('copied');setTimeout(function(){btn.textContent='Copy';btn.classList.remove('copied');},2000);}
  function fb(text,btn){
    try{var ta=document.createElement('textarea');ta.value=text;ta.style.cssText='position:fixed;left:-9999px;top:-9999px;opacity:0';document.body.appendChild(ta);ta.select();document.execCommand('copy');document.body.removeChild(ta);ok(btn);}
    catch(e){btn.textContent='✗ Failed';setTimeout(function(){btn.textContent='Copy';},2000);}
  }
  if(document.readyState==='loading'){document.addEventListener('DOMContentLoaded',enhance);}else{enhance();}
})();
</script></p>
<p><em>eBPF: From Kernel to Cloud, Episode 7</em><br />
<em><a href="https://linuxcent.com/what-is-ebpf-linux-kubernetes/">What Is eBPF?</a> · <a href="https://linuxcent.com/bpf-verifier-kubernetes-safety/">The BPF Verifier</a> · <a href="https://linuxcent.com/ebpf-vs-kernel-modules-kubernetes/">eBPF vs Kernel Modules</a> · <a href="https://linuxcent.com/ebpf-program-types-kubernetes/">eBPF Program Types</a> · <a href="https://linuxcent.com/ebpf-maps-explained/">eBPF Maps</a> · <a href="https://linuxcent.com/ebpf-co-re-libbpf-portable-programs/">CO-RE and libbpf</a> · </em><em>XDP</em></p>
<hr />
<h2 id="tldr">TL;DR</h2>
<ul>
<li>XDP fires before <code class="" data-line="">sk_buff</code> allocation — the earliest possible kernel hook for packet processing<br />
<em>(<code class="" data-line="">sk_buff</code> = the kernel&#8217;s socket buffer — every normal packet requires one to be allocated, which adds up fast at scale)</em></li>
<li>Three modes: native (in-driver, full performance), generic (fallback, no perf gain), offloaded (NIC ASIC)</li>
<li>XDP context is raw packet bytes — no socket, no cgroup, no pod identity; handle non-IP traffic explicitly</li>
<li>Every pointer dereference requires a bounds check against <code class="" data-line="">data_end</code> — the verifier enforces this</li>
<li><code class="" data-line="">BPF_MAP_TYPE_LPM_TRIE</code> is the right map type for IP prefix blocklists — handles /32 hosts and CIDRs together</li>
<li>XDP metadata area enables coordination with TC programs — classify at XDP speed, enforce with pod context at TC</li>
</ul>
<hr />
<p>XDP eBPF fires before <code class="" data-line="">sk_buff</code> allocation — the earliest possible kernel hook, and the reason iptables rules can be technically correct while still burning CPU at high packet rates. I had iptables DROP rules installed and working during a SYN flood. Packets were being dropped. CPU was still burning at 28% software interrupt time. The rules weren&#8217;t wrong. The hook was in the wrong place.</p>
<hr />
<p>A client&#8217;s cluster was under a SYN flood — roughly 1 million packets per second from a rotating set of source IPs. We had iptables DROP rules installed within the first ten minutes, blocklist updated every 30 seconds as new source ranges appeared. The flood traffic dropped in volume. But node CPU stayed high. The <code class="" data-line="">%si</code> column in <code class="" data-line="">top</code> — software interrupt time — was sitting at 25–30%.</p>
<blockquote>
<p><strong><code class="" data-line="">%si</code> in <code class="" data-line="">top</code></strong> is the percentage of CPU time spent servicing software interrupts (softirqs) — deferred kernel work such as network packet processing — separate from your application&#8217;s CPU usage. On a quiet managed cluster (EKS, GKE) this is usually under 1%. Under a packet flood, high <code class="" data-line="">%si</code> means the kernel is burning cycles just <em>receiving</em> packets, before your workloads run at all. It&#8217;s the metric that tells you the problem is below the application layer.</p>
</blockquote>
<p>I didn&#8217;t understand why. The iptables rules were matching. Packets were being dropped. Why was the CPU still burning?</p>
<p>The answer is where in the kernel the drop was happening. iptables fires inside the <code class="" data-line="">netfilter</code> framework — after the kernel has already allocated an <code class="" data-line="">sk_buff</code> for the packet, done DMA from the NIC ring buffer, and traversed several netfilter hooks.</p>
<blockquote>
<p><strong>netfilter</strong> is the Linux kernel subsystem that handles packet filtering, NAT, and connection tracking. iptables is the userspace CLI that writes rules into it. At high packet rates, the cost isn&#8217;t the rule match — it&#8217;s the kernel work that happens before the rule is evaluated. At 1 million packets per second, the allocation cost alone is measurable. The attack was &#8220;slow&#8221; in network terms, but fast enough to keep the kernel memory allocator and netfilter traversal continuously busy.</p>
</blockquote>
<p>XDP fires before any of that. Before <code class="" data-line="">sk_buff</code>. Before routing. Before the kernel network stack has touched the packet at all. A DROP decision at the XDP layer costs one bounds check and a return value. Nothing else.</p>
<h2 id="quick-check-is-xdp-running-on-your-cluster">Quick Check: Is XDP Running on Your Cluster?</h2>
<p>Before the data path walkthrough — a two-command check you can run right now on any cluster node:</p>
<pre><code class="" data-line=""># SSH into a worker node, then:
bpftool net list
</code></pre>
<p>On a Cilium-managed node, you&#8217;ll see something like:</p>
<pre><code class="" data-line="">eth0 (index 2):
        xdpdrv  id 44

lxc8a3f21b (index 7):
        tc ingress id 47
        tc egress  id 48
</code></pre>
<p>Reading the output:<br />
&#8211; <code class="" data-line="">xdpdrv</code> — XDP in <strong>native mode</strong>, running in the NIC driver before <code class="" data-line="">sk_buff</code> (this is what you want)<br />
&#8211; <code class="" data-line="">xdpgeneric</code> instead of <code class="" data-line="">xdpdrv</code> — <strong>generic mode</strong>, runs after <code class="" data-line="">sk_buff</code> allocation, no performance benefit<br />
&#8211; No XDP line at all — XDP not deployed; your CNI uses iptables for service forwarding</p>
<p>If you&#8217;re on <strong>EKS with <code class="" data-line="">aws-vpc-cni</code></strong> or <strong>GKE with <code class="" data-line="">kubenet</code></strong>, you likely won&#8217;t see XDP here — those CNIs use iptables. Understanding this section explains why teams migrating to Cilium see lower node CPU under the same traffic load.</p>
<h2 id="where-xdp-sits-in-the-kernel-data-path">Where XDP Sits in the Kernel Data Path</h2>
<p>The standard Linux packet receive path:</p>
<pre><code class="" data-line="">NIC hardware
  ↓
DMA to ring buffer (kernel memory)
  ↓
[XDP hook — fires here, before sk_buff]
  ├── XDP_DROP   → discard, zero further allocation
  ├── XDP_PASS   → continue to kernel network stack
  ├── XDP_TX     → transmit back out the same interface
  └── XDP_REDIRECT → forward to another interface or CPU
  ↓
sk_buff allocated from slab allocator
  ↓
netfilter: PREROUTING
  ↓
IP routing decision
  ↓
netfilter: INPUT or FORWARD
  ↓  [iptables fires somewhere in here]
socket receive queue
  ↓
userspace application
</code></pre>
<p>XDP runs at the driver level, in the NAPI poll context — the same context where the driver is processing received packets off the ring buffer. The program runs before the kernel&#8217;s general networking code gets involved. There&#8217;s no <code class="" data-line="">sk_buff</code>, no reference counting, no slab allocation.</p>
<blockquote>
<p><strong>NAPI</strong> (New API) is how modern Linux receives packets efficiently. Instead of one CPU interrupt per packet (catastrophically expensive at 1Mpps), the NIC fires a single interrupt, then the kernel polls the NIC ring buffer in batches until it&#8217;s drained. XDP runs inside this polling loop — as close to the hardware as software gets without running on the NIC itself.</p>
</blockquote>
<p>At 1Mpps, the difference between XDP_DROP and an iptables DROP is roughly the cost of allocating and then immediately freeing 1 million <code class="" data-line="">sk_buff</code> objects per second — plus netfilter traversal, connection tracking lookup, and the DROP action itself. That&#8217;s the CPU time that was burning.</p>
<p>After moving the blocklist to an XDP program, the <code class="" data-line="">%si</code> on the same traffic load dropped from 28% to 3%.</p>
<h2 id="xdp-modes">XDP Modes</h2>
<p>XDP operates in three modes, and which one you get depends on your NIC driver.</p>
<h3 id="native-xdp-xdp_flags_drv_mode">Native XDP (XDP_FLAGS_DRV_MODE)</h3>
<p>The eBPF program runs directly in the NIC driver&#8217;s NAPI poll function — in interrupt context, before <code class="" data-line="">sk_buff</code>. This is the only mode that delivers the full performance benefit.</p>
<p>Driver support is required. The widely supported drivers: <code class="" data-line="">mlx4</code>, <code class="" data-line="">mlx5</code> (Mellanox/NVIDIA), <code class="" data-line="">i40e</code>, <code class="" data-line="">ice</code> (Intel), <code class="" data-line="">bnxt_en</code> (Broadcom), <code class="" data-line="">virtio_net</code> (KVM/QEMU), <code class="" data-line="">veth</code> (containers). Check support:</p>
<pre><code class="" data-line=""># Verify native XDP support on your driver
ethtool -i eth0 | grep driver
# driver: mlx5_core  ← supports native XDP

# Load in native mode
ip link set dev eth0 xdpdrv obj blocklist.bpf.o sec xdp
</code></pre>
<p>The <code class="" data-line="">veth</code> driver supporting native XDP is what makes XDP meaningful inside Kubernetes pods — each pod&#8217;s veth interface can run an XDP program at wire speed.</p>
<h3 id="generic-xdp-xdp_flags_skb_mode">Generic XDP (XDP_FLAGS_SKB_MODE)</h3>
<p>Fallback for drivers that don&#8217;t support native XDP. The program still runs, but it runs after <code class="" data-line="">sk_buff</code> allocation, as a hook in the <code class="" data-line="">netif_receive_skb</code> path. No performance benefit over early netfilter. <code class="" data-line="">sk_buff</code> is still allocated and freed for every packet.</p>
<pre><code class="" data-line=""># Generic mode — development and testing only
ip link set dev eth0 xdpgeneric obj blocklist.bpf.o sec xdp
</code></pre>
<p>Use this for development on a laptop with a NIC that lacks native XDP support. Never benchmark with it and never use it in production expecting performance gains.</p>
<h3 id="offloaded-xdp">Offloaded XDP</h3>
<p>Runs on the NIC&#8217;s own processing unit (SmartNIC). Zero CPU involvement — the XDP decision happens in NIC hardware. Supported by Netronome Agilio NICs. Rare in production, but the theoretical ceiling for XDP performance.</p>
<h2 id="the-xdp-context-what-your-program-can-see">The XDP Context: What Your Program Can See</h2>
<p>XDP programs receive one argument: <code class="" data-line="">struct xdp_md</code>.</p>
<pre><code class="" data-line="">struct xdp_md {
    __u32 data;           // offset of first packet byte in the ring buffer page
    __u32 data_end;       // offset past the last byte
    __u32 data_meta;      // metadata area before data (XDP metadata for TC cooperation)
    __u32 ingress_ifindex;
    __u32 rx_queue_index;
};
</code></pre>
<p><code class="" data-line="">data</code> and <code class="" data-line="">data_end</code> are used as follows:</p>
<pre><code class="" data-line="">void *data     = (void *)(long)ctx-&gt;data;
void *data_end = (void *)(long)ctx-&gt;data_end;

// Every pointer dereference must be bounds-checked first
struct ethhdr *eth = data;
if ((void *)(eth + 1) &gt; data_end)
    return XDP_PASS;  // malformed or truncated packet
</code></pre>
<p>The verifier enforces these bounds checks — every pointer derived from <code class="" data-line="">ctx-&gt;data</code> must be validated before use. The error <code class="" data-line="">invalid mem access &#039;inv&#039;</code> means you dereferenced a pointer without checking the bounds. This is the most common cause of XDP program rejection.</p>
<blockquote>
<p><strong>For operators (not writing XDP code):</strong> You&#8217;ll see <code class="" data-line="">invalid mem access &#039;inv&#039;</code> in logs when an eBPF program is rejected at load time — most commonly during a Cilium upgrade or a custom tool deployment on a kernel the tool wasn&#8217;t built for. The fix is in the eBPF source or the tool version, not the cluster config. If you see this error and you&#8217;re not writing eBPF yourself, it means the tool&#8217;s build doesn&#8217;t match your kernel version — upgrade the tool or check its supported kernel matrix.</p>
</blockquote>
<p>What XDP <strong>cannot</strong> see:<br />
&#8211; Socket state — no socket buffer exists yet<br />
&#8211; Cgroup hierarchy — no pod identity<br />
&#8211; Process information — no PID, no container<br />
&#8211; Connection tracking state (unless you maintain it yourself in a map)</p>
<p>XDP is ingress-only. It fires on packets arriving at an interface, not departing. For egress, TC is the hook.</p>
<h2 id="what-this-means-on-your-cluster-right-now">What This Means on Your Cluster Right Now</h2>
<p>Every Cilium-managed node has XDP programs running. Here&#8217;s how to see them:</p>
<pre><code class="" data-line=""># All XDP programs on all interfaces — this is the full picture
bpftool net list
# Sample output on a Cilium node:
#
# eth0 (index 2):
#         xdpdrv  id 44         ← XDP in native mode on the node uplink
#
# lxc8a3f21b (index 7):
#         tc ingress id 47      ← TC enforces NetworkPolicy on pod ingress
#         tc egress  id 48      ← TC enforces NetworkPolicy on pod egress
#
# &quot;xdpdrv&quot;     = native mode (runs in NIC driver, before sk_buff — full performance)
# &quot;xdpgeneric&quot; = fallback mode (after sk_buff — no performance benefit over iptables)

# Which mode is active?
ip link show eth0 | grep xdp
# xdp mode drv  ← native (full performance)
# xdp mode generic  ← fallback (no perf benefit)

# Details on the XDP program ID
bpftool prog show id $(bpftool net show dev eth0 | grep xdp | awk &#039;{print $NF}&#039;)
# Shows: loaded_at, tag, xlated bytes, jited bytes, map IDs
</code></pre>
<p>The <code class="" data-line="">map IDs</code> in that output are the BPF maps the XDP program is using — typically the service VIP table for DNAT, and in security tools, the blocklist or allowlist. To see what&#8217;s in them:</p>
<pre><code class="" data-line=""># List maps used by the XDP program
bpftool prog show id &lt;PROG_ID&gt; | grep map_ids

# Dump the service map (for a Cilium node — this is the load balancer table)
bpftool map dump id &lt;MAP_ID&gt; | head -40
</code></pre>
<p>For a blocklist scenario — like the SYN flood mitigation above — the <code class="" data-line="">BPF_MAP_TYPE_LPM_TRIE</code> is the standard data structure. A lookup for <code class="" data-line="">192.168.1.45</code> hits a <code class="" data-line="">192.168.1.0/24</code> entry in the same map, handling both host /32s and CIDR ranges in one lookup. The practical operational check:</p>
<pre><code class="" data-line=""># Count entries in an XDP filter map
bpftool map dump id &lt;BLOCKLIST_MAP_ID&gt; | grep -c &quot;key&quot;

# Verify XDP is active and inspect program details
bpftool net show dev eth0
</code></pre>
<h2 id="xdp-metadata-cooperating-with-tc">XDP Metadata: Cooperating with TC</h2>
<p>Think of it as a sticky note attached to the packet. XDP writes the note at line speed (no context about pods or sockets). TC reads it later when full context is available, and acts on it. The packet carries the note between them.</p>
<p>More precisely: XDP can write metadata into the area before <code class="" data-line="">ctx-&gt;data</code> — a small scratch space that survives as the packet moves from XDP to the TC hook. This is the coordination mechanism between the two eBPF layers.</p>
<p>The pattern is: XDP classifies at speed (no <code class="" data-line="">sk_buff</code> overhead), TC enforces with pod context (where you have socket identity). XDP writes a classification tag into the metadata area. TC reads it and makes the policy decision.</p>
<p>This is the architecture behind tools like Pro-NDS: the fast-path pattern matching (connection tracking, signature matching) happens at XDP before any kernel allocation. The enforcement action — which requires knowing which pod sent this — happens at TC using the metadata XDP already wrote.</p>
<p>From an operational standpoint, when you see two eBPF programs on the same interface (one XDP, one TC), this pipeline is the likely explanation. The <code class="" data-line="">bpftool net list</code> output shows both:</p>
<pre><code class="" data-line="">bpftool net list
# xdpdrv id 44 on eth0       ← XDP classifier running at line rate
# tc ingress id 47 on eth0   ← TC enforcer reading XDP metadata
</code></pre>
<h2 id="how-cilium-uses-xdp">How Cilium Uses XDP</h2>
<blockquote>
<p><strong>Not running Cilium?</strong> On EKS with <code class="" data-line="">aws-vpc-cni</code> or GKE with <code class="" data-line="">kubenet</code>, service forwarding uses iptables NAT rules and <code class="" data-line="">conntrack</code> instead. You can see this with <code class="" data-line="">iptables -t nat -L -n</code> on a node — look for the <code class="" data-line="">KUBE-SVC-*</code> chains. Those chains are what XDP replaces in a Cilium cluster. This is why teams migrating from kube-proxy to Cilium report lower node CPU at high connection rates — it&#8217;s not magic, it&#8217;s hook placement.</p>
</blockquote>
<p>On a Cilium node, XDP handles the load balancing path for ClusterIP services. When a packet arrives at the node destined for a ClusterIP:</p>
<ol>
<li>XDP program checks the destination IP against a BPF LRU hash map of known service VIPs</li>
<li>On a match, it performs DNAT — rewriting the destination IP to a backend pod IP</li>
<li>Returns <code class="" data-line="">XDP_TX</code> or <code class="" data-line="">XDP_REDIRECT</code> to forward directly</li>
</ol>
<p>No iptables NAT rules. No <code class="" data-line="">conntrack</code> state machine. No socket buffer allocation for the routing decision. The lookup is O(1) in a BPF hash map.</p>
<pre><code class="" data-line=""># See Cilium&#039;s XDP program on the node uplink
ip link show eth0 | grep xdp
# xdp  (attached, native mode)

# The XDP program details
bpftool prog show pinned /sys/fs/bpf/cilium/xdp

# Load time, instruction count, JIT-compiled size
bpftool prog show id $(bpftool net list | grep xdp | awk &#039;{print $NF}&#039;)
</code></pre>
<p>At production scale — 500+ nodes, 50k+ services — removing iptables from the service forwarding path with XDP reduces per-node CPU utilization measurably. The effect is most visible on nodes handling high connection rates to cluster services.</p>
<h2 id="operational-inspection">Operational Inspection</h2>
<pre><code class="" data-line=""># All XDP programs on all interfaces
bpftool net list

# Check XDP mode (native, generic, offloaded)
ip link show | grep xdp

# Per-interface drop counter (note: native-mode XDP_DROP is often NOT reflected here — prefer ethtool -S or the program&#8217;s own BPF stats map)
cat /sys/class/net/eth0/statistics/rx_dropped

# XDP drop counters exposed via bpftool
bpftool map dump id &lt;stats_map_id&gt;

# Verify XDP is active and show program details
bpftool net show dev eth0
</code></pre>
<h2 id="common-mistakes">Common Mistakes</h2>
<table>
<thead>
<tr>
<th>Mistake</th>
<th>Impact</th>
<th>Fix</th>
</tr>
</thead>
<tbody>
<tr>
<td>Missing bounds check before pointer dereference</td>
<td>Verifier rejects: &#8220;invalid mem access&#8221;</td>
<td>Always check <code class="" data-line="">ptr + sizeof(*ptr) &gt; data_end</code> before use</td>
</tr>
<tr>
<td>Using generic XDP for performance testing</td>
<td>Misleading numbers — sk_buff still allocated</td>
<td>Test in native mode only; check <code class="" data-line="">ip link</code> output for mode</td>
</tr>
<tr>
<td>Not handling non-IP traffic (ARP, IPv6, VLAN)</td>
<td>ARP breaks, IPv6 drops, VLAN-tagged frames dropped</td>
<td>Check <code class="" data-line="">eth-&gt;h_proto</code> and return <code class="" data-line="">XDP_PASS</code> for non-IP</td>
</tr>
<tr>
<td>XDP for egress or pod identity</td>
<td>No socket context at XDP; XDP is ingress only</td>
<td>Use TC egress for pod-identity-aware egress policy</td>
</tr>
<tr>
<td>Forgetting <code class="" data-line="">BPF_F_NO_PREALLOC</code> on LPM trie</td>
<td>Full memory allocated at map creation for all entries</td>
<td>Always set this flag for sparse prefix tries</td>
</tr>
<tr>
<td>Blocking ARP by accident in a /24 blocklist</td>
<td>Loss of layer-2 reachability within the blocked subnet</td>
<td>Separate ARP handling before the IP blocklist check</td>
</tr>
</tbody>
</table>
<h2 id="key-takeaways">Key Takeaways</h2>
<ul>
<li>XDP fires before <code class="" data-line="">sk_buff</code> allocation — the earliest possible kernel hook for packet processing</li>
<li>Three modes: native (in-driver, full performance), generic (fallback, no perf gain), offloaded (NIC ASIC)</li>
<li>XDP context is raw packet bytes — no socket, no cgroup, no pod identity; handle non-IP traffic explicitly</li>
<li>Every pointer dereference requires a bounds check against <code class="" data-line="">data_end</code> — the verifier enforces this</li>
<li><code class="" data-line="">BPF_MAP_TYPE_LPM_TRIE</code> is the right map for IP prefix blocklists — handles /32 hosts and CIDRs together</li>
<li>XDP metadata area enables coordination with TC programs — classify at XDP speed, enforce with pod context at TC</li>
</ul>
<h2 id="whats-next">What&#8217;s Next</h2>
<p>XDP handles ingress at the fastest possible point but has no visibility into which pod sent a packet. EP08 covers TC eBPF — the hook that fires after <code class="" data-line="">sk_buff</code> allocation, where socket and cgroup context exist.</p>
<p>TC is how Cilium implements pod-to-pod network policy without iptables. It&#8217;s also where stale programs from failed Cilium upgrades leave ghost filters that cause intermittent packet drops. Knowing how TC programs chain — and how to find and remove stale ones — is a specific, concrete operational skill.</p>
<p><em>Next: <a href="/ebpf-tc-pod-policy/">TC eBPF — pod-level network policy without iptables</a></em></p>
<p>Get EP08 in your inbox when it publishes → <a href="https://linuxcent.com/subscribe">linuxcent.com/subscribe</a></p>
<p><a class="a2a_button_mastodon" href="https://www.addtoany.com/add_to/mastodon?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-xdp-kubernetes-networking%2F&amp;linkname=XDP%20%E2%80%94%20Packets%20Processed%20Before%20the%20Kernel%20Knows%20They%20Arrived" title="Mastodon" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_email" href="https://www.addtoany.com/add_to/email?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-xdp-kubernetes-networking%2F&amp;linkname=XDP%20%E2%80%94%20Packets%20Processed%20Before%20the%20Kernel%20Knows%20They%20Arrived" title="Email" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_whatsapp" href="https://www.addtoany.com/add_to/whatsapp?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-xdp-kubernetes-networking%2F&amp;linkname=XDP%20%E2%80%94%20Packets%20Processed%20Before%20the%20Kernel%20Knows%20They%20Arrived" title="WhatsApp" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_reddit" href="https://www.addtoany.com/add_to/reddit?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-xdp-kubernetes-networking%2F&amp;linkname=XDP%20%E2%80%94%20Packets%20Processed%20Before%20the%20Kernel%20Knows%20They%20Arrived" title="Reddit" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_x" href="https://www.addtoany.com/add_to/x?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-xdp-kubernetes-networking%2F&amp;linkname=XDP%20%E2%80%94%20Packets%20Processed%20Before%20the%20Kernel%20Knows%20They%20Arrived" title="X" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_linkedin" href="https://www.addtoany.com/add_to/linkedin?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-xdp-kubernetes-networking%2F&amp;linkname=XDP%20%E2%80%94%20Packets%20Processed%20Before%20the%20Kernel%20Knows%20They%20Arrived" title="LinkedIn" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_copy_link" 
href="https://www.addtoany.com/add_to/copy_link?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-xdp-kubernetes-networking%2F&amp;linkname=XDP%20%E2%80%94%20Packets%20Processed%20Before%20the%20Kernel%20Knows%20They%20Arrived" title="Copy Link" rel="nofollow noopener" target="_blank"></a><a class="a2a_dd addtoany_share_save addtoany_share" href="https://www.addtoany.com/share#url=https%3A%2F%2Flinuxcent.com%2Febpf-xdp-kubernetes-networking%2F&#038;title=XDP%20%E2%80%94%20Packets%20Processed%20Before%20the%20Kernel%20Knows%20They%20Arrived" data-a2a-url="https://linuxcent.com/ebpf-xdp-kubernetes-networking/" data-a2a-title="XDP — Packets Processed Before the Kernel Knows They Arrived"></a></p><p>The post <a href="https://linuxcent.com/ebpf-xdp-kubernetes-networking/">XDP — Packets Processed Before the Kernel Knows They Arrived</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://linuxcent.com/ebpf-xdp-kubernetes-networking/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
		<post-id xmlns="com-wordpress:feed-additions:1">1540</post-id>	</item>
	</channel>
</rss>

<!--
Performance optimized by W3 Total Cache. Learn more: https://www.boldgrid.com/w3-total-cache/?utm_source=w3tc&utm_medium=footer_comment&utm_campaign=free_plugin

Page Caching using Disk: Enhanced 

Served from: linuxcent.com @ 2026-04-22 01:01:33 by W3 Total Cache
-->