From 4d59856c1eccc1321edbbd2fbe294da5be4ebb90 Mon Sep 17 00:00:00 2001
From: MajorLinux <marcus@majorshouse.com>
Date: Wed, 18 Mar 2026 11:00:41 -0400
Subject: [PATCH 1/5] wiki: add Netdata new server deployment guide (49
 articles)

---
 02-selfhosting/index.md                       |   1 +
 .../monitoring/netdata-new-server-setup.md    | 117 ++++++++++++++++++
 MajorWiki-Deploy-Status.md                    |  11 +-
 README.md                                     |   6 +-
 SUMMARY.md                                    |   1 +
 index.md                                      |   6 +-
 6 files changed, 137 insertions(+), 5 deletions(-)
 create mode 100644 02-selfhosting/monitoring/netdata-new-server-setup.md
diff --git a/02-selfhosting/index.md b/02-selfhosting/index.md
index 48300bf..282ff3e 100644
--- a/02-selfhosting/index.md
+++ b/02-selfhosting/index.md
@@ -24,6 +24,7 @@ Guides for running your own services at home, including Docker, reverse proxies,
 
 - [Tuning Netdata Web Log Alerts](monitoring/tuning-netdata-web-log-alerts.md)
 - [Tuning Netdata Docker Health Alarms](monitoring/netdata-docker-health-alarm-tuning.md)
+- [Deploying Netdata to a New Server](monitoring/netdata-new-server-setup.md)
 
 ## Security
 
diff --git a/02-selfhosting/monitoring/netdata-new-server-setup.md b/02-selfhosting/monitoring/netdata-new-server-setup.md
new file mode 100644
index 0000000..c086f6d
--- /dev/null
+++ b/02-selfhosting/monitoring/netdata-new-server-setup.md
@@ -0,0 +1,117 @@
+---
+title: "Deploying Netdata to a New Server"
+domain: selfhosting
+category: monitoring
+tags: [netdata, monitoring, email, notifications, netdata-cloud, ubuntu, debian]
+status: published
+created: 2026-03-18
+updated: 2026-03-18
+---
+
+# Deploying Netdata to a New Server
+
+This covers the full Netdata setup for a new server in the fleet: install, email notification config, and Netdata Cloud claim. Applies to Ubuntu/Debian servers.
+
+## 1. Install
+
+Use the official kickstart script:
+
+```bash
+wget -O /tmp/netdata-install.sh https://get.netdata.cloud/kickstart.sh
+sh /tmp/netdata-install.sh --non-interactive --stable-channel --disable-telemetry
+```
+
+Verify it's running:
+
+```bash
+systemctl is-active netdata
+curl -s http://localhost:19999/api/v1/info | python3 -c "import sys,json; d=json.load(sys.stdin); print('Netdata', d['version'])"
+```
+
+## 2. Configure Email Notifications
+
+Copy the default config and set the three required values:
+
+```bash
+cp /usr/lib/netdata/conf.d/health_alarm_notify.conf /etc/netdata/health_alarm_notify.conf
+```
+
+Edit `/etc/netdata/health_alarm_notify.conf`:
+
+```ini
+EMAIL_SENDER="netdata@majorshouse.com"
+SEND_EMAIL="YES"
+DEFAULT_RECIPIENT_EMAIL="marcus@majorshouse.com"
+```
+
+Or apply with `sed` in one shot:
+
+```bash
+sed -i 's/^#\?EMAIL_SENDER=.*/EMAIL_SENDER="netdata@majorshouse.com"/' /etc/netdata/health_alarm_notify.conf
+sed -i 's/^#\?SEND_EMAIL=.*/SEND_EMAIL="YES"/' /etc/netdata/health_alarm_notify.conf
+sed -i 's/^#\?DEFAULT_RECIPIENT_EMAIL=.*/DEFAULT_RECIPIENT_EMAIL="marcus@majorshouse.com"/' /etc/netdata/health_alarm_notify.conf
+```
+
+Restart and test:
+
+```bash
+systemctl restart netdata
+/usr/libexec/netdata/plugins.d/alarm-notify.sh test 2>&1 | grep -E '(OK|FAILED|email)'
+```
+
+You should see three `# OK` lines (WARNING → CRITICAL → CLEAR test cycle) and confirmation that email was sent to `marcus@majorshouse.com`.
+
+> [!note] Delivery via local Postfix
+> Email is relayed through the server's local Postfix instance. Ensure Postfix is installed and `/usr/sbin/sendmail` resolves.
+
+## 3. Claim to Netdata Cloud
+
+Get the claim command from **Netdata Cloud → Space Settings → Nodes → Add Nodes**. It will look like:
+
+```bash
+wget -O /tmp/netdata-kickstart.sh https://get.netdata.cloud/kickstart.sh
+sh /tmp/netdata-kickstart.sh --stable-channel \
+  --claim-token <token> \
+  --claim-rooms <room-id> \
+  --claim-url https://app.netdata.cloud
+```
+
+Verify the claim was accepted:
+
+```bash
+cat /var/lib/netdata/cloud.d/claimed_id
+```
+
+A UUID will be present if claimed successfully. The node should appear in Netdata Cloud within ~60 seconds.
+
+## 4. Verify Alerts
+
+Check that no unexpected alerts are active after setup:
+
+```bash
+curl -s 'http://localhost:19999/api/v1/alarms?active' | python3 -c "
+import sys, json
+d = json.load(sys.stdin)
+active = [v for v in d.get('alarms', {}).values() if v.get('status') not in ('CLEAR', 'UNINITIALIZED', 'UNDEFINED')]
+print(f'{len(active)} active alert(s)')
+for v in active:
+    print(f'  [{v[\"status\"]}] {v[\"name\"]} on {v[\"chart\"]}')
+"
+```
+
+## Fleet-wide Alert Check
+
+To audit all servers at once (requires Tailscale SSH access):
+
+```bash
+for host in majorlab majorhome majormail majordiscord majortoot majorlinux tttpod dca teelia; do
+  echo "=== $host ==="
+  ssh root@$host "curl -s 'http://localhost:19999/api/v1/alarms?active' | python3 -c \
+    \"import sys,json; d=json.load(sys.stdin); active=[v for v in d.get('alarms',{}).values() if v.get('status') not in ('CLEAR','UNINITIALIZED','UNDEFINED')]; print(str(len(active))+' active')\""
+done
+```
+
+## Related
+
+- [Tuning Netdata Web Log Alerts](tuning-netdata-web-log-alerts.md)
+- [Tuning Netdata Docker Health Alarms](netdata-docker-health-alarm-tuning.md)
diff --git a/MajorWiki-Deploy-Status.md b/MajorWiki-Deploy-Status.md
index 6929b5d..407453f 100644
--- a/MajorWiki-Deploy-Status.md
+++ b/MajorWiki-Deploy-Status.md
@@ -128,7 +128,7 @@ Every time a new article is added, the following **MUST** be updated to maintain
 
 **Updated:** `updated: 2026-03-17`
 
-## Session Update — 2026-03-18
+## Session Update — 2026-03-18 (morning)
 
 **Article count:** 48 (was 47)
 
@@ -136,3 +136,12 @@ Every time a new article is added, the following **MUST** be updated to maintain
 - `02-selfhosting/monitoring/netdata-docker-health-alarm-tuning.md` — tuning docker_container_unhealthy alarm to prevent flapping during Nextcloud AIO updates
 
 **Updated:** `updated: 2026-03-18`
+
+## Session Update — 2026-03-18 (afternoon)
+
+**Article count:** 49 (was 48)
+
+**New articles added:**
+- `02-selfhosting/monitoring/netdata-new-server-setup.md` — full Netdata deployment guide: install via kickstart.sh, email notification config, Netdata Cloud claim
+
+**Updated:** `updated: 2026-03-18`
diff --git a/README.md b/README.md
index 32da4a2..68101e3 100644
--- a/README.md
+++ b/README.md
@@ -3,14 +3,14 @@
 > A growing reference of Linux, self-hosting, open source, streaming, and troubleshooting guides. Written by MajorLinux. Used by MajorTwin.
 >
 **Last updated:** 2026-03-18
-**Article count:** 48
+**Article count:** 49
 
 ## Domains
 
 | Domain | Folder | Articles |
 |---|---|---|
 | 🐧 Linux & Sysadmin | `01-linux/` | 11 |
-| 🏠 Self-Hosting & Homelab | `02-selfhosting/` | 10 |
+| 🏠 Self-Hosting & Homelab | `02-selfhosting/` | 11 |
 | 🔓 Open Source Tools | `03-opensource/` | 9 |
 | 🎙️ Streaming & Podcasting | `04-streaming/` | 2 |
 | 🔧 General Troubleshooting | `05-troubleshooting/` | 16 |
@@ -65,6 +65,7 @@
 ### Monitoring
 - [Tuning Netdata Web Log Alerts](02-selfhosting/monitoring/tuning-netdata-web-log-alerts.md) — tuning web_log_1m_redirects threshold for HTTPS-forcing servers
 - [Tuning Netdata Docker Health Alarms](02-selfhosting/monitoring/netdata-docker-health-alarm-tuning.md) — preventing false alerts during nightly Nextcloud AIO container update cycles
+- [Deploying Netdata to a New Server](02-selfhosting/monitoring/netdata-new-server-setup.md) — install, email notifications, and Netdata Cloud claim for Ubuntu/Debian servers
 
 ### Security
 - [Linux Server Hardening Checklist](02-selfhosting/security/linux-server-hardening-checklist.md) — non-root user, SSH key auth, sshd_config, firewall, fail2ban
@@ -129,6 +130,7 @@
 
 | Date | Article | Domain |
 |---|---|---|
+| 2026-03-18 | [Deploying Netdata to a New Server](02-selfhosting/monitoring/netdata-new-server-setup.md) | Self-Hosting |
 | 2026-03-18 | [Tuning Netdata Docker Health Alarms](02-selfhosting/monitoring/netdata-docker-health-alarm-tuning.md) | Self-Hosting |
 | 2026-03-17 | [Ollama Drops Off Tailscale When Mac Sleeps](05-troubleshooting/ollama-macos-sleep-tailscale-disconnect.md) | Troubleshooting |
 | 2026-03-17 | [Windows OpenSSH Server (sshd) Stops After Reboot](05-troubleshooting/networking/windows-sshd-stops-after-reboot.md) | Troubleshooting |
diff --git a/SUMMARY.md b/SUMMARY.md
index 3590afc..81ea689 100644
--- a/SUMMARY.md
+++ b/SUMMARY.md
@@ -20,6 +20,7 @@
     * [rsync Backup Patterns](02-selfhosting/storage-backup/rsync-backup-patterns.md)
     * [Tuning Netdata Web Log Alerts](02-selfhosting/monitoring/tuning-netdata-web-log-alerts.md)
     * [Tuning Netdata Docker Health Alarms](02-selfhosting/monitoring/netdata-docker-health-alarm-tuning.md)
+    * [Deploying Netdata to a New Server](02-selfhosting/monitoring/netdata-new-server-setup.md)
     * [Linux Server Hardening Checklist](02-selfhosting/security/linux-server-hardening-checklist.md)
     * [Standardizing unattended-upgrades with Ansible](02-selfhosting/security/ansible-unattended-upgrades-fleet.md)
 * [Open Source & Alternatives](03-opensource/index.md)
diff --git a/index.md b/index.md
index 30a2d1d..e807071 100644
--- a/index.md
+++ b/index.md
@@ -3,14 +3,14 @@
 > A growing reference of Linux, self-hosting, open source, streaming, and troubleshooting guides. Written by MajorLinux. Used by MajorTwin.
 >
 > **Last updated:** 2026-03-18
-> **Article count:** 48
+> **Article count:** 49
 
 ## Domains
 
 | Domain | Folder | Articles |
 |---|---|---|
 | 🐧 Linux & Sysadmin | `01-linux/` | 11 |
-| 🏠 Self-Hosting & Homelab | `02-selfhosting/` | 10 |
+| 🏠 Self-Hosting & Homelab | `02-selfhosting/` | 11 |
 | 🔓 Open Source Tools | `03-opensource/` | 9 |
 | 🎙️ Streaming & Podcasting | `04-streaming/` | 2 |
 | 🔧 General Troubleshooting | `05-troubleshooting/` | 16 |
@@ -65,6 +65,7 @@
 ### Monitoring
 - [Tuning Netdata Web Log Alerts](02-selfhosting/monitoring/tuning-netdata-web-log-alerts.md) — tuning web_log_1m_redirects threshold for HTTPS-forcing servers
 - [Tuning Netdata Docker Health Alarms](02-selfhosting/monitoring/netdata-docker-health-alarm-tuning.md) — preventing false alerts during nightly Nextcloud AIO container update cycles
+- [Deploying Netdata to a New Server](02-selfhosting/monitoring/netdata-new-server-setup.md) — install, email notifications, and Netdata Cloud claim for Ubuntu/Debian servers
 
 ### Security
 - [Linux Server Hardening Checklist](02-selfhosting/security/linux-server-hardening-checklist.md) — non-root user, SSH key auth, sshd_config, firewall, fail2ban
@@ -129,6 +130,7 @@
 
 | Date | Article | Domain |
 |---|---|---|
+| 2026-03-18 | [Deploying Netdata to a New Server](02-selfhosting/monitoring/netdata-new-server-setup.md) | Self-Hosting |
 | 2026-03-18 | [Tuning Netdata Docker Health Alarms](02-selfhosting/monitoring/netdata-docker-health-alarm-tuning.md) | Self-Hosting |
 | 2026-03-17 | [Ollama Drops Off Tailscale When Mac Sleeps](05-troubleshooting/ollama-macos-sleep-tailscale-disconnect.md) | Troubleshooting |
 | 2026-03-17 | [Windows OpenSSH Server (sshd) Stops After Reboot](05-troubleshooting/networking/windows-sshd-stops-after-reboot.md) | Troubleshooting |

From c4d3f8e9740cea1d237e4c3a7fe164a3e9daec62 Mon Sep 17 00:00:00 2001
From: MajorLinux <marcus@majorshouse.com>
Date: Sat, 21 Mar 2026 00:12:52 -0400
Subject: [PATCH 2/5] wiki: add Tailscale SSH reauth article; update Netdata
 Docker alarm tuning (50 articles)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- New: Tailscale SSH unexpected re-authentication prompt — diagnosis and fix
- Updated: netdata-docker-health-alarm-tuning — add delay: up 3m to suppress
  Nextcloud AIO PHP-FPM ~90s startup false alerts; update settings table and notes
- Updated: 05-troubleshooting/index.md and SUMMARY.md

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../netdata-docker-health-alarm-tuning.md     | 11 ++--
 05-troubleshooting/index.md                   |  1 +
 .../networking/tailscale-ssh-reauth-prompt.md | 66 +++++++++++++++++++
 SUMMARY.md                                    |  1 +
 4 files changed, 74 insertions(+), 5 deletions(-)
 create mode 100644 05-troubleshooting/networking/tailscale-ssh-reauth-prompt.md

diff --git a/02-selfhosting/monitoring/netdata-docker-health-alarm-tuning.md b/02-selfhosting/monitoring/netdata-docker-health-alarm-tuning.md
index fe116ac..e232ed5 100644
--- a/02-selfhosting/monitoring/netdata-docker-health-alarm-tuning.md
+++ b/02-selfhosting/monitoring/netdata-docker-health-alarm-tuning.md
@@ -5,7 +5,7 @@ category: monitoring
 tags: [netdata, docker, nextcloud, alarms, health, monitoring]
 status: published
 created: 2026-03-18
-updated: 2026-03-18
+updated: 2026-03-21
 ---
 
 # Tuning Netdata Docker Health Alarms to Prevent Update Flapping
@@ -40,7 +40,7 @@ component: Docker
     every: 30s
    lookup: average -5m of unhealthy
      warn: $this > 0
-    delay: down 5m multiplier 1.5 max 30m
+    delay: up 3m down 5m multiplier 1.5 max 30m
   summary: Docker container ${label:container_name} health
      info: ${label:container_name} docker container health status is unhealthy
        to: sysadmin
@@ -49,10 +49,11 @@ component: Docker
 | Setting | Default | Tuned | Effect |
 |---|---|---|---|
 | `every` | 10s | 30s | Check less frequently |
-| `lookup` | average -10s | average -5m | Must be unhealthy for sustained 5 minutes |
-| `delay` | none | down 5m (max 30m) | Grace period after recovery before clearing |
+| `lookup` | average -10s | average -5m | Smooths transient unhealthy samples over 5 minutes |
+| `delay: up 3m` | none | 3m | Won't fire until unhealthy condition persists for 3 continuous minutes |
+| `delay: down 5m` | none | 5m (max 30m) | Grace period after recovery before clearing |
 
-A typical Nextcloud AIO update cycle (30–90 seconds of container restarts) won't sustain 5 minutes of unhealthy status, so no alert fires. A genuinely broken container will still be caught.
+The `up` delay is the critical addition. Nextcloud AIO's `nextcloud-aio-nextcloud` container checks both PostgreSQL (port 5432) and PHP-FPM (port 9000). PHP-FPM takes ~90 seconds to warm up after a restart, causing 2–3 failing health checks before the container becomes healthy. With `delay: up 3m`, Netdata waits for 3 continuous minutes of unhealthy status before firing — absorbing the ~90 second startup window with margin to spare. A genuinely broken container will still trigger the alert.
 
 ## Applying the Config
 
diff --git a/05-troubleshooting/index.md b/05-troubleshooting/index.md
index 8de9d8e..d58a20a 100644
--- a/05-troubleshooting/index.md
+++ b/05-troubleshooting/index.md
@@ -9,6 +9,7 @@ Practical fixes for common Linux, networking, and application problems.
 - [Apache Outage: Fail2ban Self-Ban + Missing iptables Rules](networking/fail2ban-self-ban-apache-outage.md)
 - [Mail Client Stops Receiving: Fail2ban IMAP Self-Ban](networking/fail2ban-imap-self-ban-mail-client.md)
 - [firewalld: Mail Ports Wiped After Reload](networking/firewalld-mail-ports-reset.md)
+- [Tailscale SSH: Unexpected Re-Authentication Prompt](networking/tailscale-ssh-reauth-prompt.md)
 - [ISP SNI Filtering & Caddy](isp-sni-filtering-caddy.md)
 - [yt-dlp YouTube JS Challenge Fix](yt-dlp-fedora-js-challenge.md)
 
diff --git a/05-troubleshooting/networking/tailscale-ssh-reauth-prompt.md b/05-troubleshooting/networking/tailscale-ssh-reauth-prompt.md
new file mode 100644
index 0000000..36937a0
--- /dev/null
+++ b/05-troubleshooting/networking/tailscale-ssh-reauth-prompt.md
@@ -0,0 +1,66 @@
+# Tailscale SSH: Unexpected Re-Authentication Prompt
+
+If a Tailscale SSH connection unexpectedly presents a browser authentication URL mid-session, the first instinct is to check the ACL policy. However, this is often a one-off Tailscale hiccup rather than a misconfiguration.
+
+## Symptoms
+
+- SSH connection to a fleet node displays a Tailscale auth URL:
+  ```
+  To authenticate, visit: https://login.tailscale.com/a/xxxxxxxx
+  ```
+- The prompt appears even though the node worked fine previously
+- Other nodes in the fleet connect without prompting
+
+## What Causes It
+
+Tailscale SSH supports two ACL `action` values:
+
+| Action | Behavior |
+|---|---|
+| `accept` | Trusts Tailscale identity — no additional auth required |
+| `check` | Requires periodic browser-based re-authentication |
+
+If `action: "check"` is set, Tailscale prompts for browser auth whenever the user's last authentication is older than the check period (12 hours by default). However, even with `action: "accept"`, a one-off prompt can appear due to a Tailscale daemon glitch or key refresh event.
+
+## How to Diagnose
+
+### 1. Verify the ACL policy
+
+In the Tailscale admin console (or via `tailscale debug acl`), inspect the SSH rules. For a trusted homelab fleet, the rule should use `accept`:
+
+```json
+{
+    "src":    ["autogroup:member"],
+    "dst":    ["autogroup:self"],
+    "users":  ["autogroup:nonroot", "root"],
+    "action": "accept",
+}
+```
+
+If `action` is `check`, that is the root cause — change it to `accept` for trusted source/destination pairs.
+
+### 2. Confirm it was a one-off
+
+If the ACL already shows `accept`, the prompt was transient. Test with:
+
+```bash
+ssh <hostname> "echo ok"
+```
+
+No auth prompt + `ok` output = resolved. Note that this test is only meaningful if the previous session's auth token has expired, or you test from a different device that hasn't recently authenticated.
+
+## Fix
+
+**If ACL shows `check`:** Change to `accept` in the Tailscale admin console under Access Controls. Takes effect immediately — no server changes needed.
+
+**If ACL already shows `accept`:** No action required. The prompt was a one-off Tailscale event (daemon restart, key refresh, etc.). Monitor for recurrence.
+
+## Notes
+
+- Port 2222 on **MajorRig** exists as a hard bypass for Tailscale SSH browser auth — regular SSH over Tailscale network, bypassing Tailscale SSH entirely. This is an alternative approach if `check` mode is required for compliance but browser auth is too disruptive.
+- The `autogroup:self` destination means the rule applies when connecting from your own devices to your own devices — appropriate for a personal homelab fleet.
+
+## Related
+
+- [[Network Overview]] — Tailscale fleet inventory and SSH access model
+- [[SSH-Aliases]] — Fleet SSH access shortcuts
diff --git a/SUMMARY.md b/SUMMARY.md
index 81ea689..5ccb815 100644
--- a/SUMMARY.md
+++ b/SUMMARY.md
@@ -40,6 +40,7 @@
     * [Apache Outage: Fail2ban Self-Ban + Missing iptables Rules](05-troubleshooting/networking/fail2ban-self-ban-apache-outage.md)
     * [Mail Client Stops Receiving: Fail2ban IMAP Self-Ban](05-troubleshooting/networking/fail2ban-imap-self-ban-mail-client.md)
     * [firewalld: Mail Ports Wiped After Reload](05-troubleshooting/networking/firewalld-mail-ports-reset.md)
+    * [Tailscale SSH: Unexpected Re-Authentication Prompt](05-troubleshooting/networking/tailscale-ssh-reauth-prompt.md)
     * [Docker & Caddy Recovery After Reboot (Fedora + SELinux)](05-troubleshooting/docker-caddy-selinux-post-reboot-recovery.md)
     * [ISP SNI Filtering with Caddy](05-troubleshooting/isp-sni-filtering-caddy.md)
     * [Obsidian Vault Recovery — Loading Cache Hang](05-troubleshooting/obsidian-cache-hang-recovery.md)

From 9e205f60e46563b4371bf09d99a8c3dcd50ab88c Mon Sep 17 00:00:00 2001
From: MajorLinux <marcus@majorshouse.com>
Date: Sat, 21 Mar 2026 04:25:56 -0400
Subject: [PATCH 3/5] wiki: add Netdata n8n enriched alert pipeline article (51
 articles)

---
 .../monitoring/netdata-n8n-enriched-alerts.md | 153 ++++++++++++++++++
 1 file changed, 153 insertions(+)
 create mode 100644 02-selfhosting/monitoring/netdata-n8n-enriched-alerts.md

diff --git a/02-selfhosting/monitoring/netdata-n8n-enriched-alerts.md b/02-selfhosting/monitoring/netdata-n8n-enriched-alerts.md
new file mode 100644
index 0000000..d62b7f1
--- /dev/null
+++ b/02-selfhosting/monitoring/netdata-n8n-enriched-alerts.md
@@ -0,0 +1,153 @@
+# Netdata → n8n Enriched Alert Emails
+
+**Status:** Live across all MajorsHouse fleet servers as of 2026-03-21
+
+Replaces Netdata's plain-text alert emails with rich HTML emails that include a plain-English explanation, a suggested remediation command, and a direct link to the relevant MajorWiki article.
+
+---
+
+## How It Works
+
+```
+Netdata alarm fires
+  → custom_sender() in health_alarm_notify.conf
+    → POST JSON payload to n8n webhook
+      → Code node enriches with suggestion + wiki link
+        → Send Email node sends HTML email via SMTP
+          → Respond node returns 200 OK
+```
+
+---
+
+## n8n Workflow
+
+**Name:** Netdata Enriched Alerts  
+**URL:** https://n8n.majorshouse.com  
+**Webhook endpoint:** `POST https://n8n.majorshouse.com/webhook/netdata-alert`  
+**Workflow ID:** `a1b2c3d4-aaaa-bbbb-cccc-000000000001`
+
+### Nodes
+
+1. **Netdata Webhook** — receives POST from Netdata's `custom_sender()`
+2. **Enrich Alert** — Code node; matches alarm/chart/family to enrichment table, builds HTML email body in `$json.emailBody`
+3. **Send Enriched Email** — sends via SMTP port 465 (SMTP account 2), from `netdata@majorshouse.com` to `marcus@majorshouse.com`
+4. **Respond OK** — returns `ok` with HTTP 200 to Netdata
+
+### Enrichment Keys
+
+The Code node matches on `alarm`, `chart`, or `family` field (case-insensitive substring):
+
+| Key | Title | Wiki Article |
+|-----|-------|-------------|
+| `disk_space` | Disk Space Alert | snapraid-mergerfs-setup |
+| `ram` | Memory Alert | managing-linux-services-systemd-ansible |
+| `cpu` | CPU Alert | managing-linux-services-systemd-ansible |
+| `load` | Load Average Alert | managing-linux-services-systemd-ansible |
+| `net` | Network Alert | tailscale-homelab-remote-access |
+| `docker` | Docker Container Alert | debugging-broken-docker-containers |
+| `web_log` | Web Log Alert | tuning-netdata-web-log-alerts |
+| `health` | Docker Health Alarm | netdata-docker-health-alarm-tuning |
+| `mdstat` | RAID Array Alert | mdadm-usb-hub-disconnect-recovery |
+| `systemd` | Systemd Service Alert | docker-caddy-selinux-post-reboot-recovery |
+| _(no match)_ | Server Alert | netdata-new-server-setup |
+
+---
+
+## Netdata Configuration
+
+### Config File Locations
+
+| Server | Path |
+|--------|------|
+| majorhome, majormail, majordiscord, tttpod, teelia | `/etc/netdata/health_alarm_notify.conf` |
+| majorlinux, majortoot, dca | `/usr/lib/netdata/conf.d/health_alarm_notify.conf` (stock copy — package updates can overwrite it; prefer copying it to `/etc/netdata/`) |
+
+### Required Settings
+
+```bash
+DEFAULT_RECIPIENT_CUSTOM="n8n"
+role_recipients_custom[sysadmin]="${DEFAULT_RECIPIENT_CUSTOM}"
+```
+
+### custom_sender() Function
+
+```bash
+custom_sender() {
+    local to="${1}"
+    local payload
+    payload=$(jq -n \
+        --arg hostname "${host}" \
+        --arg alarm "${name}" \
+        --arg chart "${chart}" \
+        --arg family "${family}" \
+        --arg status "${status}" \
+        --arg old_status "${old_status}" \
+        --arg value "${value_string}" \
+        --arg units "${units}" \
+        --arg info "${info}" \
+        --arg alert_url "${goto_url}" \
+        --arg severity "${severity}" \
+        --arg raised_for "${raised_for}" \
+        --arg total_warnings "${total_warnings}" \
+        --arg total_critical "${total_critical}" \
+        '{hostname:$hostname,alarm:$alarm,chart:$chart,family:$family,status:$status,old_status:$old_status,value:$value,units:$units,info:$info,alert_url:$alert_url,severity:$severity,raised_for:$raised_for,total_warnings:$total_warnings,total_critical:$total_critical}')
+    local httpcode
+    httpcode=$(docurl -s -o /dev/null -w "%{http_code}" \
+        -X POST \
+        -H "Content-Type: application/json" \
+        -d "${payload}" \
+        "https://n8n.majorshouse.com/webhook/netdata-alert")
+    if [ "${httpcode}" = "200" ]; then
+        info "sent enriched notification to n8n for ${status} of ${host}.${name}"
+        sent=$((sent + 1))
+    else
+        error "failed to send notification to n8n, HTTP code: ${httpcode}"
+    fi
+}
+```
+
+> [!note] jq required
+> The `custom_sender()` function requires `jq` to be installed. Verify with `which jq` on each server.
+
+---
+
+## Deploying to a New Server
+
+```bash
+# 1. Find the config file
+find /etc/netdata /usr/lib/netdata -name health_alarm_notify.conf 2>/dev/null
+
+# 2. Edit it — add the two lines and the custom_sender() function above
+
+# 3. Test connectivity from the server
+curl -s -o /dev/null -w "%{http_code}" \
+  -X POST https://n8n.majorshouse.com/webhook/netdata-alert \
+  -H "Content-Type: application/json" \
+  -d '{"hostname":"test","alarm":"disk_space._","status":"WARNING"}'
+# Expected: 200
+
+# 4. Restart Netdata
+systemctl restart netdata
+
+# 5. Send a test alarm (the optional argument is a recipient role, not a notification method)
+/usr/libexec/netdata/plugins.d/alarm-notify.sh test sysadmin
+```
+
+---
+
+## Troubleshooting
+
+**Emails not arriving — check n8n execution log:**  
+Go to https://n8n.majorshouse.com → open "Netdata Enriched Alerts" → Executions tab. Look for `error` status entries.
+
+**Email body empty:**  
+The Send Email node's HTML field must be `={{ $json.emailBody }}`. Shell variable expansion can silently strip `$json` if the workflow is patched via inline SSH commands — always use a Python script file.
+
+**`000` curl response from a server:**  
+`000` means curl received no HTTP response at all. Usually this is a timeout rather than a DNS or connection failure — re-test with `--max-time 30`.
+
+**`custom_sender()` syntax error in Netdata logs:**  
+Bash heredocs don't work inside sourced config files. Use `jq -n --arg ...` as shown above — no heredocs.
+
+**n8n `N8N_TRUST_PROXY` must be set:**  
+Without `N8N_TRUST_PROXY=true` in the Docker environment, Caddy's `X-Forwarded-For` header causes n8n's rate limiter to abort requests before parsing the body. Set in `/opt/n8n/compose.yml`.

From d1e9571761da1e01cc88e5c915834d41b779b14f Mon Sep 17 00:00:00 2001
From: MajorLinux <marcus@majorshouse.com>
Date: Sun, 22 Mar 2026 03:17:19 -0400
Subject: [PATCH 4/5] =?UTF-8?q?wiki:=20update=20Netdata=20Docker=20alarm?=
 =?UTF-8?q?=20tuning=20=E2=80=94=20add=20docker=5Fcontainer=5Fdown=20suppr?=
 =?UTF-8?q?ession?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Nextcloud AIO borgbackup and watchtower exit normally after nightly update/backup
cycles. Added docker_container_down override with chart labels to exclude them,
preventing false alerts. Documents chart labels pattern syntax.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../netdata-docker-health-alarm-tuning.md     | 31 +++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/02-selfhosting/monitoring/netdata-docker-health-alarm-tuning.md b/02-selfhosting/monitoring/netdata-docker-health-alarm-tuning.md
index e232ed5..01a9e2f 100644
--- a/02-selfhosting/monitoring/netdata-docker-health-alarm-tuning.md
+++ b/02-selfhosting/monitoring/netdata-docker-health-alarm-tuning.md
@@ -5,7 +5,7 @@ category: monitoring
 tags: [netdata, docker, nextcloud, alarms, health, monitoring]
 status: published
 created: 2026-03-18
-updated: 2026-03-21
+updated: 2026-03-22
 ---
 
 # Tuning Netdata Docker Health Alarms to Prevent Update Flapping
@@ -55,6 +55,33 @@ component: Docker
 
 The `up` delay is the critical addition. Nextcloud AIO's `nextcloud-aio-nextcloud` container checks both PostgreSQL (port 5432) and PHP-FPM (port 9000). PHP-FPM takes ~90 seconds to warm up after a restart, causing 2–3 failing health checks before the container becomes healthy. With `delay: up 3m`, Netdata waits for 3 continuous minutes of unhealthy status before firing — absorbing the ~90 second startup window with margin to spare. A genuinely broken container will still trigger the alert.
 
+## Also: Suppress `docker_container_down` for Normally-Exiting Containers
+
+Nextcloud AIO runs `borgbackup` (scheduled backups) and `watchtower` (auto-updates) as containers that exit with code 0 after completing their work. The stock `docker_container_down` alarm fires on any exited container, generating false alerts after every nightly cycle.
+
+Add a second override to the same file using `chart labels` to exclude them:
+
+```ini
+# Suppress docker_container_down for Nextcloud AIO containers that exit normally
+# (borgbackup runs on schedule then exits; watchtower does updates then exits)
+template: docker_container_down
+       on: docker.container_running_state
+    class: Errors
+     type: Containers
+component: Docker
+    units: status
+    every: 30s
+   lookup: average -5m of down
+chart labels: container_name=!nextcloud-aio-borgbackup !nextcloud-aio-watchtower *
+     warn: $this > 0
+    delay: up 3m down 5m multiplier 1.5 max 30m
+  summary: Docker container ${label:container_name} down
+     info: ${label:container_name} docker container is down
+       to: sysadmin
+```
+
+The `chart labels` line uses Netdata's simple pattern syntax — `!` prefix excludes a container, `*` matches everything else. All other exited containers still alert normally.
+
 ## Applying the Config
 
 ```bash
@@ -75,7 +102,7 @@ In the Netdata UI, navigate to **Alerts → Manage Alerts** and search for `dock
 
 ## Notes
 
-- This only overrides the `docker_container_unhealthy` alarm. The `docker_container_down` alarm (for exited containers) is left at its default — it already has a `delay: down 1m` and is disabled by default (`chart labels: container_name=!*`).
+- Both `docker_container_unhealthy` and `docker_container_down` are overridden in this config. Any container not explicitly excluded in the `chart labels` filter will still alert normally.
 - If you want per-container silencing instead of a blanket delay, use the `host labels` or `chart labels` filter to scope the alarm to specific containers.
 - Config volume path on majorlab: `/var/lib/docker/volumes/netdata_netdataconfig/_data/`
 

From 0e640a3fffe689870a67180e6444b83d5061bfee Mon Sep 17 00:00:00 2001
From: MajorLinux <marcus@majorshouse.com>
Date: Sun, 22 Mar 2026 03:36:49 -0400
Subject: [PATCH 5/5] wiki: add ClamAV safe scheduling article; update Netdata
 new server setup

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../monitoring/netdata-new-server-setup.md    | 58 +++++++++++++--
 .../clamscan-cpu-spike-nice-ionice.md         | 73 +++++++++++++++++++
 SUMMARY.md                                    |  1 +
 3 files changed, 125 insertions(+), 7 deletions(-)
 create mode 100644 05-troubleshooting/security/clamscan-cpu-spike-nice-ionice.md

diff --git a/02-selfhosting/monitoring/netdata-new-server-setup.md b/02-selfhosting/monitoring/netdata-new-server-setup.md
index c086f6d..c91d6f9 100644
--- a/02-selfhosting/monitoring/netdata-new-server-setup.md
+++ b/02-selfhosting/monitoring/netdata-new-server-setup.md
@@ -2,17 +2,31 @@
 title: "Deploying Netdata to a New Server"
 domain: selfhosting
 category: monitoring
-tags: [netdata, monitoring, email, notifications, netdata-cloud, ubuntu, debian]
+tags: [netdata, monitoring, email, notifications, netdata-cloud, ubuntu, debian, n8n]
 status: published
 created: 2026-03-18
-updated: 2026-03-18
+updated: 2026-03-22
 ---
 
 # Deploying Netdata to a New Server
 
-This covers the full Netdata setup for a new server in the fleet: install, email notification config, and Netdata Cloud claim. Applies to Ubuntu/Debian servers.
+This covers the full Netdata setup for a new server in the fleet: install, email notification config, n8n webhook integration, and Netdata Cloud claim. Applies to Ubuntu/Debian servers.
 
-## 1. Install
+## 1. Install Prerequisites
+
+Install `jq` before anything else. It is required by the `custom_sender()` function in `health_alarm_notify.conf` to build the JSON payload sent to the n8n webhook. **If `jq` is missing, the webhook will fire with an empty body and n8n alert emails will have no information in them.**
+
+```bash
+apt install -y jq
+```
+
+Verify:
+
+```bash
+jq --version
+```
+
+## 2. Install Netdata
 
 Use the official kickstart script:
 
@@ -28,7 +42,7 @@ systemctl is-active netdata
 curl -s http://localhost:19999/api/v1/info | python3 -c "import sys,json; d=json.load(sys.stdin); print('Netdata', d['version'])"
 ```
 
-## 2. Configure Email Notifications
+## 3. Configure Email Notifications
 
 Copy the default config and set the three required values:
 
@@ -64,7 +78,23 @@ You should see three `# OK` lines (WARNING → CRITICAL → CLEAR test cycle) an
 > [!note] Delivery via local Postfix
 > Email is relayed through the server's local Postfix instance. Ensure Postfix is installed and `/usr/sbin/sendmail` resolves.
 
-## 3. Claim to Netdata Cloud
+## 4. Configure n8n Webhook Notifications
+
+Copy `health_alarm_notify.conf` from an existing server that already contains the `custom_sender()` function (e.g. majormail). That function sends enriched JSON payloads to the n8n webhook at `https://n8n.majorshouse.com/webhook/netdata-alert`. If this overwrites the file you edited in Step 3, re-check the email settings afterwards.
+
+> [!warning] jq required
+> The `custom_sender()` function uses `jq` to build the JSON payload. If `jq` is not installed, `payload` will be empty, curl will send `Content-Length: 0`, and n8n will produce alert emails with `Host: unknown`, blank alert/value fields, and `Status: UNKNOWN`. Always install `jq` first (Step 1).
+
+After deploying the config, run a test to confirm the webhook fires correctly:
+
+```bash
+systemctl restart netdata
+/usr/libexec/netdata/plugins.d/alarm-notify.sh test 2>&1 | grep -E '(custom|n8n|OK|FAILED)'
+```
+
+Verify in n8n that the latest execution shows a non-empty body with `hostname`, `alarm`, and `status` fields populated.
+
+## 5. Claim to Netdata Cloud
 
 Get the claim command from **Netdata Cloud → Space Settings → Nodes → Add Nodes**. It will look like:
 
@@ -84,7 +114,7 @@ cat /var/lib/netdata/cloud.d/claimed_id
 
 A UUID will be present if claimed successfully. The node should appear in Netdata Cloud within ~60 seconds.
 
-## 4. Verify Alerts
+## 6. Verify Alerts
 
 Check that no unexpected alerts are active after setup:
 
@@ -111,6 +141,20 @@ for host in majorlab majorhome majormail majordiscord majortoot majorlinux tttpo
 done
 ```
 
+## Fleet-wide jq Audit
+
+To check that all servers with `custom_sender` have `jq` installed:
+
+```bash
+for host in majorlab majorhome majormail majordiscord majortoot majorlinux tttpod dca teelia; do
+  echo -n "=== $host: "
+  ssh -o ConnectTimeout=5 root@$host \
+    'has_cs=$(grep -l "custom_sender\|n8n.majorshouse.com" /etc/netdata/health_alarm_notify.conf 2>/dev/null | wc -l); has_jq=$(command -v jq >/dev/null 2>&1 && echo yes || echo NO); echo "custom_sender=$has_cs jq=$has_jq"'
+done
+```
+
+Any server showing `custom_sender=1 jq=NO` needs `apt install -y jq` immediately.
+
 ## Related
 
 - [Tuning Netdata Web Log Alerts](tuning-netdata-web-log-alerts.md)
diff --git a/05-troubleshooting/security/clamscan-cpu-spike-nice-ionice.md b/05-troubleshooting/security/clamscan-cpu-spike-nice-ionice.md
new file mode 100644
index 0000000..a0480f0
--- /dev/null
+++ b/05-troubleshooting/security/clamscan-cpu-spike-nice-ionice.md
@@ -0,0 +1,73 @@
+# ClamAV Safe Scheduling on Live Servers
+
+Running `clamscan` unthrottled on a live server will peg CPU until completion. On a small VPS (1 vCPU), a full recursive scan can sustain 70–100% CPU for an hour or more, degrading or taking down hosted services.
+
+## The Problem
+
+A common out-of-the-box ClamAV cron setup looks like this:
+
+```cron
+0 1 * * 0 clamscan --infected --recursive / --exclude=/sys
+```
+
+This runs at Linux's default scheduling priority (`nice 0`) with normal I/O priority. On a live server it will:
+
+- Monopolize the CPU for the scan duration
+- Cause high I/O wait, degrading web serving, databases, and other services
+- Trigger monitoring alerts (e.g., Netdata `10min_cpu_usage`)
+
+## The Fix
+
+Throttle the scan with `nice` and `ionice`:
+
+```cron
+0 1 * * 0 nice -n 19 ionice -c 3 clamscan --infected --recursive / --exclude=/sys
+```
+
+| Flag | Meaning |
+|------|---------|
+| `nice -n 19` | Lowest CPU scheduling priority (range: -20 to 19) |
+| `ionice -c 3` | Idle I/O class — only uses disk when no other process needs it |
+
+The scan will take longer but will not impact server performance.
+
+## Applying the Fix
+
+Edit root's crontab:
+
+```bash
+crontab -e
+```
+
+Or apply non-interactively:
+
+```bash
+crontab -l | sed '/nice -n 19 ionice -c 3 clamscan/!s|clamscan|nice -n 19 ionice -c 3 clamscan|' | crontab -
+```
+
+Verify:
+
+```bash
+crontab -l | grep clam
+```
+
+## Diagnosing a Runaway Scan
+
+If CPU is already pegged, identify and kill the process:
+
+```bash
+ps aux --sort=-%cpu | head -15
+# Look for clamscan
+kill <PID>
+```
+
+## Notes
+
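+- `ionice -c 3` (Idle) requires Linux kernel ≥ 2.6.13 and only has an effect when the disk's I/O scheduler honors I/O priorities (CFQ on older kernels, BFQ on current ones). With a scheduler such as `none`, the command is accepted but does nothing; check the active scheduler with `cat /sys/block/<dev>/queue/scheduler`. `nice -n 19` applies regardless of the I/O scheduler.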
+- On multi-core servers, consider also using `cpulimit` for a hard cap: `cpulimit -l 30 -- clamscan ...`
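+- Always keep virtual filesystems such as `/sys` (and optionally `/proc`, `/dev`) excluded from the scan. Note that `--exclude` takes an unanchored regex matched against file paths, so `--exclude=/sys` also skips paths like `/usr/lib/systemd/...`; `--exclude-dir='^/sys'` is the more precise form.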
+
+## Related
+
+- [ClamAV Documentation](https://docs.clamav.net/)
+- [Linux Server Hardening Checklist](../../02-selfhosting/security/linux-server-hardening-checklist.md)
diff --git a/SUMMARY.md b/SUMMARY.md
index 5ccb815..2074268 100644
--- a/SUMMARY.md
+++ b/SUMMARY.md
@@ -53,3 +53,4 @@
     * [mdadm RAID Recovery After USB Hub Disconnect](05-troubleshooting/storage/mdadm-usb-hub-disconnect-recovery.md)
     * [Windows OpenSSH Server (sshd) Stops After Reboot](05-troubleshooting/networking/windows-sshd-stops-after-reboot.md)
     * [Ollama Drops Off Tailscale When Mac Sleeps](05-troubleshooting/ollama-macos-sleep-tailscale-disconnect.md)
+    * [ClamAV CPU Spike: Safe Scheduling with nice/ionice](05-troubleshooting/security/clamscan-cpu-spike-nice-ionice.md)