trying a simple shell script and fixing archives
This commit is contained in:
38
shell/email_processor.awk
Executable file
38
shell/email_processor.awk
Executable file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/awk -f
|
||||
|
||||
# Primary email processor using AWK
|
||||
# Lightweight, portable text processing for cleaning up email content
|
||||
|
||||
# Per-line email cleanup. For every input record this action:
#   1. strips URL Defense wrappers,
#   2. collapses each http(s) URL to "[host]",
#   3. drops mailto: links,
#   4. bolds the From/To/Subject/Date headers (Markdown style),
#   5. prints the line unless it ended up blank.
{
    # Remove the URL Defense prefix, then the trailer it leaves behind.
    # Note: the second pattern removes ANY run that starts with "__" up to
    # the next whitespace, not only URL Defense trailers.
    gsub(/https:\/\/urldefense\.com\/v3\/__/, "")
    gsub(/__[^[:space:]]*/, "")

    # Rewrite every URL as "[host]".
    # The line is rebuilt left to right from substrings, so the URL text is
    # never used as a regex or as a replacement string: metacharacters such
    # as "?", "+", "(" or "&" inside a URL cannot break the substitution or
    # stall the loop. Each match consumes the whole URL up to the next
    # whitespace, so no separate pass is needed to trim the path afterwards.
    rest = $0
    out = ""
    while (match(rest, /https?:\/\/[^\/?#[:space:]]+[^[:space:]]*/)) {
        host = substr(rest, RSTART, RLENGTH)
        sub(/^https?:\/\//, "", host)   # drop the scheme
        sub(/[\/?#].*$/, "", host)      # drop path, query and fragment
        out = out substr(rest, 1, RSTART - 1) "[" host "]"
        rest = substr(rest, RSTART + RLENGTH)
    }
    $0 = out rest

    # Remove mailto links
    gsub(/mailto:[^[:space:]]*/, "")

    # Bold the common headers. Each pattern is anchored at the start of the
    # line, so at most one substitution can happen per call.
    sub(/^From:[[:space:]]*/, "**From:** ")
    sub(/^To:[[:space:]]*/, "**To:** ")
    sub(/^Subject:[[:space:]]*/, "**Subject:** ")
    sub(/^Date:[[:space:]]*/, "**Date:** ")

    # Skip lines that are empty (or whitespace-only) after cleanup
    if (/^[[:space:]]*$/) next

    print
}
|
||||
Reference in New Issue
Block a user