#!/bin/sh
# Look for the most heavily-advertised spam domain in this message.
#
# Input:  one mail message on stdin (headers, an empty line, then the body).
# Output: the host name that occurs most often in http:// links found in
#         the body, on stdout.  Nothing is printed when no link is counted.
#
# `set -e` / pipefail are deliberately not used: grep exits non-zero when
# the message contains no URL, which is a normal outcome here.

# Mail is arbitrary bytes.  Process it byte-wise so invalid multibyte
# sequences cannot abort sed/awk/tr, and so the [a-z]/[A-Z] ranges and the
# sort order do not depend on the caller's locale.
LC_ALL=C
export LC_ALL

# Filter: message on stdin -> most frequently linked host name on stdout.
find_top_spam_domain() {
  # Drop carriage returns and turn every backslash into a slash, so that
  # "http:\\host\path" reads as "http://host/path".
  tr -d '\r' |
  tr '\\' '/' |
  # Skip the headers: the range starts at the first line with no fields and
  # never ends (1 == 0 is always false).  Lines ending in "=" are joined
  # with the following line (quoted-printable soft line break), and the
  # quoted-printable escape "=2E" is decoded to ".".
  awk '
    NF == 0, 1 == 0 {
      while ($0 ~ /=$/) {
        sub("=$", "")
        current = $0
        rv = getline
        if (rv < 1)
          break
        $0 = current $0
      }
      gsub("=2[eE]", ".")
      print
    }' |
  # Normalise how links are written:
  #   - delete "<!XXX>" (three capitals) tokens, presumably inserted to
  #     break up URLs
  #   - lower-case the "http:" scheme
  #   - give bare "www." names at line start / after a space a scheme
  #   - "scheme:\\" -> "scheme://"  NOTE(review): appears unreachable,
  #     since tr has already replaced every backslash
  #   - "http:/x" -> "http://x"
  sed -e 's/[<]![A-Z][A-Z][A-Z][>]//g' \
      -e 's/[hH][tT][tT][pP]:/http:/g' \
      -e 's#^www\.#http://www.#' \
      -e 's# www\.#http://www.#' \
      -e 's#\([a-z][a-z]*\):\\\\#\1://#g' \
      -e 's#http:/\([^/]\)#http://\1#g' |
  # Only lines containing a link are of interest.  Plain grep suffices
  # (fixed text), which avoids the obsolescent egrep.
  grep 'http://' |
  # Unwrap Yahoo redirector links down to their target, strip userinfo
  # ("a@b@" and "user:pass@"), replace every path with a fixed token,
  # collapse runs of dots, and finally put each "http://host" on a line of
  # its own.  The last expression contains literal newlines in its
  # replacement, so its continuation lines must stay unindented.
  sed -e 's#http://rd\.yahoo\.com/[^<>"]*http://#http://#g' \
      -e 's#http://srd\.yahoo\.com/[^<>"]*http://#http://#g' \
      -e 's#http://drs\.yahoo\.com/[^<>"]*http://#http://#g' \
      -e 's#http://rds\.yahoo\.com/[^<>"]*http://#http://#g' \
      -e 's#http://click\.shopping\.yahoo\.com/[^<>"]*http://#http://#g' \
      -e 's#http://[^/]*@[^:/]*@#http://#g' \
      -e 's#http://[^@:/]*:[^@/]*@#http://#g' \
      -e 's#\(http://[^/]*/\)[^ ">]*#\1deleted-path#g' \
      -e 's#[.][.]*#.#g' \
      -e 's#\(http://[^/"'"'"' :?<>)]*\)#\
\1\
#g' |
  # Count each host.  Host names are case-insensitive, so fold them to
  # lower case before counting and before applying the exclusion list;
  # otherwise "Spam.Example.COM" and "spam.example.com" split their votes
  # and "WWW.HOTMAIL.COM" slips past the exclusions.
  awk '
    /^http:\/\/[^/"'"'"' :?<>)]+$/ {
      sub("^http://[^@]*@", "http://")
      DOMAIN = tolower(substr($0, 8))    # text after the 7-char "http://"
      if (DOMAIN !~ /^(www.?w3.?org|internet\.e-mail|(.*\.)?(hotmail|msn)\.com)$/) {
        count[DOMAIN]++
      }
    }
    END {
      for (domain in count) {
        printf "%s\t%s\n", count[domain], domain
      }
    }' |
  # Highest count first; keep only the winner and print just its name.
  sort -rn |
  head -n 1 |
  cut -f2
}

find_top_spam_domain