diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6cac9e5
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+exclude_ips.txt
diff --git a/analyze.sh b/analyze.sh
index e41a003..c156972 100755
--- a/analyze.sh
+++ b/analyze.sh
@@ -1,6 +1,7 @@
 #!/bin/sh -e
 # Simple package download stats script
 TEMP="/tmp/access_log_temp"
+DIR="$PWD"
 ACCESS_LOG_DIR="/var/log/lighttpd"
 
 cd "$ACCESS_LOG_DIR"
@@ -15,6 +16,13 @@ cat_log() {
 	esac
 }
 
+if [ "$1" = "-h" ]; then
+	echo "usage: analyze.sh [-e] [-h]"
+	echo "options::"
+	echo " -e use exclude_ips.txt"
+	echo " -h show this help"
+	exit 1
+fi
 
 echo "---"
 echo "Count of deb and rpm files accessed in /packages/osmocom,"
@@ -49,6 +57,11 @@ for file in $FILES; do
 	done
 done
 
+if [ "$1" = "-e" ]; then
+	python3 "$DIR"/exclude_ips.py "$DIR/exclude_ips.txt" "$TEMP" "$TEMP.2"
+	mv "$TEMP.2" "$TEMP"
+fi
+
 echo
 echo "Distributions:"
 distros="$(cat "$TEMP" \
diff --git a/exclude_ips.py b/exclude_ips.py
new file mode 100644
--- /dev/null
+++ b/exclude_ips.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+"""Filter log lines whose client address is in an excluded network.
+
+usage: exclude_ips.py EXCLUDE_IPS_TXT TEMP_IN TEMP_OUT
+
+EXCLUDE_IPS_TXT holds one address or network per line; empty lines and
+lines starting with "#" or "==" are ignored. The first space-separated
+field of each TEMP_IN line is taken as the client address, and every
+line that matches none of the networks is copied unchanged to TEMP_OUT.
+"""
+import sys
+import ipaddress
+
+
+def load_netmasks(path):
+    """Return the list of networks read from the file at path."""
+    netmasks = []
+    with open(path, "r") as r:
+        for line in r:
+            line = line.strip()
+            if not line or line.startswith(("==", "#")):
+                continue
+            # strict=False accepts entries with host bits set, such as
+            # "192.0.2.7/24", instead of aborting with ValueError.
+            netmasks.append(ipaddress.ip_network(line, strict=False))
+    return netmasks
+
+
+def ip_is_relevant(ip, netmasks):
+    """Return False if ip lies inside one of netmasks, True otherwise.
+
+    A string that is not a valid IP address cannot be matched against a
+    network, so it counts as relevant instead of raising ValueError.
+    """
+    try:
+        address = ipaddress.ip_address(ip)
+    except ValueError:
+        return True
+    return not any(address in netmask for netmask in netmasks)
+
+
+def main(argv):
+    """Filter argv[2] into argv[3] using the networks listed in argv[1]."""
+    if len(argv) < 4:
+        sys.exit(f"usage: {argv[0]} EXCLUDE_IPS_TXT TEMP_IN TEMP_OUT")
+
+    netmasks = load_netmasks(argv[1])
+
+    print("")
+    print(f"Excluding {len(netmasks)} netmasks:")
+
+    lines_in = 0
+    lines_out = 0
+    with open(argv[2], "r") as r, open(argv[3], "w") as w:
+        for line in r:
+            lines_in += 1
+            if ip_is_relevant(line.split(" ", 1)[0].strip(), netmasks):
+                w.write(line)
+                lines_out += 1
+
+    print("%7i lines in" % lines_in)
+    print("%7i lines out" % lines_out)
+
+
+if __name__ == "__main__":
+    main(sys.argv)