#!/usr/bin/env python
# waccess - Copyright (c) 2001,2002, TundraWare Inc., All Rights Reserved
VERSION = "$Id: waccess,v 2.0 2002/09/03 21:00:00 tundra Exp $"
#
# Look for selected strings passed on the command line in the http access log.
# If found, dump the address, name, item retrieved, and access date for the
# matching record.
import commands
import getopt
import os
import socket
import sys
##########
# Booleans
##########
# Truth values are derived from comparisons rather than spelled as
# literals, so they are whatever the running interpreter uses for the
# result of a comparison.
TRUE = 0 == 0
FALSE = not TRUE
##########
# Constants & Tables
##########
# List of IP addresses to ignore.  Records whose IP address begins with
# any entry in this list are ignored entirely, so the entries may be
# partial IP quads.  If IGNOREDFILE exists, its contents are appended
# to IGNORED at program startup.
IGNORED = []

# The ignore file lives in the user's home directory.  HOME can be unset
# (os.getenv() then returns None, which os.path.join() rejects with an
# exception before any work is done), so fall back to whatever
# os.path.expanduser() can determine in that case.
_HOME = os.getenv("HOME")
if _HOME is None:
    _HOME = os.path.expanduser("~")
IGNOREDFILE = os.path.join(_HOME, ".waccessignored")

# This table is built dynamically at run time to keep track of
# all DNS reverse lookups.  Index into the table by IP address; the
# value is the name that was reported for that address.
REVERSE_CACHE = {}

# This table keeps track of which unique IPs accessed each search key.
# Index into the table by search key; the value is a list of addresses.
UNIQUE_IP = {}
##########
# Function Definitions
##########
##########
# Do a reverse lookup on an IP address, caching the results
# so that subsequent reverse lookups can use the cache instead of
# doing another lookup.
##########
def IPReverse(ipadr):
    """Return the host name for ipadr, consulting REVERSE_CACHE first.

    ipadr -- address string handed to socket.gethostbyaddr().

    Returns the first element of the resolver's answer (the primary host
    name), or the placeholder "NO REVERSE RESOLUTION" when the lookup
    fails.  Whichever result is produced is stored in REVERSE_CACHE, so
    each distinct address causes at most one resolver call per run.
    """
    if ipadr in REVERSE_CACHE:
        return REVERSE_CACHE[ipadr]

    try:
        revname = socket.gethostbyaddr(ipadr)[0]
    except Exception:
        # The lookup is best effort: any resolver failure becomes the
        # placeholder.  Catching Exception rather than using a bare
        # "except:" lets KeyboardInterrupt and SystemExit through, so
        # the user can still abort during a slow DNS query.
        revname = "NO REVERSE RESOLUTION"

    REVERSE_CACHE[ipadr] = revname
    return revname
##########
# Print program usage information
##########
def usage():
    """Write the program's help text to standard output.

    The text is two banner lines (version/copyright and the synopsis)
    followed by one row per option.  Rows whose first column is shorter
    than 20 characters are indented 10 columns and have that column
    padded to 20 so the descriptions line up; the longer banner lines
    are written flush left with no padding.

    The version number shown is the third whitespace-separated token of
    the module-level VERSION string.
    """
    UsageInfo = (
        ("waccess " + VERSION.split()[2] +
         " - Copyright (c) 2001, 2002 TundraWare Inc., All Rights Reserved. \n", ""),
        (" usage: waccess [-achilqrsv] [-f logfile] [search-key...] where,\n\n", ""),
        ("-a", "Sort summary output alphabetically by search key\n"),
        ("-c", "Ignore case when checking for key match in access record\n"),
        ("-f logfile", "Name of logfile to use. Default is /var/log/httpd-access.log\n"),
        ("-h", "Display this help information\n"),
        ("-i", "Do not ignore any addresses\n"),
        ("-l", "List ignored addresses in summary output\n"),
        ("-q", "Quiet mode - suppresses summary output\n"),
        # Spelling corrected: was "addess".
        ("-r", "Do reverse address lookups on each matching record\n"),
        ("-s", "Only show summary output - do not display individual matching records\n"),
        ("-v", "Show detailed version information\n"),
        # Spelling corrected: was "seach-key...", which disagreed with
        # the synopsis line above.
        ("search-key...", "Strings to look for in each access record\n\n"),
    )

    for option, description in UsageInfo:
        if len(option) < 20:
            # Only indent and pad the actual argument rows.
            sys.stdout.write(10 * " " + option.ljust(20) + description)
        else:
            sys.stdout.write(option + description)
##########
# Command Line Processing
##########
# Option defaults.  Each one can be overridden from the command line.
IGNORECASE = FALSE                  # -c sets: match keys without regard to case
LOG = "/var/log/httpd-access.log"   # -f replaces: access log to read
SHOWIGNORED = FALSE                 # -l sets: list ignored addresses in summary
NOIGNORE = FALSE                    # -i sets: disable the ignore list
SUMMARY = TRUE                      # -q clears: suppress the summary
REVERSE = FALSE                     # -r sets: reverse-resolve matching addresses
SHOW = TRUE                         # -s clears: hide individual matching records
SORTED = FALSE                      # -a sets: sort the summary by search key

# "h" has to appear in the option string: without it getopt rejects -h as
# an unknown option, so help was only ever shown via the error path (exit
# status 2) and the "-h" branch below could never run.  The option string
# also no longer starts with "-", which getopt.getopt() would simply have
# treated as one more accepted option letter.
try:
    opts, args = getopt.getopt(sys.argv[1:], "acf:hilqrsv")
except getopt.GetoptError:
    usage()
    sys.exit(2)

# Options are applied in the order given.  -r also turns SHOW on and -s
# also turns REVERSE off, so when both are supplied the later one wins.
for opt, val in opts:
    if opt == "-a":
        SORTED = TRUE
    elif opt == "-c":
        IGNORECASE = TRUE
    elif opt == "-f":
        LOG = val
    elif opt == "-h":
        usage()
        sys.exit(0)
    elif opt == "-i":
        NOIGNORE = TRUE
    elif opt == "-l":
        SHOWIGNORED = TRUE
    elif opt == "-q":
        SUMMARY = FALSE
    elif opt == "-r":
        REVERSE = TRUE
        SHOW = TRUE
    elif opt == "-s":
        SHOW = FALSE
        REVERSE = FALSE
    elif opt == "-v":
        sys.stdout.write(VERSION + "\n")
        sys.exit(0)
##########
# Process the ignored rc file, if any
##########
# Append the entries of the user's ignore file, one per line, to IGNORED.
if os.path.exists(IGNOREDFILE):
    i = open(IGNOREDFILE)
    try:
        for ip in i.read().splitlines():
            # Entries are used as address prefixes.  Surrounding
            # whitespace would keep an entry from ever matching, and an
            # empty entry is a prefix of every address (it would cause
            # the whole log to be ignored), so trim the line and skip it
            # if nothing is left.
            ip = ip.strip()
            if ip:
                IGNORED.append(ip)
    finally:
        # Release the handle even if reading fails part-way.
        i.close()
##########
# Process the log
##########
# Per-key hit counters and per-key lists of distinct client addresses.
matched = {}
for a in args:
    matched[a] = 0
    UNIQUE_IP[a] = []

# Pair every search key with the text that is actually searched for, so
# the keys are lower-cased once here instead of once per log record.
if IGNORECASE:
    needles = [(a, a.lower()) for a in args]
else:
    needles = [(a, a) for a in args]

total = 0

f = open(LOG)
try:
    # Read the log one record at a time rather than loading the whole
    # file into memory.
    for record in f:
        record = record.rstrip("\r\n")
        total += 1
        fields = record.split()

        # These field definitions are appropriate for Apache access logs.
        # They may need to be changed for other log layouts.  Fields 0
        # through 6 are needed; a blank or truncated record is counted in
        # the total but otherwise skipped instead of raising IndexError.
        if len(fields) < 7:
            continue

        DATESTAMP = fields[3][1:]      # drop the leading character of the field
        IPADR = fields[0]
        CMD = fields[5]
        FILE = fields[6]

        # See if this is an IP address to ignore unless user suppresses feature
        PROCESS = TRUE
        if not NOIGNORE:
            for ignoreIP in IGNORED:
                if IPADR.startswith(ignoreIP):
                    PROCESS = FALSE
                    break              # one matching prefix is enough
        if not PROCESS:
            continue

        # Check each log record for a match with any command line argument
        if IGNORECASE:
            haystack = record.lower()
        else:
            haystack = record

        MATCHED = FALSE
        for key, needle in needles:
            if needle in haystack:
                matched[key] += 1
                MATCHED = TRUE
                # Save if new IP address encountered
                if IPADR not in UNIQUE_IP[key]:
                    UNIQUE_IP[key].append(IPADR)

        # Only display the matching record once, regardless of how many
        # matching substrings are found.
        if MATCHED:
            if REVERSE:
                revname = IPReverse(IPADR)
            else:
                revname = ""

            if SHOW:
                # Each value is followed by its own padding string and
                # all items are joined with single spaces.
                sys.stdout.write(" ".join([
                    DATESTAMP, " " * (19 - len(DATESTAMP)),
                    IPADR, " " * (15 - len(IPADR)),
                    revname[-(35 + 1):], " " * (35 - len(revname)),
                    CMD[1:], " " * (8 - len(CMD)),
                    FILE,
                ]) + "\n")
finally:
    # Close the log even if processing stops with an error.
    f.close()
##########
# Output Summary Of Results Unless Told Not To
##########
if SUMMARY:
    emit = sys.stdout.write

    # Total number of records read from the log.
    emit("\nProcessed %d Total Records.\n" % (total,) + "\n")

    # Contents of the final ignore table, shown only when the user asked
    # for it and the ignore feature is enabled.
    if SHOWIGNORED and not NOIGNORE:
        emit("The Following Addresses Were Ignored:\n" + "\n")
        for address in IGNORED:
            emit(address + "\n")
        emit("\n\n" + "\n")

    # Order the keys alphabetically if the user requested it.
    if SORTED:
        args.sort()

    # One line per search key: the key padded on the right to 20 columns,
    # then the hit count and the unique-IP count, each padded on the left
    # to 8 columns.
    for key in args:
        hits = str(matched[key]).rjust(8)
        unique = str(len(UNIQUE_IP[key])).rjust(8)
        emit("%s : %s Accesses From %s Unique IPs" % (key.ljust(20), hits, unique) + "\n")