#!/usr/bin/env python
# waccess - Copyright (c) 2001,2002, TundraWare Inc., All Rights Reserved

VERSION = "$Id: waccess,v 1.67 2002/09/03 17:50:42 tundra Exp $"

#
# Look for selected strings passed on the command line in the http access log.
# If found, dump the address, name, item retrieved, and access date for the
# matching record.
#
# Options:
#
#   -a          List the summary in sorted search-key order
#   -f logfile  Read 'logfile' instead of the default log
#   -i          Do not ignore any addresses
#   -l          List the table of ignored addresses (unless -i is in effect)
#   -q          Do not print the summary
#   -r          Do reverse DNS lookups (also turns record display on)
#   -s          Do not display matching records (also turns lookups off)
#   -v          Print the version and exit
#
# The code sticks to constructs that behave identically under Python 2 and
# Python 3 (single-argument print calls, "in" tests on dictionaries).

# The 'commands' module is not referenced below.  It only exists on
# Python 2, so the import is guarded rather than removed.
try:
    import commands
except ImportError:
    commands = None

import getopt
import os
import socket
import sys

##########
# Booleans
##########

FALSE = 0 == 1
TRUE = not FALSE

##########
# Constants & Tables
##########

# List of IP addresses to ignore.  Records with IP addresses found
# in this list will be ignored entirely.  The addresses here may
# be partial IP quads.  If IGNOREDFILE exists, its entries are added
# to a copy of this list each time main() runs.

IGNORED = ["127.0", ]

# expanduser() is used instead of joining onto os.getenv("HOME") so that
# an unset HOME no longer makes the program die before it starts.

IGNOREDFILE = os.path.expanduser(os.path.join("~", ".waccessignored"))

# Log file read when -f is not given.

LOG = "/var/log/httpd-access.log"

# This table is built dynamically at run time to keep track of
# all DNS reverse lookups.  Index into the table by IP address.

REVERSE_CACHE = {}

# This table keeps track of which unique IPs accessed each
# search key.  Index into the table by search key.

UNIQUE_IP = {}

# Widths used to pad the columns of a displayed record.

DATE_WIDTH = 19
IP_WIDTH = 15
HOST_WIDTH = 35
CMD_WIDTH = 8

# Number of whitespace-separated fields a record must have before the
# field positions used in main() can be indexed safely.

MIN_FIELDS = 7


##########
# Function Definitions
##########

##########
# Do a reverse lookup on an IP address, caching the results
# so that subsequent reverse lookups can use the cache instead of
# doing another lookup.  Returns the host name, or the string
# "NO REVERSE RESOLUTION" when the lookup fails.
##########

def IPReverse(ipadr):
    if ipadr not in REVERSE_CACHE:
        try:
            REVERSE_CACHE[ipadr] = socket.gethostbyaddr(ipadr)[0]
        # Only lookup failures are treated as "no name".  A bare except
        # would also swallow KeyboardInterrupt and SystemExit.
        except (socket.error, ValueError):
            REVERSE_CACHE[ipadr] = "NO REVERSE RESOLUTION"
    return REVERSE_CACHE[ipadr]


##########
# Print program version number and exit normally
##########

def version():
    print(VERSION)
    sys.exit(0)


##########
# Print program usage information and error exit.
##########

def usage():
    print("usage: waccess [-ailqrsv -f logfile]")
    sys.exit(2)


##########
# Return a new list holding the built-in IGNORED entries followed by
# the entries found in the file 'path', if that file exists.
##########

def _load_ignored(path):
    prefixes = list(IGNORED)
    if os.path.exists(path):
        rcfile = open(path)
        try:
            for line in rcfile:
                prefix = line.strip()
                # An empty entry is a prefix of every address and would
                # cause every record to be ignored, so skip blank lines.
                if prefix:
                    prefixes.append(prefix)
        finally:
            rcfile.close()
    return prefixes


##########
# Return TRUE if 'ipadr' begins with any of the entries in 'prefixes'.
##########

def _is_ignored(ipadr, prefixes):
    for prefix in prefixes:
        if ipadr.startswith(prefix):
            return TRUE
    return FALSE


##########
# Build the display line for one matching record.  The text and
# padding are separate items joined by single blanks, which reproduces
# the spacing of the comma-separated print statement used originally.
##########

def _format_record(datestamp, ipadr, revname, cmd, path):
    columns = [
        datestamp, " " * (DATE_WIDTH - len(datestamp)),
        ipadr, " " * (IP_WIDTH - len(ipadr)),
        # Keep the tail of an over-long name: HOST_WIDTH + 1 characters,
        # exactly as the original slice did.
        revname[-(HOST_WIDTH + 1):], " " * (HOST_WIDTH - len(revname)),
        # cmd is sliced from position 1 but padded on its full length.
        cmd[1:], " " * (CMD_WIDTH - len(cmd)),
        path,
    ]
    return " ".join(columns)


##########
# Program entry point: parse the options, scan the log, and report.
#
# 'argv' is the list of command line arguments without the program
# name; sys.argv[1:] is used when it is omitted.  Exits through
# usage() on a bad option and through version() for -v.
##########

def main(argv=None):
    if argv is None:
        argv = sys.argv[1:]

    ##########
    # Command Line Processing
    ##########

    log = LOG
    listignore = FALSE
    noignore = FALSE
    summary = TRUE
    reverse = FALSE
    show = TRUE
    sort_keys = FALSE

    # NOTE(review): the leading '-' in the option string is carried over
    # unchanged from the original; it looks unintentional - confirm
    # before removing it.
    try:
        opts, args = getopt.getopt(argv, '-af:ilqrsv')
    except getopt.GetoptError:
        usage()

    for opt, val in opts:
        if opt == "-a":
            sort_keys = TRUE
        if opt == "-f":
            log = val
        if opt == "-i":
            noignore = TRUE
        if opt == "-l":
            listignore = TRUE
        if opt == "-q":
            summary = FALSE
        if opt == "-r":
            reverse = TRUE
            show = TRUE
        if opt == "-s":
            show = FALSE
            reverse = FALSE
        if opt == "-v":
            version()

    ##########
    # Process the ignored rc file, if any
    ##########

    ignored = _load_ignored(IGNOREDFILE)

    # Show contents of final ignore table if user asked for it.
    # This is done only if the ignore feature is enabled.

    if listignore and not noignore:
        print("Ignoring Addresses:")
        for prefix in ignored:
            print(20 * " " + prefix)
        print("\n\n")

    ##########
    # Process the log
    ##########

    matched = {}
    for key in args:
        matched[key] = 0
        UNIQUE_IP[key] = []

    total = 0

    # Read the log one record at a time; the file is closed even if
    # processing fails part way through.

    logfile = open(log)
    try:
        for line in logfile:
            record = line.rstrip("\r\n")
            total += 1
            fields = record.split()

            # A record too short to hold the fields used below is counted
            # but otherwise skipped instead of raising IndexError.

            if len(fields) < MIN_FIELDS:
                continue

            # These field definitions are appropriate for Apache access
            # logs.  They may need to be changed for other log layouts.

            datestamp = fields[3][1:]
            ipadr = fields[0]
            cmd = fields[5]
            path = fields[6]

            # See if this is an IP address to ignore unless user
            # suppresses the feature

            if not noignore and _is_ignored(ipadr, ignored):
                continue

            # Check each log record for a match with any command line
            # argument

            hit = FALSE
            for key in args:
                if record.find(key) >= 0:
                    matched[key] += 1
                    hit = TRUE

                    # Save if new IP address encountered

                    if ipadr not in UNIQUE_IP[key]:
                        UNIQUE_IP[key].append(ipadr)

            # Only display the matching record once, regardless of how
            # many matching substrings are found.

            if hit and show:
                # Without -r no lookup is done and the host column is left
                # blank.  Previously the name was unassigned in that case,
                # so displaying the first match raised NameError.
                if reverse:
                    revname = IPReverse(ipadr)
                else:
                    revname = ""
                print(_format_record(datestamp, ipadr, revname, cmd, path))
    finally:
        logfile.close()

    ##########
    # Output Summary Of Results Unless Told Not To
    ##########

    if summary:
        print("\nProcessed %d Total Records.\n" % (total,))

        if sort_keys:
            args.sort()

        for key in args:
            num_matched = str(matched[key])
            num_unique = str(len(UNIQUE_IP[key]))
            print("%s : %s Accesses From %s Unique IPs" % (
                key.ljust(20),
                num_matched.rjust(8),
                num_unique.rjust(8)))


if __name__ == "__main__":
    main()