#!/usr/bin/env python
# waccess - Copyright (c) 2001,2002, TundraWare Inc., All Rights Reserved
# $Id: waccess,v 1.63 2002/09/02 18:33:28 tundra Exp $
#
# Look for selected strings passed on the command line in the http access log.
# If found, dump the address, name, item retrieved, and access date for the
# matching record.

# The 'commands' module only exists on Python 2.  Guard the import so the
# script can still start on interpreters that no longer ship it.
try:
    import commands
except ImportError:
    commands = None
import getopt
import socket
import sys

##########
# Booleans
##########

FALSE = 0==1
TRUE = not FALSE

##########
# Constants & Tables
##########

# List of IP addresses to ignore.  Records with IP addresses found
# in this list will be ignored entirely.  The addresses here may
# be partial IP quads.

IGNORED = ["127.0", "192.168.0."]

# This table is built dynamically at run time to keep track of
# all DNS reverse lookups.  Index into the table by IP address.

REVERSE_CACHE = {}

##########
# Function Definitions
##########

# Do a reverse lookup on an IP address, caching the results
# so that subsequent reverse lookups can use the cache instead of
# doing another lookup.

def IPReverse(ipadr):
    """Return the host name for *ipadr*, using REVERSE_CACHE when possible.

    On a cache miss the name is obtained with socket.gethostbyaddr() and
    stored in REVERSE_CACHE under *ipadr*.  If the lookup fails, the
    placeholder string "NO REVERSE RESOLUTION" is returned and cached
    instead, so a failing address is only looked up once.
    """
    if ipadr in REVERSE_CACHE:
        return REVERSE_CACHE[ipadr]

    try:
        revname = socket.gethostbyaddr(ipadr)[0]
    except (socket.error, UnicodeError):
        # socket.error covers resolver failures (herror/gaierror derive
        # from it); UnicodeError covers names the resolver refuses to
        # encode.  Anything else (e.g. KeyboardInterrupt) propagates.
        revname = "NO REVERSE RESOLUTION"

    REVERSE_CACHE[ipadr] = revname
    return revname


# Print program usage information and error exit.
def usage():
    """Print the command line synopsis on stdout and exit with status 2."""
    print("usage: waccess [-irs -f logfile]")
    sys.exit(2)


##########
# Command Line Processing
##########

# Defaults; each may be overridden by a command line option below.
LOG      = "/var/log/httpd-access.log"   # -f logfile : log file to scan
NOIGNORE = FALSE                         # -i : do not skip the IGNORED addresses
REVERSE  = FALSE                         # -r : do reverse lookups on matching IPs
SHOW     = TRUE                          # -s : suppress per-record output

try:
    opts, args = getopt.getopt(sys.argv[1:], '-f:irs')
except getopt.GetoptError:
    usage()

# Options are applied in the order given, so when both -r and -s are
# present the later one wins.
for opt, val in opts:
    if opt == "-f":
        LOG = val
    if opt == "-i":
        NOIGNORE = TRUE
    if opt == "-r":
        REVERSE = TRUE
        SHOW = TRUE
    if opt == "-s":
        SHOW = FALSE
        REVERSE = FALSE

##########
# Process the log
##########

# Read in the whole log file.  The handle is closed even if the read fails.
f = open(LOG)
try:
    records = f.read().splitlines()
finally:
    f.close()

# Per-pattern match counters, keyed by the command line argument.
matched = {}
for a in args:
    matched[a] = 0

total = 0

for record in records:
    total += 1
    fields = record.split()

    # Blank or truncated records do not have the fields read below; they are
    # counted in the total but otherwise skipped instead of raising IndexError.
    if len(fields) < 7:
        continue

    # These field definitions are appropriate for Apache access logs.
    # They may need to be changed for other log layouts.
    DATESTAMP = fields[3][1:]
    IPADR = fields[0]
    CMD = fields[5]
    FILE = fields[6]

    # See if this is an IP address to ignore unless user suppresses feature
    PROCESS = TRUE
    if not NOIGNORE:
        for ignoreIP in IGNORED:
            if IPADR.startswith(ignoreIP):
                PROCESS = FALSE
                break
    if not PROCESS:
        continue

    # Check each log record for a match with any command line argument
    MATCHED = FALSE
    revname = ""
    for a in args:
        if a in record:
            matched[a] += 1
            MATCHED = TRUE

    # But only display the matching record once, regardless of how many
    # matching substrings are found.
    if not MATCHED:
        continue

    if REVERSE:
        revname = IPReverse(IPADR)
    else:
        # Even if we're not doing reverse lookups, use the cache data
        # structure to keep track of how many unique IPs we encounter
        REVERSE_CACHE[IPADR] = IPADR

    if SHOW:
        # Columns are joined with single spaces, which is exactly how the
        # original comma-separated print statement laid them out.
        print(" ".join([
            DATESTAMP, " " * (19 - len(DATESTAMP)),
            IPADR, " " * (15 - len(IPADR)),
            revname[-(35+1):], " " * (35 - len(revname)),
            CMD[1:], " " * (8 - len(CMD)),
            FILE,
        ]))

print("\nProcessed %d Total Records.\n" % (total,))

# NOTE(review): the "Unique IPs" figure is len(REVERSE_CACHE), i.e. the number
# of distinct IPs across ALL matching records, so every pattern reports the
# same value.  Confirm whether a per-pattern count was intended.
for a in args:
    print("%s%s=> %d Accesses from %s Unique IPs" %
          (a, (15-len(a))*" ", matched[a], len(REVERSE_CACHE)))