#!/usr/bin/env python
# waccess - Copyright (c) 2001,2002, TundraWare Inc., All Rights Reserved

VERSION = "$Id: waccess,v 2.0 2002/09/03 21:00:00 tundra Exp $"

#
# Look for selected strings passed on the command line in the http access log.
# If found, dump the address, name, item retrieved, and access date for the
# matching record.
#
# The code below only uses constructs accepted by both Python 2 and Python 3.

try:
    # Python 2 only module.  Nothing in this file uses it; the import is
    # retained from the original and guarded so the script loads on Python 3.
    import commands
except ImportError:
    commands = None
import getopt
import os
import socket
import sys

##########
# Booleans
##########

FALSE = 0==1
TRUE = not FALSE

##########
# Constants & Tables
##########

# List of IP addresses to ignore.  Records with IP addresses found
# in this list will be ignored entirely.  The addresses here may
# be partial IP quads.  If IGNOREDFILE exists, its contents will
# be appended to the IGNORED data structure at program startup.

IGNORED = []

# expanduser() is used instead of os.getenv("HOME") because the latter
# returns None when HOME is unset, which made os.path.join() blow up.
IGNOREDFILE = os.path.join(os.path.expanduser("~"), ".waccessignored")

# This table is built dynamically at run time to keep track of
# all DNS reverse lookups.  Index into the table by IP address.

REVERSE_CACHE = {}

# This table keeps track of which unique IPs access each
# search key.  Index by search key; each value is a set of IP strings.

UNIQUE_IP = {}

# Fewest whitespace-separated fields a record needs so that the field
# offsets used in ProcessLog() (0, 3, 5 and 6) all exist.

MINFIELDS = 7

##########
# Command Line Option Defaults
#
# ParseCommandLine() starts from these values and returns a modified copy.
##########

IGNORECASE  = FALSE
LOG         = "/var/log/httpd-access.log"
SHOWIGNORED = FALSE
NOIGNORE    = FALSE
SUMMARY     = TRUE
REVERSE     = FALSE
SHOW        = TRUE
SORTED      = FALSE

##########
# Function Definitions
##########

##########
# Do a reverse lookup on an IP address, caching the results
# so that subsequent reverse lookups can use the cache instead of
# doing another lookup.
#
# ipadr   - address string handed to socket.gethostbyaddr()
# Returns - the primary host name, or "NO REVERSE RESOLUTION" if the
#           lookup fails for any reason.  Failures are cached as well.
##########

def IPReverse(ipadr):

    if ipadr not in REVERSE_CACHE:
        try:
            revname = socket.gethostbyaddr(ipadr)[0]
        # Deliberately best-effort, but "except Exception" rather than a
        # bare except so Ctrl-C and sys.exit() are not swallowed.
        except Exception:
            revname = "NO REVERSE RESOLUTION"
        REVERSE_CACHE[ipadr] = revname

    return REVERSE_CACHE[ipadr]


##########
# Print program usage information
##########

def usage():
    UsageInfo = (
        ("waccess " + VERSION.split()[2] + " - Copyright (c) 2001, 2002 TundraWare Inc., All Rights Reserved. \n", ""),
        (" usage: waccess [-achilqrsv] [-f logfile] [search-key...] where,\n\n", ""),
        ("-a", "Sort summary output alphabetically by search key\n"),
        ("-c", "Ignore case when checking for key match in access record\n"),
        ("-f logfile", "Name of logfile to use. Default is /var/log/httpd-access.log\n"),
        ("-h", "Display this help information\n"),
        ("-i", "Do not ignore any addresses\n"),
        ("-l", "List ignored addresses in summary output\n"),
        ("-q", "Quiet mode - suppresses summary output\n"),
        ("-r", "Do reverse address lookups on each matching record\n"),
        ("-s", "Only show summary output - do not display individual matching records\n"),
        ("-v", "Show detailed version information\n"),
        ("search-key...", "Strings to look for in each access record\n\n")
        )

    for x, y in UsageInfo:
        if len(x) < 20:                 # Only indent for the actual argument info
            sys.stdout.write(10 * " ")
        sys.stdout.write(x)
        sys.stdout.write((20 - len(x)) * " ")
        sys.stdout.write(y)


##########
# Command Line Processing
#
# argv    - argument list WITHOUT the program name (i.e. sys.argv[1:])
# Returns - (options, keys): options is a dictionary keyed by the names
#           of the option defaults above, keys is the list of search
#           strings left over after option parsing.
# Exits   - status 2 after printing usage on a bad option,
#           status 0 after servicing -h or -v.
##########

def ParseCommandLine(argv):

    options = {"IGNORECASE"  : IGNORECASE,
               "LOG"         : LOG,
               "SHOWIGNORED" : SHOWIGNORED,
               "NOIGNORE"    : NOIGNORE,
               "SUMMARY"     : SUMMARY,
               "REVERSE"     : REVERSE,
               "SHOW"        : SHOW,
               "SORTED"      : SORTED}

    try:
        # 'h' must be listed here or -h is rejected as an unknown option
        # and the help branch below can never run.
        opts, keys = getopt.getopt(argv, "acf:hilqrsv")
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    # Options are applied in command line order, so for the mutually
    # exclusive -r and -s the last one given wins.
    for opt, val in opts:
        if opt == "-a":
            options["SORTED"] = TRUE
        if opt == "-c":
            options["IGNORECASE"] = TRUE
        if opt == "-f":
            options["LOG"] = val
        if opt == "-h":
            usage()
            sys.exit(0)
        if opt == "-i":
            options["NOIGNORE"] = TRUE
        if opt == "-l":
            options["SHOWIGNORED"] = TRUE
        if opt == "-q":
            options["SUMMARY"] = FALSE
        if opt == "-r":
            options["REVERSE"] = TRUE
            options["SHOW"] = TRUE
        if opt == "-s":
            options["SHOW"] = FALSE
            options["REVERSE"] = FALSE
        if opt == "-v":
            print(VERSION)
            sys.exit(0)

    return options, keys


##########
# Turn the text of an ignore file into a list of address prefixes.
#
# Surrounding whitespace is stripped and blank lines are dropped.  A blank
# entry must never reach the ignore list: every string starts with "",
# so a single empty line would cause every record to be ignored.
##########

def ParseIgnored(text):

    entries = []
    for line in text.splitlines():
        entry = line.strip()
        if entry:
            entries.append(entry)
    return entries


##########
# Process the ignored rc file, if any
#
# Returns the list of prefixes found in 'path', or an empty list when
# the file does not exist.
##########

def LoadIgnored(path):

    if not os.path.exists(path):
        return []

    handle = open(path)
    try:
        return ParseIgnored(handle.read())
    finally:
        handle.close()


##########
# See if this is an IP address to ignore.
#
# This is a plain string prefix test, so the entry "10.1" matches
# "10.1.2.3" and also "10.10.2.3"; end an entry with "." to pin a quad.
##########

def IsIgnored(ipadr, ignored):

    for prefix in ignored:
        if ipadr.startswith(prefix):
            return TRUE
    return FALSE


##########
# Build the display line for one matching record.
#
# The columns are joined by single spaces, with padding columns in between
# (which are empty when a value is already wider than its column).
# Only the last 36 characters of the reverse name are shown, and the
# first character of 'cmd' is dropped.
##########

def FormatMatch(datestamp, ipadr, revname, cmd, filename):

    columns = [datestamp,           " " * (19 - len(datestamp)),
               ipadr,               " " * (15 - len(ipadr)),
               revname[-(35 + 1):], " " * (35 - len(revname)),
               cmd[1:],             " " * (8 - len(cmd)),
               filename]

    return " ".join(columns)


##########
# Process the log
#
# records    - any iterable of log lines (an open file works and is read
#              one line at a time rather than slurped into memory)
# keys       - search strings; a record matches a key when the key occurs
#              anywhere in the record
# ignored    - address prefixes whose records are skipped entirely
#              (pass an empty list to disable ignoring)
# ignorecase - compare keys and records case-insensitively
# reverse    - do a reverse lookup for each matching record
# show       - print each matching record
#
# Returns (total, matched): the number of lines read and a dictionary
# mapping each key to its match count.  UNIQUE_IP[key] is reset and
# filled with the set of addresses that matched that key.
##########

def ProcessLog(records, keys, ignored, ignorecase, reverse, show):

    matched = {}
    for key in keys:
        matched[key] = 0
        UNIQUE_IP[key] = set()

    # Fold the keys once up front instead of once per record.
    if ignorecase:
        needles = [key.lower() for key in keys]
    else:
        needles = list(keys)

    total = 0
    for line in records:
        total += 1
        record = line.rstrip("\r\n")
        fields = record.split()

        # Blank or truncated lines are counted but cannot be examined.
        if len(fields) < MINFIELDS:
            continue

        # These field definitions are appropriate for Apache access logs.
        # They may need to be changed for other log layouts.

        datestamp = fields[3][1:]
        ipadr     = fields[0]
        cmd       = fields[5]
        filename  = fields[6]

        if IsIgnored(ipadr, ignored):
            continue

        if ignorecase:
            haystack = record.lower()
        else:
            haystack = record

        # Check each log record for a match with any command line argument
        hit = FALSE
        for key, needle in zip(keys, needles):
            if needle in haystack:
                matched[key] += 1
                UNIQUE_IP[key].add(ipadr)
                hit = TRUE

        # Only display the matching record once, regardless of how many
        # matching substrings are found.
        if hit:
            if reverse:
                revname = IPReverse(ipadr)
            else:
                revname = ""
            if show:
                print(FormatMatch(datestamp, ipadr, revname, cmd, filename))

    return total, matched


##########
# Build one line of the per-key summary: key left-justified in 20
# columns, both counts right-justified in 8.
##########

def FormatSummary(key, num_matched, num_unique):

    return "%s : %s Accesses From %s Unique IPs" % (
        key.ljust(20),
        str(num_matched).rjust(8),
        str(num_unique).rjust(8))


##########
# Output Summary Of Results
#
# keys        - search keys, in the order to report them
# total       - number of records processed
# matched     - match count per key, as returned by ProcessLog()
# showignored - also list the contents of the IGNORED table
# sortkeys    - report keys alphabetically (the caller's list is untouched)
##########

def PrintSummary(keys, total, matched, showignored, sortkeys):

    # Show total number of records processed.
    print("\nProcessed %d Total Records.\n" % (total,))

    # Show contents of final ignore table if user asked for it.
    if showignored:
        print("The Following Addresses Were Ignored:\n")
        for entry in IGNORED:
            print(entry)
        print("\n\n")

    # Sort output if user requested it
    keys = list(keys)
    if sortkeys:
        keys.sort()

    # Summarize number of hits and unique IPs by search key
    for key in keys:
        print(FormatSummary(key, matched[key], len(UNIQUE_IP[key])))


##########
# Program Entry Point
#
# argv defaults to the process command line (without the program name).
##########

def main(argv=None):

    if argv is None:
        argv = sys.argv[1:]

    options, keys = ParseCommandLine(argv)

    IGNORED.extend(LoadIgnored(IGNOREDFILE))

    # -i disables ignoring without discarding the table itself.
    if options["NOIGNORE"]:
        ignored = []
    else:
        ignored = IGNORED

    logfile = open(options["LOG"])
    try:
        total, matched = ProcessLog(logfile, keys, ignored,
                                    options["IGNORECASE"],
                                    options["REVERSE"],
                                    options["SHOW"])
    finally:
        logfile.close()

    # Output the summary unless told not to.  The ignore table is listed
    # only if the ignore feature is enabled.
    if options["SUMMARY"]:
        PrintSummary(keys, total, matched,
                     options["SHOWIGNORED"] and not options["NOIGNORE"],
                     options["SORTED"])


if __name__ == "__main__":
    main()