diff --git a/waccess b/waccess index 66f4f09..78236a6 100755 --- a/waccess +++ b/waccess @@ -1,6 +1,6 @@ #!/usr/bin/env python # waccess - Copyright (c) 2001,2002, TundraWare Inc., All Rights Reserved -# $Id: waccess,v 1.62 2002/09/02 06:24:08 tundra Exp $ +# $Id: waccess,v 1.63 2002/09/02 18:33:28 tundra Exp $ # # Look for selected strings passed on the command line in the http access log. @@ -31,17 +31,37 @@ IGNORED = ["127.0", "192.168.0."] # This table is built dynamically at run time to keep track of -# any DNS that cannot be reverse. That way we only have to -# waste lookup time on such an address the first time it is -# encountered. +# all DNS reverse lookups. Index into the table by IP address. -BADREVERSE = [] + +REVERSE_CACHE = {} ########## # Function Defintions ########## +# Do a reverse lookup on an IP address, caching the results +# so that subsequent reverse lookups can use the cache instead of +# doing another lookup. + +def IPReverse(ipadr): + if REVERSE_CACHE.has_key(ipadr): + revname = REVERSE_CACHE[ipadr] + else: + try: + revname = socket.gethostbyaddr(ipadr)[0] + except: + revname = "NO REVERSE RESOLUTION" + + REVERSE_CACHE[ipadr] = revname + + return revname + + + +# Print program usage information and error exit. + def usage(): print "usage: waccess [-irs -f logfile]" sys.exit(2) @@ -92,12 +112,20 @@ total += 1 fields = record.split() + # These field definitions are appropriate for Apache access logs. + # They may need to be changed for other log layouts. + + DATESTAMP= fields[3][1:] + IPADR = fields[0] + CMD = fields[5] + FILE = fields[6] + # See if this is an IP address to ignore unless user suppresses feature PROCESS = TRUE if not NOIGNORE: for ignoreIP in IGNORED: - if fields[0].startswith(ignoreIP): + if IPADR.startswith(ignoreIP): PROCESS = FALSE if PROCESS: @@ -118,26 +146,23 @@ if MATCHED: if REVERSE: - try: - if not BADREVERSE.count(fields[0]): - revname = socket.gethostbyaddr(fields[0])[0] - else: - revname = "NO REVERSE RESOLUTION" - except: - BADREVERSE.append(fields[0]) - revname = "NO REVERSE RESOLUTION" + revname = IPReverse(IPADR) + + else: + # Even if we're not doing reverse lookups, use the cache data + # structure to keep track of how many unique IPs we encounter + REVERSE_CACHE[IPADR] = IPADR + if SHOW: - print fields[3][1:], " " * (19 - len(fields[3][1:])), \ - fields[0], " " * (15 - len(fields[0])), \ + print DATESTAMP, " " * (19 - len(DATESTAMP)), \ + IPADR, " " * (15 - len(IPADR)), \ revname[-(35+1):], " " * (35 - len(revname)), \ - fields[5], " " * (8 - len(fields[5])), fields[6] + CMD[1:], " " * (8 - len(CMD)), FILE f.close() print "\nProcessed %d Total Records.\n" % (total,) for a in args: - print "Found %d Matching Records Containing: %s" % (matched[a], a) - - + print "%s%s=> %d Accesses from %s Unique IPs" % (a, (15-len(a))*" ", matched[a], len(REVERSE_CACHE))