Newer
Older
waccess / waccess
#!/usr/bin/env python
# waccess - Copyright (c) 2001,2002, TundraWare Inc., All Rights Reserved
# $Id: waccess,v 1.63 2002/09/02 18:33:28 tundra Exp $

#
# Look for selected strings passed on the command line in the http access log.
# If found, dump the address, name, item retrieved, and access date for the
# matching record.


import commands
import getopt
import socket
import sys

##########
# Booleans
##########

# Truth values derived from comparison expressions.

FALSE = (1 == 0)
TRUE = (1 == 1)

##########
# Constants & Tables
##########

# IP address prefixes to skip.  A log record whose client address
# starts with any entry below is ignored entirely (unless -i is given),
# so an entry may be a partial dotted quad.

IGNORED = [
    "127.0",
    "192.168.0.",
]

# Filled in at run time and indexed by IP address.  Remembers the
# outcome of every DNS reverse lookup performed so far.


REVERSE_CACHE = {}


##########
# Function Definitions
##########

# Do a reverse lookup on an IP address, caching the results
# so that subsequent reverse lookups can use the cache instead of
# doing another lookup.

def IPReverse(ipadr):
    """Return the host name for `ipadr`, resolving each address only once.

    The answer is looked up in REVERSE_CACHE first.  On a miss the name is
    obtained with socket.gethostbyaddr() and stored in the cache, so later
    calls for the same address do no further lookups.

    ipadr   -- address string to resolve (used as the cache key)

    Returns the primary host name, or the string "NO REVERSE RESOLUTION"
    when the resolver reports an error.  Failures are cached as well, so
    an unresolvable address is not retried.
    """
    if ipadr in REVERSE_CACHE:
        return REVERSE_CACHE[ipadr]

    try:
        revname = socket.gethostbyaddr(ipadr)[0]
    except (socket.error, UnicodeError):
        # Only resolver failures are treated as "no name": socket.herror
        # and socket.gaierror both derive from socket.error.  Anything
        # else (notably an interrupt from the keyboard during a slow
        # lookup) is allowed to propagate instead of being swallowed.
        revname = "NO REVERSE RESOLUTION"

    REVERSE_CACHE[ipadr] = revname
    return revname



# Print program usage information and error exit.

def usage():
    """Write the command synopsis to stderr and exit with status 2.

    The synopsis lists the option flags, the optional log file and the
    search strings taken as positional arguments.  This function does not
    return: sys.exit() raises SystemExit.
    """
    # A usage error is a diagnostic, so it goes to stderr and stays out of
    # any output the caller may be capturing from stdout.
    sys.stderr.write("usage: waccess [-irs] [-f logfile] [string ...]\n")
    sys.exit(2)
    


##########
# Command Line Processing
##########

# Defaults; each may be overridden by the option noted beside it.

LOG = "/var/log/httpd-access.log"   # -f: access log to scan
NOIGNORE = FALSE                    # -i: also process addresses listed in IGNORED
REVERSE  = FALSE                    # -r: reverse-resolve the address of matching records
SHOW     = TRUE                     # -s: cleared to suppress the per-record lines

# The option string holds only option letters and ':' markers.  A leading
# '-' here would be taken by getopt.getopt() as one more option letter,
# which made malformed input such as "-i-" pass without complaint.

try:
    opts, args = getopt.getopt(sys.argv[1:], 'f:irs')
except getopt.GetoptError:
    usage()

# Options are applied left to right.  -r and -s undo each other's
# settings, so when both are given the one appearing last wins.

for opt, val in opts:
    if opt == "-f":
        LOG = val
    elif opt == "-i":
        NOIGNORE = TRUE
    elif opt == "-r":
        REVERSE = TRUE
        SHOW = TRUE
    elif opt == "-s":
        SHOW = FALSE
        REVERSE = FALSE

##########
# Process the log
##########

# Per-pattern tallies, indexed by the search string: the number of
# matching records, and the client addresses that produced them (kept
# as dictionary keys so each address is counted once per pattern).

matched = {}
unique_ips = {}
for a in args:
    matched[a] = 0
    unique_ips[a] = {}

total = 0

f = open(LOG)
try:

    # Walk the log one record at a time instead of loading the whole
    # file into memory.

    for line in f:

        total += 1
        record = line.rstrip("\r\n")
        fields = record.split()

        # These field definitions are appropriate for Apache access logs.
        # They may need to be changed for other log layouts.  A blank or
        # truncated record lacks some of these fields; it is counted in
        # the total but otherwise skipped.

        if len(fields) < 7:
            continue

        DATESTAMP = fields[3][1:]
        IPADR = fields[0]
        CMD = fields[5]
        FILE = fields[6]

        # See if this is an IP address to ignore unless user suppresses feature

        PROCESS = TRUE
        if not NOIGNORE:
            for ignoreIP in IGNORED:
                if IPADR.startswith(ignoreIP):
                    PROCESS = FALSE
                    break

        if not PROCESS:
            continue

        # Check each log record for a match with any command line argument,
        # crediting every pattern that appears in it.

        MATCHED = FALSE
        for a in args:
            if record.find(a) != -1:
                matched[a] += 1
                unique_ips[a][IPADR] = 1
                MATCHED = TRUE

        if not MATCHED:
            continue

        # But only display the matching record once, regardless of how many
        # matching substrings are found.

        revname = ""
        if REVERSE:
            revname = IPReverse(IPADR)

        if SHOW:
            # Columns are joined with single spaces; each value is followed
            # by a padding string that fills out its column.  An over-long
            # host name keeps only its tail.
            # NOTE(review): the name is cut to its last 36 characters but
            # padded to a 35 character column, so a name of 36 or more
            # characters shifts the rest of the line by one -- confirm
            # which width was intended.
            columns = [
                DATESTAMP, " " * (19 - len(DATESTAMP)),
                IPADR, " " * (15 - len(IPADR)),
                revname[-(35+1):], " " * (35 - len(revname)),
                CMD[1:], " " * (8 - len(CMD)),
                FILE,
            ]
            sys.stdout.write(" ".join(columns) + "\n")

finally:
    # Release the log even if a record blows up part way through.
    f.close()

# Summary: overall record count, then one line per search string with
# its own access count and its own number of distinct client addresses.

sys.stdout.write("\nProcessed %d Total Records.\n\n" % (total,))
for a in args:
    sys.stdout.write("%s%s=> %d Accesses from %d Unique IPs\n"
                     % (a, (15 - len(a)) * " ", matched[a], len(unique_ips[a])))