Newer
Older
waccess / waccess
#!/usr/bin/env python
# waccess - Copyright (c) 2001,2002, TundraWare Inc., All Rights Reserved


VERSION = "$Id: waccess,v 1.67 2002/09/03 17:50:42 tundra Exp $"

#
# Look for selected strings passed on the command line in the http access log.
# If found, dump the address, name, item retrieved, and access date for the
# matching record.


import commands
import getopt
import os
import socket
import sys

##########
# Booleans
##########

# Both truth values are derived from comparisons instead of being
# written as literals; FALSE is defined as the negation of TRUE.
TRUE = (0 == 0)
FALSE = not TRUE

##########
# Constants & Tables
##########

# List of IP addresses to ignore.  Records with IP addresses found
# in this list will be ignored entirely.  The addresses here may
# be partial IP quads.  If IGNOREDFILE exists, its contents will
# be appended to the IGNORED data structure at program startup.

IGNORED = ["127.0", ]

# The home directory is obtained with os.path.expanduser() rather than
# os.getenv("HOME"): getenv returns None when HOME is not set, and
# os.path.join() cannot join None with a file name.

IGNOREDFILE = os.path.join(os.path.expanduser("~"), ".waccessignored")


# Cache of DNS reverse lookups, filled in as the program runs.
# Keys are IP addresses; values are the names the lookups produced.

REVERSE_CACHE = dict()

# For each search key given on the command line, the distinct IP
# addresses whose log records matched that key.

UNIQUE_IP = dict()


##########
# Function Definitions
##########

##########
# Do a reverse lookup on an IP address, caching the results
# so that subsequent reverse lookups can use the cache instead of
# doing another lookup.
##########

def IPReverse(ipadr):
    """Return the host name for ipadr, using REVERSE_CACHE when possible.

    ipadr is the address string to resolve.  The outcome of every lookup,
    successful or not, is stored in REVERSE_CACHE under ipadr, so a given
    address is passed to the resolver at most once per run.

    Returns the primary host name reported by socket.gethostbyaddr(), or
    the placeholder string "NO REVERSE RESOLUTION" if the lookup fails.
    """
    if ipadr in REVERSE_CACHE:
        return REVERSE_CACHE[ipadr]

    try:
        revname = socket.gethostbyaddr(ipadr)[0]
    except (socket.error, ValueError):
        # Resolver failures are reported as socket.error (or one of its
        # subclasses); a malformed address string can surface as a
        # ValueError.  Only these are treated as "no name available" so
        # that KeyboardInterrupt and SystemExit are not swallowed.
        revname = "NO REVERSE RESOLUTION"

    REVERSE_CACHE[ipadr] = revname
    return revname


##########
# Print program version number and exit normally
##########

def version():
    """Print the VERSION string on standard output and exit with status 0."""
    # A single parenthesized argument prints identically whether print is
    # a statement or a function.
    print(VERSION)
    sys.exit(0)


##########
# Print program usage information and error exit.
##########

def usage():
    """Write the usage summary to standard error and exit with status 2."""
    # This is the error path, so the message goes to stderr and does not
    # get mixed into report output that may be piped or redirected.
    sys.stderr.write("usage: waccess [-ailqrsv -f logfile]\n")
    sys.exit(2)
    


##########
# Command Line Processing
##########

# Defaults; each may be overridden by the option noted beside it.

LOG        = "/var/log/httpd-access.log"   # -f: log file to read
LISTIGNORE = FALSE                         # -l: print the ignore table
NOIGNORE   = FALSE                         # -i: do not apply the ignore table
SUMMARY    = TRUE                          # -q: suppress the summary
REVERSE    = FALSE                         # -r: reverse-resolve matching IPs
SHOW       = TRUE                          # -s: do not print matching records
SORTED     = FALSE                         # -a: sort search keys in the summary

# The option string lists only real option letters.  A '-' character in
# it would itself be accepted as an option letter by getopt.getopt().

try:
    opts, args = getopt.getopt(sys.argv[1:], 'af:ilqrsv')
except getopt.GetoptError:
    usage()

# Options are applied in command-line order, so when -r and -s are both
# given the later one wins.

for opt, val in opts:
    if opt == "-a":
        SORTED = TRUE
    elif opt == "-f":
        LOG = val
    elif opt == "-i":
        NOIGNORE = TRUE
    elif opt == "-l":
        LISTIGNORE = TRUE
    elif opt == "-q":
        SUMMARY = FALSE
    elif opt == "-r":
        REVERSE = TRUE
        SHOW = TRUE
    elif opt == "-s":
        SHOW = FALSE
        REVERSE = FALSE
    elif opt == "-v":
        version()
        

##########
# Process the ignored rc file, if any
##########

# The file holds one address (or address prefix) per line.  Surrounding
# whitespace is removed and blank lines are skipped: records are matched
# against these entries with startswith(), and an empty entry would
# match every address, causing every record to be ignored.

if os.path.exists(IGNOREDFILE):
    i = open(IGNOREDFILE)
    try:
        for ip in i.read().splitlines():
            ip = ip.strip()
            if ip:
                IGNORED.append(ip)
    finally:
        # Close the file even if reading it fails.
        i.close()

# Show contents of final ignore table if user asked for it.
# This is done only if the ignore feature is enabled.
# Each print call takes a single parenthesized string, which produces
# the same output whether print is a statement or a function.

if LISTIGNORE and not NOIGNORE:
    print("Ignoring Addresses:")
    for a in IGNORED:
        print(20*" " + a)
    print("\n\n")

##########
# Process the log
##########

f = open(LOG)

# matched[key]   - number of processed records containing that search key
# UNIQUE_IP[key] - distinct IP addresses seen for that key, in order of
#                  first appearance
# seen_ips[key]  - the same addresses as dictionary keys, so the "is this
#                  IP new?" test does not have to scan the list

matched = {}
seen_ips = {}
for a in args:
    matched[a] = 0
    UNIQUE_IP[a] = []
    seen_ips[a] = {}

total = 0

try:

    # Read the log one line at a time instead of loading the whole file
    # into memory.

    for record in f:

        record = record.rstrip("\r\n")
        total += 1
        fields = record.split()

        # The field positions used below need at least seven
        # whitespace-separated fields.  Blank or truncated lines are
        # counted in the total but otherwise skipped.

        if len(fields) < 7:
            continue

        # These field definitions are appropriate for Apache access logs.
        # They may need to be changed for other log layouts.

        DATESTAMP = fields[3][1:]
        IPADR = fields[0]
        CMD = fields[5]
        FILE = fields[6]

        # See if this is an IP address to ignore unless user suppresses
        # the feature.

        PROCESS = TRUE
        if not NOIGNORE:
            for ignoreIP in IGNORED:
                if IPADR.startswith(ignoreIP):
                    PROCESS = FALSE
                    break

        if not PROCESS:
            continue

        # Check each log record for a match with any command line argument

        MATCHED = FALSE
        for a in args:
            if a in record:
                matched[a] += 1
                MATCHED = TRUE
                # Save if new IP address encountered
                if IPADR not in seen_ips[a]:
                    seen_ips[a][IPADR] = TRUE
                    UNIQUE_IP[a].append(IPADR)

        # Only display the matching record once, regardless of how many
        # matching substrings are found.

        if MATCHED and SHOW:

            # Without reverse lookups there is no host name to show; an
            # empty string leaves that column blank.

            if REVERSE:
                revname = IPReverse(IPADR)
            else:
                revname = ""

            # Every item, including the padding strings, is separated
            # from the next by a single space.

            print(" ".join([
                DATESTAMP, " " * (19 - len(DATESTAMP)),
                IPADR, " " * (15 - len(IPADR)),
                revname[-(35+1):], " " * (35 - len(revname)),
                CMD[1:], " " * (8 - len(CMD)),
                FILE,
            ]))

finally:
    # Close the log even if processing stops with an error.
    f.close()


##########
# Output Summary Of Results Unless Told Not To
##########

# One line is printed per search key: the key left-justified in a
# 20-character column, then the match count and the unique-IP count,
# each right-justified in an 8-character column.  Keys are listed in
# command-line order unless sorting was requested.

if SUMMARY:

    print("\nProcessed %d Total Records.\n" % (total,))

    if SORTED:
        args.sort()

    for a in args:
        print("%-20s : %8d Accesses From %8d Unique IPs" % (
            a, matched[a], len(UNIQUE_IP[a])))