Newer
Older
waccess / waccess
#!/usr/bin/env python
# waccess - Copyright (c) 2001,2002, TundraWare Inc., All Rights Reserved


VERSION = "$Id: waccess,v 2.0 2002/09/03 21:00:00 tundra Exp $"

#
# Look for selected strings passed on the command line in the http access log.
# If found, dump the address, name, item retrieved, and access date for the
# matching record.


import commands
import getopt
import os
import socket
import sys

##########
# Booleans
##########

# Home-grown boolean constants used for every flag in this script.

FALSE = 0==1
TRUE = not FALSE

##########
# Constants & Tables
##########

# List of IP addresses to ignore.  Records with IP addresses found
# in this list will be ignored entirely.  The addresses here may
# be partial IP quads.  If IGNOREDFILE exists, its contents will
# be appended to the IGNORED data structure at program startup.

IGNORED = []

# The home directory is found with expanduser() rather than
# os.getenv("HOME"): getenv returns None when HOME is unset, which
# would make os.path.join() raise before the program even starts.

IGNOREDFILE = os.path.join(os.path.expanduser("~"), ".waccessignored")


# This table is built dynamically at run time to keep track of
# all DNS reverse lookups.  Index into the table by IP address.


REVERSE_CACHE = {}

# This table keeps track of how many unique IPs access each
# search key.  It maps a search key to the list of distinct IP
# addresses seen for that key.

UNIQUE_IP = {}


##########
# Function Definitions
##########

##########
# Do a reverse lookup on an IP address, caching the results
# so that subsequent reverse lookups can use the cache instead of
# doing another lookup.
##########

def IPReverse(ipadr):
    """Return the reverse-DNS host name for ipadr, caching every answer.

    ipadr is passed to socket.gethostbyaddr(); the primary host name from
    its result is returned.  If the lookup fails, the placeholder string
    "NO REVERSE RESOLUTION" is returned instead.  Both successful and
    failed results are stored in REVERSE_CACHE, keyed by ipadr, so each
    address is looked up at most once per run.
    """

    if ipadr in REVERSE_CACHE:
        return REVERSE_CACHE[ipadr]

    try:
        revname = socket.gethostbyaddr(ipadr)[0]
    # Only resolver failures and malformed address strings are treated
    # as "no name".  A bare except here would also swallow
    # KeyboardInterrupt, making it impossible to abort a run that is
    # stuck in slow DNS lookups.
    except (socket.error, ValueError):
        revname = "NO REVERSE RESOLUTION"

    REVERSE_CACHE[ipadr] = revname
    return revname


##########
# Print program usage information
##########

def usage():
    """Write the program banner and the option summary to standard output.

    The version number shown in the banner is the third whitespace-separated
    field of the module-level VERSION string.  Nothing is returned.
    """

    # Banner lines are written flush left, exactly as given.
    banner = (
        "waccess " + VERSION.split()[2] +
        " - Copyright (c) 2001, 2002 TundraWare Inc., All Rights Reserved. \n",
        "  usage: waccess [-achilqrsv] [-f logfile] [search-key...]  where,\n\n",
    )

    # (argument, description) pairs.  Each description supplies its own
    # trailing newline(s).
    options = (
        ("-a",         "Sort summary output alphabetically by search key\n"),
        ("-c",         "Ignore case when checking for key match in access record\n"),
        ("-f logfile", "Name of logfile to use.  Default is /var/log/httpd-access.log\n"),
        ("-h",         "Display this help information\n"),
        ("-i",         "Do not ignore any addresses\n"),
        ("-l",         "List ignored addresses in summary output\n"),
        ("-q",         "Quiet mode - suppresses summary output\n"),
        ("-r",         "Do reverse address lookups on each matching record\n"),
        ("-s",         "Only show summary output - do not display individual matching records\n"),
        ("-v",         "Show detailed version information\n"),
        ("search-key...",  "Strings to look for in each access record\n\n"),
    )

    for line in banner:
        sys.stdout.write(line)

    # Indent each argument by 10 columns and pad it to 20 columns so the
    # descriptions line up.
    for argument, description in options:
        sys.stdout.write(10 * " " + argument.ljust(20) + description)
    


##########
# Command Line Processing
##########

IGNORECASE  = FALSE
LOG         = "/var/log/httpd-access.log"
SHOWIGNORED = FALSE
NOIGNORE    = FALSE
SUMMARY     = TRUE
REVERSE     = FALSE
SHOW        = TRUE
SORTED      = FALSE

try:
    opts, args = getopt.getopt(sys.argv[1:], '-acf:ilqrsv')
except getopt.GetoptError:
    usage()
    sys.exit(2)

    
for opt, val in opts:
    if opt == "-a":
        SORTED = TRUE
    if opt == "-c":
        IGNORECASE = TRUE
    if opt == "-f":
        LOG = val
    if opt == "-h":
        usage()
        sys.exit(0)
    if opt == "-i":
        NOIGNORE = TRUE
    if opt == "-l":
        SHOWIGNORED = TRUE
    if opt == "-q":
        SUMMARY = FALSE
    if opt == "-r":
        REVERSE = TRUE
        SHOW = TRUE
    if opt == "-s":
        SHOW = FALSE
        REVERSE = FALSE
    if opt == "-v":
        print VERSION
        sys.exit(0)

        

##########
# Process the ignored rc file, if any
##########

# Append one ignore entry per line of IGNOREDFILE, if that file exists.

if os.path.exists(IGNOREDFILE):
    i = open(IGNOREDFILE)
    try:
        for ip in i.read().splitlines():
            # Trim stray whitespace and drop blank lines.  An empty entry
            # is a prefix of every address, so it would cause every log
            # record to be ignored; an entry with surrounding whitespace
            # would never match anything.
            ip = ip.strip()
            if ip:
                IGNORED.append(ip)
    finally:
        # Close the file even if reading it fails.
        i.close()


##########
# Process the log
##########

f = open(LOG)

matched = {}
for a in args:
    matched[a] = 0
    UNIQUE_IP[a] = []

total = 0

# Read in the whole log file
for record in f.read().splitlines():

    total += 1

    fields = record.split()

    # These field definitions are appropriate for Apache access logs.
    # They may need to be changed for other log layouts.
    
    DATESTAMP= fields[3][1:]
    IPADR = fields[0]
    CMD = fields[5]
    FILE = fields[6]

    # See if this is an IP address to ignore unless user suppresses feature

    PROCESS = TRUE
    if not NOIGNORE:
        for ignoreIP in IGNORED:
            if IPADR.startswith(ignoreIP):
                PROCESS = FALSE
       
    if PROCESS:

        # Check each log record for a match with any command line argument

        MATCHED = FALSE
        for a in args:
            if (not IGNORECASE and record.count(a)) or (IGNORECASE and record.lower().count(a.lower())):
                matched[a] += 1
                MATCHED = TRUE
                # Save if new IP address encountered
                if not UNIQUE_IP[a].count(IPADR):
                    UNIQUE_IP[a].append(IPADR)

        # only display the matching record once, regardless of how many
        # matching substrings are found.

        if MATCHED:
            if REVERSE:
                revname = IPReverse(IPADR)
            else:
                revname = ""

            if SHOW:
                print DATESTAMP, " " * (19 - len(DATESTAMP)), \
                      IPADR, " " * (15 - len(IPADR)), \
                      revname[-(35+1):], " " * (35 - len(revname)), \
                      CMD[1:], " " * (8 - len(CMD)), FILE

        

f.close()


##########
# Output Summary Of Results Unless Told Not To
##########


if SUMMARY:

    # Show total number of records processed.

    print "\nProcessed %d Total Records.\n" % (total,)

    # Show contents of final ignore table if user asked for it.
    # This is done only if the ignore feature is enabled.

    if SHOWIGNORED and not NOIGNORE:
        print "The Following Addresses Were Ignored:\n"
        for a in IGNORED:
            print a
        print "\n\n"

    # Sort output if user requested it
    
    if SORTED:
        args.sort()

    # Summarize number of hits and unique IPs by seach key

    for a in args:
        num_matched = str(matched[a])
        num_unique = str(len(UNIQUE_IP[a]))

        print "%s : %s Accesses From %s Unique IPs" % (\
                              a + (20 - len(a)) * " ", \
                              (" " * (8 - len(num_matched))) + num_matched, \
                              (" " * (8 - len(num_unique))) +  num_unique)