Newer
Older
nohtml / nohtml.py
#!/usr/bin/env python
# nohtml.py - Filter To Remove HTML Attachments From Email
# Copyright (c) 2003 TundraWare Inc.  All Rights Reserved.
# For Updates See:  http://www.tundraware.com/Software/nohtml


#------------------- Nothing Below Here Should Need Changing ------------------#

# Program identity.  The version number is taken from the third
# whitespace-separated field of the RCS keyword string.

PROGNAME = "nohtml"
RCSID = "$Id: nohtml.py,v 1.11 2003/05/06 22:59:02 tundra Exp $"
VERSION = RCSID.split(None, 3)[2]

# Pieces of the copyright notice, and the assembled notice itself

CPRT         = chr(0xA9)
DATE         = 2003
OWNER        = "TundraWare Inc."
RIGHTS       = "All Rights Reserved"
COPYRIGHT    = "Copyright " + CPRT + " " + str(DATE) + " " + OWNER + "  " + RIGHTS + ". "


#----------------------------------------------------------#
#                       Imports                            #
#----------------------------------------------------------#

import getopt
import os
import sys


#----------------------------------------------------------#
#                Constants & Literals                      #
#----------------------------------------------------------#


#####
# Marker strings searched for in each message line.  All of them are
# stored lowercased because they are compared against lowercased lines.
#####

BOUNDARY, STARTHTML0, STARTHTML1, STARTHTML2 = map(
    str.lower,
    ("BOUNDARY=", "<HTML>", "Content-Type:", "text/html"),
)


#----------------------------------------------------------#
#              Prompts & Application Strings               #
#----------------------------------------------------------#


#####
# Lines of the usage message, printed one per line by Usage()
#####

uTable = [
    "%s %s - %s\n" % (PROGNAME, VERSION, COPYRIGHT),
    "usage:  %s [-hv] where,\n" % PROGNAME,
    "          -h       print this help information",
    "          -v       print detailed version information",
]


#--------------------------- Code Begins Here ---------------------------------#


    

#----------------------------------------------------------#
#             Supporting Function Definitions              #
#----------------------------------------------------------#


#####
# Print Usage Information
#####

def Usage():
    """Print every entry of uTable on its own line."""
    for line in uTable:
        # Parenthesised single-argument form prints identically under
        # Python 2 and is also valid syntax under Python 3.
        print(line)
        

#----------------------------------------------------------#
#                    Program Entry Point                   #
#----------------------------------------------------------#

#####
# Extract the MIME separator declared on a line
#####

def _ParseBoundary(line, key):
    """Return the MIME separator that follows `key` on `line`, or "".

    `key` is expected to be lowercase (e.g. "boundary=") and is matched
    without regard to case, so "Boundary=" and "BOUNDARY=" are found too.
    Double quotes around the value are removed; for an unquoted value,
    anything from a following ";" onward is dropped.
    """

    # Search a lowercased copy but slice the original line: the separator
    # must keep its case because body lines are matched against it as-is.
    start = line.lower().find(key)
    if start < 0:
        return ""

    value = line[start + len(key):].strip()

    if value.startswith('"'):
        # Quoted value: runs to the closing quote, or to the end of the
        # line when the quote is never closed.
        return value[1:].split('"', 1)[0]

    # Unquoted value: stop at the next parameter, then drop a stray
    # trailing quote.
    value = value.split(";", 1)[0].strip()
    if value.endswith('"'):
        value = value[:-1]

    return value


#####
# Remove the HTML parts from a message
#####

def StripHTML(lines, boundary, htmltag, ctype, htmltype):
    """Return the entries of `lines` that do not belong to an HTML part.

    lines    - the message as a list of lines, line endings included
    boundary - lowercase text introducing a MIME separator declaration
    htmltag  - lowercase text whose presence alone marks a part as HTML
    ctype    - lowercase text which, together with `htmltype` on the
    htmltype   same line, marks a part as HTML

    A new list is returned; `lines` itself is not modified.
    """

    separator = ""

    # One entry per part: [first line, is HTML, one past last line].
    # The end index is appended when the next part (or the end of the
    # message) is reached.
    parts = [[0, False]]

    for index, line in enumerate(lines):

        lowered = line.lower()

        # Keep track of the current MIME separator string.  An empty
        # declaration is ignored so the previous separator stays active.
        if boundary in lowered:
            declared = _ParseBoundary(line, boundary)
            if declared:
                separator = declared

        # Every line containing the separator closes the current part and
        # opens the next one.  This includes the declaring line itself.
        if separator and separator in line:
            parts[-1].append(index)
            parts.append([index, False])

        # Any trigger text marks the part currently being read as HTML
        if htmltag in lowered or (ctype in lowered and htmltype in lowered):
            parts[-1][1] = True

    # Make sure the last part is closed properly
    parts[-1].append(len(lines))

    # Collect everything which is not HTML
    kept = []
    for start, ishtml, end in parts:
        if not ishtml:
            kept.extend(lines[start:end])

    return kept


#####
# Command line processing and filtering of stdin to stdout
#####

def main():
    """Handle the command line options, then filter stdin to stdout.

    Options found in the environment variable named after the program
    (PROGNAME uppercased) are processed before those on the command line.
    An invalid option prints the usage text and exits with status 1;
    -h and -v print their information and exit with status 0.
    """

    options = sys.argv[1:]
    envopt = os.getenv(PROGNAME.upper())
    if envopt:
        options = envopt.split() + options

    try:
        # Only -h and -v are valid; positional arguments are ignored
        opts, _ = getopt.getopt(options, "hv")
    except getopt.GetoptError:
        Usage()
        sys.exit(1)

    for opt, _ in opts:
        if opt == "-h":
            Usage()
            sys.exit(0)
        if opt == "-v":
            print(RCSID)
            sys.exit(0)

    message = sys.stdin.readlines()
    sys.stdout.writelines(
        StripHTML(message, BOUNDARY, STARTHTML0, STARTHTML1, STARTHTML2))


# Run the filter only when executed as a program, so that importing this
# file does not read stdin or call sys.exit().

if __name__ == "__main__":
    main()