diff --git a/nohtml.py b/nohtml.py index 3137bce..a63c9b1 100755 --- a/nohtml.py +++ b/nohtml.py @@ -9,7 +9,7 @@ # Program Information PROGNAME = "nohtml" -RCSID = "$Id: nohtml.py,v 1.10 2003/05/05 21:56:35 tundra Exp $" +RCSID = "$Id: nohtml.py,v 1.11 2003/05/06 22:59:02 tundra Exp $" VERSION = RCSID.split()[2] # Copyright Information @@ -39,9 +39,10 @@ # Literals ##### +BOUNDARY = "BOUNDARY=".lower() +STARTHTML0 = "".lower() STARTHTML1 = "Content-Type:".lower() STARTHTML2 = "text/html".lower() -ENDHTML = "".lower() #----------------------------------------------------------# @@ -105,23 +106,60 @@ print RCSID sys.exit(0) -PASS = True +lines = sys.stdin.readlines() +numlin = len(lines) -for line in sys.stdin.readlines(): +MIMESEP = "" +ATTACH = [[0, False]] +# Determine MIME boundary, if any, and find all attachments. +# Along the way, mark any HTML attachments so we can ignore later. + +for x in range(numlin): + + line = lines[x] lline = line.lower() - if lline.count(STARTHTML1) and lline.count(STARTHTML2): - PASS = False + # Keep track of current MIME separator string - if PASS: - sys.stdout.write(line) + if lline.count(BOUNDARY): - if lline.count(ENDHTML): - DONE = True + # Get just the separator string + MIMESEP = line.split(BOUNDARY)[1].strip() + + # Delete leading quote + if MIMESEP[0] == '"': + MIMESEP = MIMESEP[1:] + + # Delete trailing quote + if MIMESEP[-1] == '"': + MIMESEP = MIMESEP[:-1] - + # Note existence of next attachment + if MIMESEP and line.count(MIMESEP): + # End of last attachment + ATTACH[-1].append(x) - + # Beginning of next attachment + ATTACH.append([x, False]) + + # If any of the trigger words indicating HTML are found in the + # current attachment, note that fact by setting the second entry of + # the associated list to True. + + if lline.count(STARTHTML0) or (lline.count(STARTHTML1) and lline.count(STARTHTML2)): + ATTACH[-1][1] = True + +# Make sure last partition list is filled properly +ATTACH[-1].append(numlin) + +# Now output everything which is not HTML + +for part in ATTACH: + if not part[1]: + for x in range(part[0], part[2]): + sys.stdout.write(lines[x]) + +