Newer
Older
tren / tren.py
#!/usr/bin/env python
# tren.py
# Copyright (c) 2010 TundraWare Inc.
# For Updates See:  http://www.tundraware.com/Software/tren

# Program Information

# Identity of the program: script name, name without the ".py" suffix,
# the environment variable name derived from it, and the RCS revision.

PROGNAME = "tren.py"
BASENAME = PROGNAME.partition(".py")[0]
PROGENV  = BASENAME.upper()
RCSID = "$Id: tren.py,v 1.138 2010/02/03 01:04:08 tundra Exp $"
VERSION = RCSID.split(None, 3)[2]    # Third whitespace-separated RCS field

# Copyright Information

CPRT         = "(c)"
DATE         = "2010"
OWNER        = "TundraWare Inc."
RIGHTS       = "All Rights Reserved."
COPYRIGHT    = "Copyright " + CPRT + " " + DATE + ", " + OWNER + "  " + RIGHTS

# Banner lines shown at the top of the usage output

PROGVER      = "%s %s - %s" % (PROGNAME, VERSION, COPYRIGHT)
HOMEPAGE     = "http://www.tundraware.com/Software/" + BASENAME + "\n"



#----------------------------------------------------------#
#            Variables User Might Change                   #
#----------------------------------------------------------#



#------------------- Nothing Below Here Should Need Changing ------------------#


#----------------------------------------------------------#
#                       Imports                            #
#----------------------------------------------------------#

import copy
import getopt
import os
from   stat import *
import sys


#----------------------------------------------------------#
#                 Aliases & Redefinitions                  #
#----------------------------------------------------------#



#----------------------------------------------------------#
#                Constants & Literals                      #
#----------------------------------------------------------#



#####
# General Program Constants
#####

MAXINCLUDES  =  50          # Maximum number of includes allowed

#####
# Message Formatting Constants
#####

# Make sure these make sense: ProgramOptions[MAXLINELEN] > PADWIDTH + WRAPINDENT
# because of the way line conditioning/wrap works: ConditionLine() prefixes
# every continuation line with PADWIDTH + WRAPINDENT padding characters, so
# a shorter line length would leave no room for message text.

PADCHAR      =  " "         # Padding character
PADWIDTH     =  30          # Column width
LSTPAD       =  13          # Padding to use when dumping lists
WRAPINDENT   =   8          # Extra indent on wrapped lines
MINLEN       =  PADWIDTH + WRAPINDENT + 1  # Minimum line length accepted by -w


#####
# Literals
#####

ALL          =  "All"       # Rename target is whole filename
COMMENT      =  "#"         # Comment character in include files
DEFEXT       =  "."         # Default name/extension separator
DEFLEN       =  75          # Default output line length
DEFSEP       =  "="         # Default rename command separator: old=new
ESC          =  "\\"        # Escape character (a single backslash)
EXT          =  "Ext"       # Rename target is extension
INCL         =  "-I"        # Include file command line option
NAM          =  "Nam"       # Rename target is name

# Internal program state literals.
# These are the keys of the ProgramOptions dictionary.

DEBUG          =  "DEBUG"
CASESENSITIVE  =  "CASESENSITIVE"
ERRORCONTINUE  =  "ERRORCONTINUE"
EXTDELIM       =  "EXTDELIM"
FORCERENAM     =  "FORCERENAM"
GLOBAL         =  "GLOBAL"
MAXLINELEN     =  "MAXLINELEN"
QUIET          =  "QUIET"
REGEX          =  "REGEX"
RENSEP         =  "RENSEP"
TARGET         =  "TARGET"
TESTMODE       =  "TESTMODE"

# Rename target keys.
# BASE and STATS index the per-file entries of RenameTargets.RenNames;
# the ORDERBY* values index RenameTargets.SortViews.

BASE           =  "BASENAME"
STATS          =  "STATS"
ORDERBYCMDLINE =  "ORDERBYCOMMANDLINE"
ORDERBYALPHA   =  "ORDERBYALPHA"
ORDERBYMODE    =  "ORDERBYMODE"
ORDERBYINODE   =  "ORDERBYINODE"
ORDERBYDEV     =  "ORDERBYDEV"
ORDERBYNLINK   =  "ORDERBYNLINK"
ORDERBYUID     =  "ORDERBYUID"
ORDERBYGID     =  "ORDERBYGID"
ORDERBYATIME   =  "ORDERBYATIME"
ORDERBYCTIME   =  "ORDERBYCTIME"
ORDERBYMTIME   =  "ORDERBYMTIME"
ORDERBYSIZE    =  "ORDERBYSIZE"

# Rename string keys.
# These index the old and new strings of each rename request.

NEW            = "NEW"
OLD            = "OLD"



#----------------------------------------------------------#
#              Prompts & Application Strings               #
#----------------------------------------------------------#


#####
# Debug Messages
#####

# DEBUGFLAG is the command line option looked for manually, before
# getopt processing, to turn on debug output early.  The d* strings
# are headings and templates passed to DebugMsg()/DumpList().

DEBUGFLAG     =   "-d"
dCMDLINE      =   "Command Line"
dCURSTATE     =   "Current State Of Program Options"
dDEBUG        =   "DEBUG"
dDUMPOBJ      =   "Dumping Object %s"
dINCLUDING    =   "Including file '%s'"
dPROGENV      =   "$" + PROGENV
dRENREQ       =   "Renaming Request:"
dRENTARGET    =   "Rename Target:"
dRESOLVEDOPTS =   "Resolved Command Line"
dSEPCHAR      =   "-"     # Used for debug separator lines
dSORTVIEW     =   "Sort View:"


#####
# Error Messages
#####

# Templates containing '%s' are filled in at the point of use.
# eBADINCL is reported by being substituted into eBADARG.

eBADARG       =  "Invalid command line: %s!"
eBADINCL      =  "option %s requires argument" % INCL
eBADNEWOLD    =  "Bad -r argument '%s'!  Requires exactly one new, old string separator (Default: " + DEFSEP + ")"
eBADLEN       =  "Bad line length '%s'!"
eERROR        =  "ERROR"
eFILEOPEN     =  "Cannot open file '%s': %s!"
eLINELEN      =  "Specified line length too short!  Must be at least %s" % MINLEN
eTOOMANYINC   =  "Too many includes! (Max is %d) Possible circular reference?" % MAXINCLUDES


#####
# Warning Messages
#####


#####
# Usage Prompts
#####

# Lines printed, in order, by Usage().  The synopsis and the option
# list are kept in step with the option string handed to getopt() and
# with the option handlers in the main program: -R and -w take an
# argument, and -b selects the name portion as the rename target.

uTable = [PROGVER,
          HOMEPAGE,
          "usage:  " + PROGNAME + " [-1abCcdEefghqtvXx] [-I file] [-l string] [-R rensep] [-r old=new]... [-w length] file|dir file|dir ...",
          "   where,",
          "         -1            Rename only the first instance of the specified string (Default)",
          "         -a            Rename within the entire file or directory name (Default)",
          "         -b            Only perform renaming within name portion of file or directory name.",
          "         -C            Do case-sensitive renaming (Default)",
          "         -c            Collapse case when doing string substitution.",
          "         -d            Dump debugging information",
          "         -e            Only perform renaming within extension portion of file or directory name.",
          "         -E            Continue renaming even after an error is encountered",
          "         -f            Force renaming even if target file or directory name already exists.",
          "         -g            Replace all instances (global rename) of the old string with the new.",
          "         -h            Print help information.",
          "         -I file       Include command line arguments from file",
          "         -l string     File extension delimiter string. (Default: .)",
          "         -q            Quiet mode, do not show progress.",
          "         -R rensep     Separator string for -r rename arguments. (Default: =)",
          "         -r old=new    Replace old with new in file or directory names.",
          "         -t            Test mode, don't rename, just show what the program *would* do",
          "         -v            Print detailed program version information and exit.",
          "         -w length     Line length of diagnostic and error output (Default: 75)",
          "         -X            Treat the renaming strings literally (Default)",
          "         -x            Treat the old replacement string as a Python regular expression",
         ]

#----------------------------------------------------------#
#          Global Variables & Data Structures              #
#----------------------------------------------------------#

# Program toggle and option defaults

# Current option settings, keyed by the program state literals.
# The command line option handlers overwrite these values, and each
# -r rename request records a copy of every entry as it stood when
# that -r was processed.

ProgramOptions    = {

                     DEBUG         : False,     # Debugging off
                     CASESENSITIVE : True,      # Search is case-sensitive
                     ERRORCONTINUE : False,     # Do not continue after error
                     EXTDELIM      : DEFEXT,    # Name/Extension delimiter
                     FORCERENAM    : False,     # Do not rename if target already exists
                     GLOBAL        : False,     # Only rename first instance of old string
                     MAXLINELEN    : DEFLEN,    # Width of output messages
                     QUIET         : False,     # Display progress
                     REGEX         : False,     # Do not treat old string as a regex
                     RENSEP        : DEFSEP,    # Old, New string separator for -r
                     TARGET        : ALL,       # One of ALL ("All"), NAM ("Nam"), or EXT ("Ext")
                     TESTMODE      : False      # Test mode off
                    }


#--------------------------- Code Begins Here ---------------------------------#


#----------------------------------------------------------#
#             Object Base Class Definitions                #
#----------------------------------------------------------#


#####
# Container For Holding Rename Targets And Renaming Requests
#####

class RenameTargets:

    """
        This class is used to keep track of all the files and/or
        directories we're renaming.  After the class is constructed
        and the command line fully parsed, this will contain:

        self.RenNames    = { fullname : {BASE : basename, STATS : stats}
                             ... (repeated for each rename target)
                           }

        self.SortViews   = {

                             ORDERBYCMDLINE : [fullnames in command line order],
                             ORDERBYALPHA   : [fullnames in alphabetic order],
                             ORDERBYMODE    : [fullnames in mode order],
                             ORDERBYINODE   : [fullnames in inode order],
                             ORDERBYDEV     : [fullnames in devs order],
                             ORDERBYNLINK   : [fullnames in nlinks order],
                             ORDERBYUID     : [fullnames in uids order],
                             ORDERBYGID     : [fullnames in gids order],
                             ORDERBYATIME   : [fullnames in atimes order],
                             ORDERBYCTIME   : [fullnames in ctimes order],
                             ORDERBYMTIME   : [fullnames in mtimes order],
                             ORDERBYSIZE    : [fullnames in size order]
                            }

        self.RenRequests =  [
                             { OLD           : old rename string,
                               NEW           : new rename string,
                               DEBUG         : debug flag,
                               CASESENSITIVE : case sensitivity flag,
                               ERRORCONTINUE : error continuation flag,
                               EXTDELIM      : name/Extension delimiter string,
                               FORCERENAM    : force renaming flag,
                               GLOBAL        : global replace flag,
                               MAXLINELEN    : max output line length,
                               QUIET         : quiet output flag,
                               REGEX         : regular expression enable flag,
                               RENSEP        : old/new rename separator string,
                               TARGET        : target field ,
                               TESTMODE      : testmode flag
                             } ... (repeated for each rename request)
                            ]

        Within every stat-based sort view, names that share the same
        stat value are listed in alphabetic order.

    """

    #####
    # Constructor
    #####

    def __init__(self, targs):

        """
            Build the name dictionary and all sort views for 'targs',
            a list of file and/or directory names.

            The list is converted to absolute paths in place and is
            kept, by reference, as the command line order view.

            If any name cannot be stat'ed, an error is reported via
            ErrorMsg() and the program exits with status 1.
        """

        # Dictionary of all rename targets and their stat info

        self.RenNames   =   {}

        # Dictionary of all possible sort views
        # We can load the first two right away since they're based
        # only on the target names provided on the command line.
        # Slice assignment keeps this an in-place update of the
        # caller's list.

        targs[:] = [os.path.abspath(name) for name in targs]

        self.SortViews  =   {ORDERBYCMDLINE : targs, ORDERBYALPHA : sorted(targs)}

        # List of all the renaming requests - will be filled in
        # by -r command line parsing.

        self.RenRequests = []

        # This data structure is used while we build up sort
        # orders based on stat information.  Each entry is:
        #
        #   (stat field index, {stat value : [fullnames]}, sort view key)

        SeqTypes = [ (ST_MODE,   {},  ORDERBYMODE),
                     (ST_INO,    {},  ORDERBYINODE),
                     (ST_DEV,    {},  ORDERBYDEV),
                     (ST_NLINK,  {},  ORDERBYNLINK),
                     (ST_UID,    {},  ORDERBYUID),
                     (ST_GID,    {},  ORDERBYGID),
                     (ST_ATIME,  {},  ORDERBYATIME),
                     (ST_CTIME,  {},  ORDERBYCTIME),
                     (ST_MTIME,  {},  ORDERBYMTIME),
                     (ST_SIZE,   {},  ORDERBYSIZE),
                     ]

        # Populate the data structures with each targets' stat information

        for fullname in targs:

            # Only the stat call itself can fail here

            try:
                stats = os.stat(fullname)
            except (IOError, OSError) as e:
                ErrorMsg(eFILEOPEN % (fullname, e.strerror or str(e)))
                sys.exit(1)

            # Store fullname, basename, and stat info for this file

            self.RenNames[fullname] = {BASE  : os.path.basename(fullname),
                                       STATS : stats}

            # Incrementally build lists of keys that will later be
            # used to create sequence renaming tokens

            for statflag, storage, order in SeqTypes:
                storage.setdefault(stats[statflag], []).append(fullname)

        # Create the various sorted views we may need for sequence
        # renaming tokens

        for statflag, storage, order in SeqTypes:

            # Walk the stat values in ascending order.  sorted() is
            # used rather than keys() followed by sort() because
            # keys() is not a list on Python 3.  Names sharing a stat
            # value are ordered alphabetically.

            view = []
            for statval in sorted(storage):
                view.extend(sorted(storage[statval]))

            # Now store for future reference

            self.SortViews[order] = view

    # End of '__init__()'


    #####
    # Debug Dump 
    #####

    def DumpObj(self):

        """
            Write the contents of this object to the debug output:
            every rename target, every sort view, and every rename
            request, framed by separator lines.
        """

        SEPARATOR = dSEPCHAR * ProgramOptions[MAXLINELEN]
        DebugMsg("\n")
        DebugMsg(SEPARATOR)
        DebugMsg(dDUMPOBJ % str(self))
        DebugMsg(SEPARATOR)

        # Dump the RenNames and SortView dictionaries

        for table, heading in ((self.RenNames, dRENTARGET), (self.SortViews, dSORTVIEW)):

            for key in table:
                DumpList(DebugMsg, heading, key, table[key])

        # Dump the renaming requests

        for rr in self.RenRequests:
            DumpList(DebugMsg, dRENREQ, "", rr)

        DebugMsg(SEPARATOR + "\n\n")

    # End of 'DumpObj()'

# End of class 'RenameTargets'
    

#----------------------------------------------------------#
#             Supporting Function Definitions              #
#----------------------------------------------------------#


#####
# Turn A List Into Columns With Space Padding
#####

def ColumnPad(list, padchar=PADCHAR, padwidth=PADWIDTH):

    """
        Return one string made of every item in 'list', each
        converted with str() and followed by enough copies of
        'padchar' to fill 'padwidth' characters.  Items already at
        least 'padwidth' long are not padded or truncated.
    """

    columns = []
    for item in list:
        text = str(item)
        fill = padchar * (padwidth - len(text))
        columns.append(text + fill)

    return "".join(columns)

# End of 'ColumnPad()'


#####
# Condition Line Length With Fancy Wrap And Formatting
#####

def ConditionLine(msg,
                  padchar=PADCHAR,
                  padwidth=PADWIDTH,
                  wrapindent=WRAPINDENT):

    """
        Split 'msg' into a list of lines no longer than
        ProgramOptions[MAXLINELEN].  The first line is the leading
        part of 'msg' as-is; each continuation line starts with
        (padwidth + wrapindent) copies of 'padchar'.  An empty 'msg'
        yields a list holding one empty string.

        The line length must exceed the continuation indent or the
        remaining text never gets shorter.
    """

    width  = ProgramOptions[MAXLINELEN]
    indent = padchar * (padwidth + wrapindent)

    lines = [msg[:width]]
    remainder = msg[width:]

    while remainder:
        remainder = indent + remainder
        lines.append(remainder[:width])
        remainder = remainder[width:]

    return lines

# End of 'ConditionLine()'


#####
# Print A Debug Message
#####

def DebugMsg(msg):

    """
        Write 'msg' to stderr, wrapped by ConditionLine(), with each
        resulting line prefixed by the program name and debug tag.
    """

    prefix = PROGNAME + " " + dDEBUG + ": "

    for line in ConditionLine(msg):
        PrintStderr(prefix + line)

# End of 'DebugMsg()'


#####
# Debug Dump Of A List
#####

def DumpList(handler, msg, listname, content):

    """
        Report a named item through 'handler' (a callable taking one
        string): first the heading 'msg', then a second line holding
        'listname' padded to LSTPAD columns followed by 'content'.
    """

    handler(msg)

    label  = ColumnPad([listname, " "], padwidth=LSTPAD)
    detail = " %s %s" % (label, content)

    handler(ColumnPad([" ", detail]))

# End of 'DumpList()'


#####
# Dump The State Of The Program
#####

def DumpState():

    """
        Write every entry of ProgramOptions to the debug output as
        'name value' columns, in alphabetic order of option name,
        framed by separator lines.
    """

    SEPARATOR = dSEPCHAR * ProgramOptions[MAXLINELEN]
    DebugMsg(SEPARATOR)
    DebugMsg(dCURSTATE)
    DebugMsg(SEPARATOR)

    # sorted() works on both Python 2 and 3; keys() followed by
    # sort() fails on Python 3 where keys() is not a list.

    for o in sorted(ProgramOptions):
        DebugMsg(ColumnPad([o, ProgramOptions[o]]))

    DebugMsg(SEPARATOR)


# End of 'DumpState()'


#####
# Print An Error Message
#####

def ErrorMsg(emsg):

    """
        Write 'emsg' to stderr, wrapped by ConditionLine(), with each
        resulting line prefixed by the program name and error tag.
    """

    prefix = PROGNAME + " " + eERROR + ": "

    for line in ConditionLine(emsg):
        PrintStderr(prefix + line)

# End of 'ErrorMsg()'

#####
# Split -r Argument Into Separate Old And New Strings
#####

def GetOldNew(arg):

    """
        Split the -r argument 'arg' into its old and new strings.

        The strings are divided by the separator currently stored in
        ProgramOptions[RENSEP].  A separator immediately preceded by
        the escape character is not counted as a divider; such
        escaped separators are turned back into plain separators in
        the returned strings.

        Returns [old, new].  If 'arg' does not contain exactly one
        unescaped separator, or the separator itself is empty, an
        error is reported and the program exits with status 1.
    """

    oldnewsep = ProgramOptions[RENSEP]

    # An empty separator matches at every position without ever
    # advancing the scan below, so reject it up front.

    if not oldnewsep:
        ErrorMsg(eBADNEWOLD % arg)
        sys.exit(1)

    numseps  = 0
    sepindex = 0

    i = 0
    while i < len(arg):

        # Scan string ignoring escaped separators

        if arg.startswith(oldnewsep, i):

            if i == 0 or arg[i-1] != ESC:
                sepindex = i
                numseps += 1

            # Skip the whole separator, escaped or not

            i += len(oldnewsep)

        else:
            i += 1


    if numseps != 1:
        ErrorMsg(eBADNEWOLD % arg)
        sys.exit(1)

    old, new = arg[:sepindex], arg[sepindex + len(oldnewsep):]
    old = old.replace(ESC + oldnewsep, oldnewsep)
    new = new.replace(ESC + oldnewsep, oldnewsep)
    return [old, new]

# End of 'GetOldNew()'


#####
# Print To stderr
#####

def PrintStderr(msg, trailing="\n"):

    """
        Write 'msg' followed by 'trailing' to standard error.
    """

    text = msg + trailing
    sys.stderr.write(text)

# End of 'PrintStderr()'


#####
# Print To stdout
#####

def PrintStdout(msg, trailing="\n"):

    """
        Write 'msg' followed by 'trailing' to standard output.
    """

    text = msg + trailing
    sys.stdout.write(text)

# End of 'PrintStdout'


#####
# Process Include Files On The Command Line
#####

def ProcessIncludes(OPTIONS):

    """
        Return a copy of the argument list 'OPTIONS' in which every
        include option has been replaced by the whitespace-separated
        words found in the named file.

        Both '-I file' and '-Ifile' forms are accepted.  In include
        files, everything from the comment character to the end of a
        line is ignored.  Included files may themselves contain
        include options; these are expanded in turn.

        Only arguments that begin with the include option are
        treated as includes, so other arguments are passed through
        untouched even if they contain the same characters or
        embedded whitespace.

        Reports an error and exits with status 1 if an include option
        has no filename, if a file cannot be read, or if more than
        MAXINCLUDES files end up being included.
    """

    # Resolve include file references allowing for nested includes.
    # This has to be done here separate from the command line options so
    # that getopt() processing below will "see" the included statements.

    OPTIONS = list(OPTIONS)
    NUMINCLUDES = 0

    i = 0
    while i < len(OPTIONS):

        token = OPTIONS[i]

        if not token.startswith(INCL):
            i += 1
            continue

        # Work out the filename and what follows the include.
        # A bare include option cannot be the last item because this
        # means the filename to include is missing.

        if token == INCL:

            if i == len(OPTIONS) - 1:
                ErrorMsg(eBADARG % eBADINCL)
                sys.exit(1)

            filename = OPTIONS[i+1]
            rhs      = OPTIONS[i+2:]

        else:
            filename = token[len(INCL):]
            rhs      = OPTIONS[i+1:]

        lhs = OPTIONS[:i]

        # Keep track of, and limit, the number of includes allowed.
        # This is an easy way to stop circular (infinite) includes.

        NUMINCLUDES += 1
        if NUMINCLUDES > MAXINCLUDES:
            ErrorMsg(eTOOMANYINC)
            sys.exit(1)

        # Replace the include option on the command line with that
        # file's contents.  Handle comments within lines.

        try:
            n = []
            with open(filename) as f:
                for l in f:
                    n += l.split(COMMENT)[0].split()

        except IOError as e:
            ErrorMsg(eFILEOPEN % (filename, e.strerror or str(e)))
            sys.exit(1)

        if ProgramOptions[DEBUG]:
            DebugMsg(dINCLUDING % filename)

        # The index is deliberately not advanced: the inserted words
        # start at this same position and may hold further includes.

        OPTIONS = lhs + n + rhs

    return OPTIONS

# End of 'ProcessIncludes()'


#####
# Print Usage Information
#####

def Usage():

    """
        Print every entry of the usage table to standard output,
        one entry per call to PrintStdout().
    """

    for entry in uTable:
        PrintStdout(entry)

# End of 'Usage()'


#----------------------------------------------------------#
#                    Program Entry Point                   #
#----------------------------------------------------------#

#####
# Command Line Preprocessing
# 
# Some things have to be done *before* the command line
# options can actually be processed.  This includes:
#
#  1) Prepending any options specified in the environment variable.
#
#  2) Resolving any include file references
#
#  3) Separating the command line into [options ... filenames ..]
#     groupings so that user can interweave multiple options
#     and names on the command line.
#
#  4) Building the data structures that depend on the file/dir names
#     specified for renaming.  We have to do this first, because
#     -r renaming operations specified on the command line will
#     need this information if they make use of renaming tokens.
#
#####

# Process any options set in the environment first, and then those
# given on the command line


# Start with the command line, then prepend anything found in the
# program's environment variable so that command line options are
# processed last.

OPTIONS = sys.argv[1:]

envopt = os.getenv(PROGENV)
if envopt:
    OPTIONS = envopt.split() + OPTIONS


# Check for debug manually to see if we want
# debug info about includes

if DEBUGFLAG in OPTIONS:
    ProgramOptions[DEBUG] = True

# Deal with include files

OPTIONS = ProcessIncludes(OPTIONS)

# Check for debug manually again before we process the options
# to get debug info on command line expansion

if DEBUGFLAG in OPTIONS:
    ProgramOptions[DEBUG] = True

# Each option letter appears exactly once; a trailing ':' marks an
# option that takes an argument.

try:
    opts, args = getopt.getopt(OPTIONS, '1abCcdEefghl:qR:r:tvw:Xx')
except getopt.GetoptError as e:
    ErrorMsg(eBADARG % e.args[0])
    sys.exit(1)

# Create and populate an object with rename targets.  This must be
# done here because this object also stores the -r renaming requests
# we may find in the options processing below.  Also, this object must
# be fully populated before any actual renaming can take place since
# many of the renaming tokens derive information about the file.

targs = RenameTargets(args)

# Now process the options, in the order they were given.  Order
# matters: each -r request records the option settings in force at
# the moment it is seen.

for opt, val in opts:

    if opt == "-1":
        ProgramOptions[GLOBAL] = False

    elif opt == "-a":
        ProgramOptions[TARGET] = ALL

    elif opt == "-b":
        ProgramOptions[TARGET] = NAM

    elif opt == "-C":
        ProgramOptions[CASESENSITIVE] = True

    elif opt == "-c":
        ProgramOptions[CASESENSITIVE] = False

    elif opt == "-d":

        # The manual checks above only see -d when it is an argument
        # on its own; set the flag here too so that -d clustered with
        # other options (e.g. -dt) also enables the debug dumps.

        ProgramOptions[DEBUG] = True
        DumpState()

    elif opt == "-E":
        ProgramOptions[ERRORCONTINUE] = True

    elif opt == "-e":
        ProgramOptions[TARGET] = EXT

    elif opt == "-f":
        ProgramOptions[FORCERENAM] = True

    elif opt == "-g":
        ProgramOptions[GLOBAL] = True

    elif opt == "-h":
        Usage()
        sys.exit(0)

    elif opt == "-l":
        ProgramOptions[EXTDELIM] = val

    elif opt == "-q":
        ProgramOptions[QUIET] = True

    elif opt == "-R":
        ProgramOptions[RENSEP] = val

    elif opt == "-r":

        # Snapshot the current option settings together with the
        # old/new strings.  Copying the dictionary avoids reusing
        # 'opt' as an inner loop variable.

        req = dict(ProgramOptions)
        req[OLD], req[NEW] = GetOldNew(val)
        targs.RenRequests.append(req)

    elif opt == "-t":
        ProgramOptions[TESTMODE] = True

    elif opt == "-v":
        PrintStdout(RCSID)
        sys.exit(0)

    elif opt == "-w":

        # Only a non-numeric value is a "bad length"; anything else
        # raised here should not be hidden.

        try:
            l = int(val)
        except ValueError:
            ErrorMsg(eBADLEN % val)
            sys.exit(1)
        if l < MINLEN:
            ErrorMsg(eLINELEN)
            sys.exit(1)
        ProgramOptions[MAXLINELEN] = l

    elif opt == "-X":
        ProgramOptions[REGEX] = False

    elif opt == "-x":
        ProgramOptions[REGEX] = True


# At this point, the command line has been fully processed and the
# container fully populated.  Provide debug info about both if
# requested.

if ProgramOptions[DEBUG]:

    # Dump what we know about the command line

    DumpList(DebugMsg, dCMDLINE, "", sys.argv)
    DumpList(DebugMsg, dPROGENV, "", envopt)
    DumpList(DebugMsg, dRESOLVEDOPTS, "", OPTIONS)


    # Dump what we know about the container

    targs.DumpObj()

# List the rename targets in command line order

for target in targs.SortViews[ORDERBYCMDLINE]:
    PrintStdout(target)

# Release the target container if we created one

del targs