Newer
Older
tren / tren.py
#!/usr/bin/env python
# tren.py
# Copyright (c) 2010 TundraWare Inc.
# For Updates See:  http://www.tundraware.com/Software/tren

# Program Information

# Identity of the program: its file name, the name without the ".py"
# suffix, and the upper-cased form of that base name.

PROGNAME = "tren.py"
BASENAME = PROGNAME.partition(".py")[0]
PROGENV  = BASENAME.upper()

# The version is the third whitespace-separated field of the RCS id.

RCSID    = "$Id: tren.py,v 1.144 2010/02/05 05:42:45 tundra Exp $"
VERSION  = RCSID.split()[2]

# Copyright Information

CPRT      = "(c)"
DATE      = "2010"
OWNER     = "TundraWare Inc."
RIGHTS    = "All Rights Reserved."
COPYRIGHT = "Copyright %s %s, %s  %s" % (CPRT, DATE, OWNER, RIGHTS)

# Banner (name, version, copyright) and project URL.
# HOMEPAGE deliberately carries its own trailing newline.

PROGVER   = " ".join((PROGNAME, VERSION)) + (" - %s" % COPYRIGHT)
HOMEPAGE  = "http://www.tundraware.com/Software/%s\n" % BASENAME



#----------------------------------------------------------#
#            Variables User Might Change                   #
#----------------------------------------------------------#



#------------------- Nothing Below Here Should Need Changing ------------------#


#----------------------------------------------------------#
#                       Imports                            #
#----------------------------------------------------------#

import copy
import getopt
import os
from   stat import *
import sys


#----------------------------------------------------------#
#                 Aliases & Redefinitions                  #
#----------------------------------------------------------#



#----------------------------------------------------------#
#                Constants & Literals                      #
#----------------------------------------------------------#



#####
# General Program Constants
#####

MAXINCLUDES = 50        # Most include-file expansions permitted

#####
# Message Formatting Constants
#####

# Keep ProgramOptions[MAXLINELEN] greater than PADWIDTH + WRAPINDENT;
# the line conditioning/wrap code relies on that.

PADCHAR    = " "        # Character used for padding
PADWIDTH   = 30         # Width of one output column
LSTPAD     = 13         # Pad width used when dumping lists
WRAPINDENT = 8          # Additional indent for wrapped lines
MINLEN     = PADWIDTH + WRAPINDENT + 1      # Shortest permitted line length


#####
# Literals
#####

ALL     = "All"         # Rename target: entire filename
COMMENT = "#"           # Starts a comment in include files
DEFEXT  = "."           # Default name/extension separator
DEFLEN  = 75            # Default output line length
DEFSEP  = "="           # Default -r separator, as in old=new
ESC     = "\\"          # Escape character
EXT     = "Ext"         # Rename target: extension
INCL    = "-I"          # Command line option naming an include file
NAM     = "Nam"         # Rename target: name

# Keys into the program option table (also copied into each rename request)

DEBUG         = "DEBUG"
CASESENSITIVE = "CASESENSITIVE"
ERRORCONTINUE = "ERRORCONTINUE"
EXTDELIM      = "EXTDELIM"
FORCERENAM    = "FORCERENAM"
GLOBAL        = "GLOBAL"
MAXLINELEN    = "MAXLINELEN"
QUIET         = "QUIET"
REGEX         = "REGEX"
RENSEP        = "RENSEP"
TARGET        = "TARGET"
TESTMODE      = "TESTMODE"

# Keys of the per-target record kept for every rename target

BASE     = "BASENAME"
PATHNAME = "PATHNAME"
STATS    = "STATS"

# Keys naming the available sort views of the rename targets

ORDERBYCMDLINE = "ORDERBYCOMMANDLINE"
ORDERBYALPHA   = "ORDERBYALPHA"
ORDERBYMODE    = "ORDERBYMODE"
ORDERBYINODE   = "ORDERBYINODE"
ORDERBYDEV     = "ORDERBYDEV"
ORDERBYNLINK   = "ORDERBYNLINK"
ORDERBYUID     = "ORDERBYUID"
ORDERBYGID     = "ORDERBYGID"
ORDERBYATIME   = "ORDERBYATIME"
ORDERBYCTIME   = "ORDERBYCTIME"
ORDERBYMTIME   = "ORDERBYMTIME"
ORDERBYSIZE    = "ORDERBYSIZE"

# Keys for the two strings of a rename request

NEW = "NEW"
OLD = "OLD"


#----------------------------------------------------------#
#              Prompts, & Application Strings              #
#----------------------------------------------------------#


#####
# Debug Messages
#####

# Headings and labels used by the debug dumps.  Names start with 'd'.

DEBUGFLAG     = "-d"
dCMDLINE      = "Command Line"
dCURSTATE     = "Current State Of Program Options"
dDEBUG        = "DEBUG"
dDUMPOBJ      = "Dumping Object %s"         # %s: the object being dumped
dINCLFILES    = "Included Files:"
dPROGENV      = "".join(("$", PROGENV))     # Environment variable name, shell style
dRENREQ       = "Renaming Request:"
dRENTARGET    = "Rename Target:"
dRESOLVEDOPTS = "Resolved Command Line"
dSEPCHAR      = "-"                         # Repeated to draw debug separator lines
dSORTVIEW     = "Sort View:"


#####
# Error Messages
#####

# Names start with 'e'.  Messages still containing a %s placeholder
# are completed by the caller at the point of failure.

eBADARG       = "Invalid command line: %s!"
eBADINCL      = "option %s requires argument" % INCL
eBADNEWOLD    = ("Bad -r argument '%s'!  Requires exactly one new, old string separator (Default: "
                 + DEFSEP
                 + ")")
eBADLEN       = "Bad line length '%s'!"
eERROR        = "ERROR"
eFILEOPEN     = "Cannot open file '%s': %s!"
eLINELEN      = "Specified line length too short!  Must be at least %s" % MINLEN
eNOTHINGTODO  = "Nothing to do!"
eTOOMANYINC   = "Too many includes! (Max is %d) Possible circular reference?" % MAXINCLUDES


#####
# Warning Messages
#####


#####
# Usage Prompts
#####

# Help text, one list entry per output line.
#
# The synopsis and option list are kept in step with the getopt()
# option string: -b is documented, -R appears in the synopsis, and -w
# is shown with the argument it requires.  Default values are taken
# from the DEF* constants rather than repeated as literals.

uTable = [PROGVER,
          HOMEPAGE,
          "usage:  " + PROGNAME + " [-abCcdEefGghqtvXx] [-I file] [-l string] [-R rensep] [-w length] [-r old=new]... file|dir file|dir ...",
          "   where,",
          "         -a            Rename within the entire file or directory name (Default)",
          "         -b            Only perform renaming within name portion of file or directory name.",
          "         -C            Do case-sensitive renaming (Default)",
          "         -c            Collapse case when doing string substitution.",
          "         -d            Dump debugging information",
          "         -e            Only perform renaming within extension portion of file or directory name.",
          "         -E            Continue renaming even after an error is encountered",
          "         -f            Force renaming even if target file or directory name already exists.",
          "         -G            Rename only the first instance of the specified string (Default)",
          "         -g            Replace all instances (global rename) of the old string with the new.",
          "         -h            Print help information.",
          "         -I file       Include command line arguments from file",
          "         -l string     File extension delimiter string. (Default: %s)" % DEFEXT,
          "         -q            Quiet mode, do not show progress.",
          "         -R rensep     Separator string for -r rename arguments. (Default: %s)" % DEFSEP,
          "         -r old=new    Replace old with new in file or directory names.",
          "         -t            Test mode, don't rename, just show what the program *would* do",
          "         -v            Print detailed program version information and exit.",
          "         -w length     Line length of diagnostic and error output (Default: %s)" % DEFLEN,
          "         -X            Treat the renaming strings literally (Default)",
          "         -x            Treat the old replacement string as a Python regular expression",
         ]

#----------------------------------------------------------#
#          Global Variables & Data Structures              #
#----------------------------------------------------------#

# Program toggle and option defaults

# Absolute paths of the include files read so far;
# appended to by ProcessIncludes().

IncludedFiles = []

# Current value of every program option, keyed by the option literals.
# These are the defaults in force before any command line processing.

ProgramOptions = {
    DEBUG:         False,     # Debugging off
    CASESENSITIVE: True,      # Search is case-sensitive
    ERRORCONTINUE: False,     # Do not continue after error
    EXTDELIM:      DEFEXT,    # Name/Extension delimiter
    FORCERENAM:    False,     # Do not rename if target already exists
    GLOBAL:        False,     # Only rename first instance of old string
    MAXLINELEN:    DEFLEN,    # Width of output messages
    QUIET:         False,     # Display progress
    REGEX:         False,     # Do not treat old string as a regex
    RENSEP:        DEFSEP,    # Old, New string separator for -r
    TARGET:        ALL,       # One of ALL, NAM, or EXT
    TESTMODE:      False,     # Test mode off
}


#--------------------------- Code Begins Here ---------------------------------#


#----------------------------------------------------------#
#             Object Base Class Definitions                #
#----------------------------------------------------------#


#####
# Container For Holding Rename Targets And Renaming Requests
#####

class RenameTargets:

    """
        This class is used to keep track of all the files and/or
        directories we're renaming.  After the class is constructed
        and the command line fully parsed, this will contain:

        self.RenNames    = { fullname : {BASE : basename, PATHNAME : pathtofile, STATS : stats}
                             ... (repeated for each rename target)
                           }

        self.SortViews   = {

                             ORDERBYCMDLINE : [fullnames in command line order],
                             ORDERBYALPHA   : [fullnames in alphabetic order],
                             ORDERBYMODE    : [fullnames in mode order],
                             ORDERBYINODE   : [fullnames in inode order],
                             ORDERBYDEV     : [fullnames in devs order],
                             ORDERBYNLINK   : [fullnames in nlinks order],
                             ORDERBYUID     : [fullnames in uids order],
                             ORDERBYGID     : [fullnames in gids order],
                             ORDERBYATIME   : [fullnames in atimes order],
                             ORDERBYCTIME   : [fullnames in ctimes order],
                             ORDERBYMTIME   : [fullnames in mtimes order],
                             ORDERBYSIZE    : [fullnames in size order]
                            }

        self.RenRequests =  [
                             { OLD           : old rename string,
                               NEW           : new rename string,
                               DEBUG         : debug flag,
                               CASESENSITIVE : case sensitivity flag,
                               ERRORCONTINUE : error continuation flag,
                               EXTDELIM      : name/Extension delimiter string,
                               FORCERENAM    : force renaming flag,
                               GLOBAL        : global replace flag,
                               MAXLINELEN    : max output line length,
                               QUIET         : quiet output flag,
                               REGEX         : regular expression enable flag,
                               RENSEP        : old/new rename separator string,
                               TARGET        : target field ,
                               TESTMODE      : testmode flag
                             } ... (repeated for each rename request)
                            ]

    """

    #####
    # Constructor
    #####

    def __init__(self, targs):

        """ Build the target table and every sort view from 'targs',
            a list of file/directory names.

            The entries of 'targs' are converted to absolute paths
            *in place* and that same list becomes the command line
            order view.  Each target is stat'ed; if that fails an
            error is reported and the program exits with status 1.
        """

        # Dictionary of all rename targets and their stat info

        self.RenNames = {}

        # Normalize the caller's list in place (slice assignment keeps
        # the same list object, which is also stored below).

        targs[:] = [os.path.abspath(name) for name in targs]

        # Dictionary of all possible sort views.
        # We can load the first two right away since they're based
        # only on the target names provided on the command line

        self.SortViews = {ORDERBYCMDLINE : targs, ORDERBYALPHA : sorted(targs)}

        # List of all the renaming requests - will be filled in
        # by -r command line parsing.

        self.RenRequests = []

        # Working structure used while we build up sort orders based
        # on stat information: (stat index, {stat value : [fullnames]},
        # name of the resulting sort view)

        SeqTypes = [ (ST_MODE,   {},  ORDERBYMODE),
                     (ST_INO,    {},  ORDERBYINODE),
                     (ST_DEV,    {},  ORDERBYDEV),
                     (ST_NLINK,  {},  ORDERBYNLINK),
                     (ST_UID,    {},  ORDERBYUID),
                     (ST_GID,    {},  ORDERBYGID),
                     (ST_ATIME,  {},  ORDERBYATIME),
                     (ST_CTIME,  {},  ORDERBYCTIME),
                     (ST_MTIME,  {},  ORDERBYMTIME),
                     (ST_SIZE,   {},  ORDERBYSIZE),
                   ]

        # Populate the data structures with each target's stat information

        for fullname in targs:

            try:
                stats = os.stat(fullname)
            except (IOError, OSError) as e:
                ErrorMsg(eFILEOPEN % (fullname, e.strerror))
                sys.exit(1)

            # The path portion is whatever precedes the basename.
            # Slice it off by length: splitting on the basename text
            # goes wrong when that text also occurs earlier in the
            # path, and fails outright when the basename is empty.

            basename = os.path.basename(fullname)
            pathname = fullname[:len(fullname) - len(basename)]

            # Store basename, path, and stat info for this file

            self.RenNames[fullname] = {BASE : basename, PATHNAME : pathname, STATS : stats}

            # Incrementally group the names by each stat value; these
            # groups are later used to create the sorted views

            for statflag, storage, order in SeqTypes:
                storage.setdefault(stats[statflag], []).append(fullname)

        # Create the various sorted views we may need for sequence
        # renaming tokens

        for statflag, storage, order in SeqTypes:

            # Order by stat value; names sharing a value are ordered
            # alphabetically among themselves.

            view = []
            for statval in sorted(storage):
                view.extend(sorted(storage[statval]))

            self.SortViews[order] = view

    # End of '__init__()'


    #####
    # Debug Dump
    #####

    def DumpObj(self):

        """ Write the contents of this object (rename targets, sort
            views, and rename requests) via DebugMsg().
        """

        SEPARATOR = dSEPCHAR * ProgramOptions[MAXLINELEN]
        DebugMsg("\n")
        DebugMsg(SEPARATOR)
        DebugMsg(dDUMPOBJ % str(self))
        DebugMsg(SEPARATOR)

        # Dump the RenNames and SortView dictionaries

        for table, heading in ((self.RenNames, dRENTARGET), (self.SortViews, dSORTVIEW)):

            for key in table:
                DumpList(DebugMsg, heading, key, table[key])

        for rr in self.RenRequests:
            DumpList(DebugMsg, dRENREQ, "", rr)

        DebugMsg(SEPARATOR + "\n\n")

    # End of 'DumpObj()'


    #####
    # Go Do The Requested Renaming
    #####

    def Rename(self):

        """ Apply every rename request, in the order given, to the
            basename of every target, in command line order.

            As written, this computes the resulting name and writes
            the original full name and the new full name to stdout;
            it does not itself touch the filesystem.  Reports an
            error and returns if there are no targets or no requests.
        """

        # Make sure we actually have work to do

        if not self.SortViews[ORDERBYCMDLINE] or not self.RenRequests:

            ErrorMsg(eNOTHINGTODO)
            return

        # Iterate over all the target filenames in command line order,
        # applying each renaming in requested order

        for target in self.SortViews[ORDERBYCMDLINE]:

            basename = self.RenNames[target][BASE]
            pathname = self.RenNames[target][PATHNAME]

            for renrequest in self.RenRequests:

                old, new = self.ResolveRenameStrings(renrequest[OLD], renrequest[NEW])

                # Handle the case when old = "".
                # This means to *replace the entire* old name with new.

                if not old:
                    old = basename

                # If the name itself is empty too there is nothing to
                # match, and searching for "" would never terminate.

                if not old:
                    continue

                # Collapse case if requested

                name = basename
                if not renrequest[CASESENSITIVE]:

                    name = name.lower()
                    old  = old.lower()

                # Build a list of indexes to every non-overlapping
                # occurrence of the old string

                oldstrings = []
                i = name.find(old)
                while i >= 0:

                    oldstrings.append(i)
                    i = name.find(old, i + len(old))

                # Only process leftmost occurrence if global replace is off

                if not renrequest[GLOBAL]:
                    oldstrings = oldstrings[:1]

                # Replace selected substring(s).
                # Substitute from R->L in original string
                # so as not to mess up the replacement indices.

                for i in reversed(oldstrings):
                    basename = basename[:i] + new + basename[i + len(old):]

            PrintStdout(ColumnPad([target, pathname + basename], padwidth = 50))

    # End of 'Rename()'


    #####
    # Resolve Rename Strings
    #####

    def ResolveRenameStrings(self, old, new):

        """ This takes "old" and "new" renaming strings as input and resolves
            all outstanding renaming token references so that they can
            then be applied to the rename.

            Currently both strings are returned unchanged, as the
            list [old, new].
        """

        return [old, new]

    # End of 'ResolveRenameStrings()'


# End of class 'RenameTargets'
    

#----------------------------------------------------------#
#             Supporting Function Definitions              #
#----------------------------------------------------------#


#####
# Turn A List Into Columns With Space Padding
#####

def ColumnPad(list, padchar=PADCHAR, padwidth=PADWIDTH):

    """ Return one string made of every item in 'list', each
        converted with str() and followed by (padwidth - length)
        copies of 'padchar'.  An item at least 'padwidth' long
        gets no padding at all.
    """

    columns = []
    for item in list:
        text = str(item)
        columns.append(text + padchar * (padwidth - len(text)))

    return "".join(columns)

# End of 'ColumnPad()'


#####
# Condition Line Length With Fancy Wrap And Formatting
#####

def ConditionLine(msg,
                  padchar=PADCHAR,
                  padwidth=PADWIDTH,
                  wrapindent=WRAPINDENT):

    """ Break 'msg' into a list of lines, each at most
        ProgramOptions[MAXLINELEN] characters long.

        The first line is taken from 'msg' as is.  Every following
        line starts with (padwidth + wrapindent) copies of 'padchar'.
        An empty 'msg' yields a list holding one empty string.

        If the line length leaves no room for any text after the
        continuation indent, the indent is dropped; if the line
        length is below 1, 'msg' is returned unwrapped.  Either way
        the function always terminates.
    """

    maxlen = ProgramOptions[MAXLINELEN]
    indent = padchar * (padwidth + wrapindent)

    # Each pass of the loop below consumes (maxlen - len(indent))
    # characters of the message.  Make sure that is always > 0.

    if maxlen < 1:
        return [msg]

    if len(indent) >= maxlen:
        indent = ""

    retval = [msg[:maxlen]]
    msg = msg[maxlen:]

    while msg:
        msg = indent + msg
        retval.append(msg[:maxlen])
        msg = msg[maxlen:]

    return retval

# End of 'ConditionLine()'


#####
# Print A Debug Message
#####

def DebugMsg(msg):

    """ Wrap 'msg' with ConditionLine() and write each resulting
        line to stderr, prefixed with the program name and the
        debug tag.
    """

    prefix = PROGNAME + " " + dDEBUG + ": "

    for line in ConditionLine(msg):
        PrintStderr(prefix + line)

# End of 'DebugMsg()'


#####
# Debug Dump Of A List
#####

def DumpList(handler, msg, listname, content):

    """ Emit a two-part dump through 'handler' (a callable taking
        one string): first the heading 'msg', then a line holding
        'listname' padded to LSTPAD columns followed by 'content'.
    """

    handler(msg)

    label  = ColumnPad([listname, " "], padwidth=LSTPAD)
    detail = " %s %s" % (label, content)

    handler(ColumnPad([" ", detail]))

# End of 'DumpList()'


#####
# Dump The State Of The Program
#####

def DumpState():

    """ Write the name and current value of every program option,
        in alphabetical order of option name, via DebugMsg().
    """

    SEPARATOR = dSEPCHAR * ProgramOptions[MAXLINELEN]
    DebugMsg(SEPARATOR)
    DebugMsg(dCURSTATE)
    DebugMsg(SEPARATOR)

    # sorted() works whether keys() gives back a list or a view;
    # calling .sort() on the result of keys() only works for a list.

    for o in sorted(ProgramOptions):
        DebugMsg(ColumnPad([o, ProgramOptions[o]]))

    DebugMsg(SEPARATOR)


# End of 'DumpState()'


#####
# Print An Error Message
#####

def ErrorMsg(emsg):

    """ Wrap 'emsg' with ConditionLine() and write each resulting
        line to stderr, prefixed with the program name and the
        error tag.
    """

    prefix = PROGNAME + " " + eERROR + ": "

    for line in ConditionLine(emsg):
        PrintStderr(prefix + line)

# End of 'ErrorMsg()'

#####
# Split -r Argument Into Separate Old And New Strings
#####

def GetOldNew(arg):

    """ Split the -r argument 'arg' into its old and new strings
        and return them as the list [old, new].

        The split point is the single occurrence of the current
        separator (ProgramOptions[RENSEP]) that is not immediately
        preceded by the escape character.  Escaped separators are
        kept as text, with the escape character removed.

        Reports an error and exits with status 1 unless there is
        exactly one unescaped separator, or if the separator itself
        is the empty string.
    """

    oldnewsep = ProgramOptions[RENSEP]

    # An empty separator cannot delimit anything, and the scan
    # below (which steps by the separator length) would never end.

    if not oldnewsep:
        ErrorMsg(eBADNEWOLD % arg)
        sys.exit(1)

    numseps  = 0
    sepindex = 0

    i = 0
    while i < len(arg):

        # Scan string ignoring escaped separators

        if arg.startswith(oldnewsep, i):

            if i == 0 or arg[i-1] != ESC:
                sepindex = i
                numseps += 1

            i += len(oldnewsep)

        else:
            i += 1

    if numseps != 1:
        ErrorMsg(eBADNEWOLD % arg)
        sys.exit(1)

    old, new = arg[:sepindex], arg[sepindex + len(oldnewsep):]

    # Turn escaped separators back into plain separator text

    old = old.replace(ESC + oldnewsep, oldnewsep)
    new = new.replace(ESC + oldnewsep, oldnewsep)

    return [old, new]

# End of 'GetOldNew()'


#####
# Print To stderr
#####

def PrintStderr(msg, trailing="\n"):

    """ Write 'msg' followed by 'trailing' (a newline unless
        told otherwise) to stderr.
    """

    text = msg + trailing
    sys.stderr.write(text)

# End of 'PrintStderr()'


#####
# Print To stdout
#####

def PrintStdout(msg, trailing="\n"):

    """ Write 'msg' followed by 'trailing' (a newline unless
        told otherwise) to stdout.
    """

    text = msg + trailing
    sys.stdout.write(text)

# End of 'PrintStdout'


#####
# Process Include Files On The Command Line
#####

def ProcessIncludes(OPTIONS):

    """ Return 'OPTIONS' (a list of command line words) with every
        include option replaced by the words found in the named file.

        Both "-I file" and "-Ifile" are recognized.  Included files
        may themselves contain include options; they are expanded
        too.  In an include file, everything from the comment
        character to the end of the line is ignored and the rest is
        split on whitespace.

        The absolute path of every file read is appended to the
        global IncludedFiles list.

        Reports an error and exits with status 1 if an include
        option has no filename, if a file cannot be opened, or if
        more than MAXINCLUDES files get included (a cheap guard
        against circular includes).
    """

    # This has to be done here separate from the command line options so
    # that getopt() processing will "see" the included statements.

    NUMINCLUDES = 0

    while True:

        # Find the next word that *is* or *begins with* the include
        # option.  (Looking for the option text anywhere in the
        # command line would also match unrelated words that merely
        # contain it.)

        for i, word in enumerate(OPTIONS):
            if word.startswith(INCL):
                break
        else:
            return OPTIONS

        lhs = OPTIONS[:i]

        # The user separated the option and the filename with a space

        if OPTIONS[i] == INCL:

            # You can't end a command line with the include option
            # and no argument

            if i == len(OPTIONS) - 1:
                ErrorMsg(eBADARG % eBADINCL)
                sys.exit(1)

            filename = OPTIONS[i+1]
            rhs      = OPTIONS[i+2:]

        # The option and filename are not space separated.  Strip
        # only the leading option text so the filename is preserved.

        else:
            filename = OPTIONS[i][len(INCL):]
            rhs      = OPTIONS[i+1:]

        # Keep track of - and limit - the number of includes allowed.
        # This is an easy way to stop circular (infinite) includes.

        NUMINCLUDES += 1
        if NUMINCLUDES > MAXINCLUDES:
            ErrorMsg(eTOOMANYINC)
            sys.exit(1)

        # Read the file's contents, dropping comments within lines

        n = []
        try:
            with open(filename) as f:
                for line in f:
                    n += line.split(COMMENT)[0].split()

        except IOError as e:
            ErrorMsg(eFILEOPEN % (filename, e.strerror))
            sys.exit(1)

        # Keep track of the filenames being included

        IncludedFiles.append(os.path.abspath(filename))

        # Replace the include option on the command line with
        # that file's contents

        OPTIONS = lhs + n + rhs

# End of 'ProcessIncludes()'


#####
# Print Usage Information
#####

def Usage():

    """ Write every entry of uTable to stdout, one entry per line. """

    for entry in uTable:
        PrintStdout(entry)

# End of 'Usage()'


#----------------------------------------------------------#
#                    Program Entry Point                   #
#----------------------------------------------------------#

#####
# Command Line Preprocessing
# 
# Some things have to be done *before* the command line
# options can actually be processed.  This includes:
#
#  1) Prepending any options specified in the environment variable.
#
#  2) Resolving any include file references
#
#  3) Separating the command line into [options ... filenames ..]
#     groupings so that user can interweave multiple options
#     and names on the command line.
#
#  4) Building the data structures that depend on the file/dir names
#     specified for renaming.  We have to do this first, because
#     -r renaming operations specified on the command line will
#     need this information if they make use of renaming tokens.
#
#####

# Process any options set in the environment first, and then those
# given on the command line


# Start with the words given on the command line ...

OPTIONS = sys.argv[1:]

# ... and put any options found in the environment variable in
# front of them, so the command line can override the environment.

envopt = os.getenv(PROGENV)
if envopt:
    OPTIONS = envopt.split() + OPTIONS

# Deal with include files

OPTIONS = ProcessIncludes(OPTIONS)

# And parse the command line.  Every letter in the option string
# has a handler below; a trailing ':' marks an option that takes
# an argument.

try:
    opts, args = getopt.getopt(OPTIONS, 'abCcdEefGghl:qR:r:tvw:Xx')
except getopt.GetoptError as e:
    ErrorMsg(eBADARG % e.args[0])
    sys.exit(1)

# Create and populate an object with rename targets.  This must be
# done here because this object also stores the -r renaming requests
# we may find in the options processing below.  Also, this object must
# be fully populated before any actual renaming can take place since
# many of the renaming tokens derive information about the file.

targs = RenameTargets(args)

# Now process the options, strictly in the order given.  Order
# matters: each -r request records the option settings in force
# at the point where it appears.

for opt, val in opts:

    if opt == "-a":
        ProgramOptions[TARGET] = ALL

    elif opt == "-b":
        ProgramOptions[TARGET] = NAM

    elif opt == "-C":
        ProgramOptions[CASESENSITIVE] = True

    elif opt == "-c":
        ProgramOptions[CASESENSITIVE] = False

    elif opt == "-d":
        ProgramOptions[DEBUG] = True
        DumpState()

    elif opt == "-E":
        ProgramOptions[ERRORCONTINUE] = True

    elif opt == "-e":
        ProgramOptions[TARGET] = EXT

    elif opt == "-f":
        ProgramOptions[FORCERENAM] = True

    elif opt == "-G":
        ProgramOptions[GLOBAL] = False

    elif opt == "-g":
        ProgramOptions[GLOBAL] = True

    elif opt == "-h":
        Usage()
        sys.exit(0)

    elif opt == "-l":
        ProgramOptions[EXTDELIM] = val

    elif opt == "-q":
        ProgramOptions[QUIET] = True

    elif opt == "-R":
        ProgramOptions[RENSEP] = val

    elif opt == "-r":

        # Snapshot the current option settings into the request,
        # then add the old/new strings.  (Copying with dict() avoids
        # reusing - and so clobbering - the loop variable 'opt'.)

        req = dict(ProgramOptions)
        req[OLD], req[NEW] = GetOldNew(val)
        targs.RenRequests.append(req)

    elif opt == "-t":
        ProgramOptions[TESTMODE] = True

    elif opt == "-v":
        PrintStdout(RCSID)
        sys.exit(0)

    elif opt == "-w":

        # Only a failed conversion means a bad length; anything
        # else should not be silently reported as one.

        try:
            l = int(val)
        except ValueError:
            ErrorMsg(eBADLEN % val)
            sys.exit(1)

        if l < MINLEN:
            ErrorMsg(eLINELEN)
            sys.exit(1)

        ProgramOptions[MAXLINELEN] = l

    elif opt == "-X":
        ProgramOptions[REGEX] = False

    elif opt == "-x":
        ProgramOptions[REGEX] = True


# At this point, the command line has been fully processed and the
# container fully populated.  Provide debug info about both if
# requested.

if ProgramOptions[DEBUG]:

    # Dump what we know about the command line

    DumpList(DebugMsg, dCMDLINE, "", sys.argv)
    DumpList(DebugMsg, dPROGENV, "", envopt)
    DumpList(DebugMsg, dRESOLVEDOPTS, "", OPTIONS)

    # Dump what we know about included files

    DumpList(DebugMsg, dINCLFILES, "", IncludedFiles)

    # Dump what we know about the container

    targs.DumpObj()


# Perform requested renamings

targs.Rename()


# Release the target container

del targs