Newer
Older
tren / tren.py
#!/usr/bin/env python
# tren.py
# Copyright (c) 2010 TundraWare Inc.
# For Updates See:  http://www.tundraware.com/Software/tren

# Program Information

# Identity of the program.  BASENAME is the script name without its
# ".py" suffix and PROGENV is the name of the environment variable
# consulted for default options.

PROGNAME     = "tren.py"
BASENAME     = PROGNAME.partition(".py")[0]
PROGENV      = BASENAME.upper()

# The version number is the third whitespace-separated field of the
# RCS identification string.

RCSID        = "$Id: tren.py,v 1.135 2010/02/02 17:53:55 tundra Exp $"
VERSION      = RCSID.split()[2]

# Copyright Information

CPRT         = "(c)"
DATE         = "2010"
OWNER        = "TundraWare Inc."
RIGHTS       = "All Rights Reserved."
COPYRIGHT    = "Copyright " + CPRT + " " + DATE + ", " + OWNER + "  " + RIGHTS

# Banner and home page strings shown at the top of the usage text

PROGVER      = "%s %s - %s" % (PROGNAME, VERSION, COPYRIGHT)
HOMEPAGE     = "http://www.tundraware.com/Software/" + BASENAME + "\n"



#----------------------------------------------------------#
#            Variables User Might Change                   #
#----------------------------------------------------------#



#------------------- Nothing Below Here Should Need Changing ------------------#


#----------------------------------------------------------#
#                       Imports                            #
#----------------------------------------------------------#

import copy
import getopt
import os
from   stat import *
import sys


#----------------------------------------------------------#
#                 Aliases & Redefinitions                  #
#----------------------------------------------------------#



#----------------------------------------------------------#
#                Constants & Literals                      #
#----------------------------------------------------------#



#####
# General Program Constants
#####

MAXINCLUDES  =  50          # Ceiling on the number of include files expanded
TGTSEQFLG    =  ""          # Marks the SeqTypes entry that is not a stat() field

#####
# Message Formatting Constants
#####

# Line wrapping prepends PADWIDTH + WRAPINDENT pad characters to every
# continuation line, so MAXLINELEN must always be larger than that sum.

MAXLINELEN   =  75          # Default length at which printed lines wrap
PADCHAR      =  " "         # Character used to pad columns
PADWIDTH     =  30          # Width of a padded column
LSTPAD       =  13          # Column width used when dumping lists
WRAPINDENT   =   8          # Additional indent applied to wrapped lines
MINLEN       =  1 + PADWIDTH + WRAPINDENT  # Shortest line length accepted


#####
# Literals
#####

ALL          =  "All"       # Rename applies to the whole filename
COMMENT      =  "#"         # Starts a comment in an include file
ESC          =  "\\"        # Escape character
EXT          =  "Ext"       # Rename applies to the extension
EXTDELIM     =  "."         # Extension delimiter
INCL         =  "-I"        # Command line option that names an include file
NAM          =  "Nam"       # Rename applies to the name
RENSEP       =  "="         # Separator in a rename command: oldRENSEPnew


#----------------------------------------------------------#
#              Prompts, & Application Strings              #
#----------------------------------------------------------#


#####
# Debug Messages
#####

# Command line switch that turns debugging output on

DEBUGFLAG     =   "-d"

# Headings and labels used in debugging output.  Every dSEQ* label
# ends with a colon so the sequence dumps are formatted uniformly.

dCMDLINE      =   "Command Line"
dCURSTATE     =   "Current State Of Program Options"
dDEBUG        =   "DEBUG"
dDUMPOBJ      =   "Dumping Object %s"
dINCLUDING    =   "Including file '%s'"
dPROGENV      =   "$" + PROGENV
dRENREQ       =   "Renaming Request:"
dRESOLVEDOPTS =   "Resolved Command Line"
dSEPCHAR      =   "-"     # Used for debug separator lines
dSEQATIME     =   "Access Time Sequence:"
dSEQCMD       =   "Command Line Sequence:"
dSEQCTIME     =   "Creation Time Sequence:"
dSEQDEV       =   "Device Sequence:"
dSEQGID       =   "GID Sequence:"
dSEQINO       =   "Inode Sequence:"
dSEQMODE      =   "Mode Sequence:"
dSEQMTIME     =   "Modification Time Sequence:"
dSEQNLINK     =   "Nlinks Sequence:"
dSEQSIZE      =   "Size Sequence:"
dSEQTARGS     =   "Rename Targets:"
dSEQUID       =   "UID Sequence:"


#####
# Error Messages
#####

# Messages containing %s are templates completed where the error is
# reported; the others are fully expanded here from the constants above.

eBADARG       =  "Invalid command line: %s!"
eBADINCL      =  "option " + INCL + " requires argument"
eBADNEWOLD    =  "Bad -r argument '%%s'!  Requires exactly one new, old string separator (Default: %s)" % RENSEP
eBADLEN       =  "Bad line length '%s'!"
eERROR        =  "ERROR"
eFILEOPEN     =  "Cannot open file '%s': %s!"
eLINELEN      =  "Specified line length too short!  Must be at least " + str(MINLEN)
eTOOMANYINC   =  "Too many includes! (Max is " + str(MAXINCLUDES) + ") Possible circular reference?"


#####
# Warning Messages
#####


#####
# Usage Prompts
#####

# Lines of help text printed, in order, by Usage().  Every option the
# command line parser accepts is listed here, and options that take an
# argument show that argument in both the synopsis and their own entry.

uTable = [PROGVER,
          HOMEPAGE,
          "usage:  " + PROGNAME + " [-1abCcdEefghqtvXx] [-I file] [-l string] [-R rensep] [-r old=new]... [-w length] file|dir file|dir ...",
          "   where,",
          "         -1            Rename only the first instance of the specified string (Default)",
          "         -a            Rename within the entire file or directory name (Default)",
          "         -b            Only perform renaming within name portion of file or directory name.",
          "         -C            Do case-sensitive renaming (Default)",
          "         -c            Collapse case when doing string substitution.",
          "         -d            Dump debugging information",
          "         -e            Only perform renaming within extension portion of file or directory name.",
          "         -E            Continue renaming even after an error is encountered",
          "         -f            Force renaming even if target file or directory name already exists.",
          "         -g            Replace all instances (global rename) of the old string with the new.",
          "         -h            Print help information.",
          "         -I file       Include command line arguments from file",
          "         -l string     File extension delimiter string. (Default: .)",
          "         -q            Quiet mode, do not show progress.",
          "         -R rensep     Separator string for -r rename arguments. (Default: =)",
          "         -r old=new    Replace old with new in file or directory names.",
          "         -t            Test mode, don't rename, just show what the program *would* do",
          "         -v            Print detailed program version information and exit.",
          "         -w length     Line length of diagnostic and error output (Default: 75)",
          "         -X            Treat the renaming strings literally (Default)",
          "         -x            Treat the old replacement string as a Python regular expression",
         ]

#----------------------------------------------------------#
#          Global Variables & Data Structures              #
#----------------------------------------------------------#

# Program toggle and option defaults

# Each key is the name of a program option and each value its default.
# The command line parser overwrites these entries, and a snapshot of
# the whole dictionary is stored with every -r rename request.

ProgramOptions = dict(
    DEBUG         = False,     # No debugging output
    CASESENSITIVE = True,      # Searching is case-sensitive
    ERRORCONTINUE = False,     # Stop after an error
    FORCERENAM    = False,     # Leave an already existing target alone
    GLOBAL        = False,     # Rename only the first instance of old string
    QUIET         = False,     # Show progress
    REGEX         = False,     # Old string is a literal, not a regex
    TARGET        = ALL,       # One of ALL, NAM, or EXT
    TESTMODE      = False,     # Test mode off
)


#--------------------------- Code Begins Here ---------------------------------#


#----------------------------------------------------------#
#             Object Base Class Definitions                #
#----------------------------------------------------------#


#####
# Container For Holding Rename Targets
#####

class RenameTargets:

    """
        This class is used to keep track of all the files and/or
        directories we're renaming.  When __init__ finishes,
        RenNames dictionary will be populated as follows:

        fully-qualified name : [ basename,
                                 stat information for the entry,
                                 position in command line args list (0-based)
                                 ascending alpha order of rename targets (0-based)
                                 descending alpha order of rename targets (0-based)
                                 ascending order of appearance by-mode (0-based)
                                 descending order of appearance by-mode (0-based)
                                 ascending order of appearance by-inode (0-based)
                                 descending order of appearance by-inode (0-based)
                                 ascending order of appearance by-devno (0-based)
                                 descending order of appearance by-devno (0-based)
                                 ascending order of appearance by-nlinks (0-based)
                                 descending order of appearance by-nlinks (0-based)
                                 ascending order of appearance by-uid (0-based)
                                 descending order of appearance by-uid (0-based)
                                 ascending order of appearance by-gid (0-based)
                                 descending order of appearance by-gid (0-based)
                                 ascending order of appearance by-atime (0-based)
                                 descending order of appearance by-atime (0-based)
                                 ascending order of appearance by-ctime (0-based)
                                 descending order of appearance by-ctime (0-based)
                                 ascending order of appearance by-mtime (0-based)
                                 descending order of appearance by-mtime (0-based)
                                 ascending order of appearance by-size  (0-based)
                                 descending order of appearance by-size  (0-based)
                               ]

        Within one ordering, targets that share the same key value are
        ranked alphabetically by fully-qualified name.

        A target that cannot be stat()ed is reported with ErrorMsg() and
        the program exits with status 1.
    """

    def __init__(self, targs):

        # Dictionary of all rename targets and their stat info

        self.RenNames   =   {}

        # Working dictionaries used to build the orderings needed by
        # sequence renaming tokens.  Each maps a key value to the list
        # of fully-qualified names that have that value.

        args       =   {}      # Single key TGTSEQFLG, Value = all targets
        modes      =   {}      # Keys = modes
        inodes     =   {}      # Keys = inodes
        devs       =   {}      # Keys = devs
        nlinks     =   {}      # Keys = nlinks
        uids       =   {}      # Keys = uids
        gids       =   {}      # Keys = gids
        atimes     =   {}      # Keys = atimes
        ctimes     =   {}      # Keys = ctimes
        mtimes     =   {}      # Keys = mtimes
        sizes      =   {}      # Keys = sizes

        # This data structure is used to keep track of everything
        # we need to build the sequence renaming token support.
        # This makes it easy to add more types later on.
        #
        # Each row is [stat index or TGTSEQFLG, working dictionary,
        # debug label].  It is built once, ahead of the target loop, so
        # that it exists even when no targets were supplied.

        SeqTypes = [ [TGTSEQFLG, args,     dSEQTARGS],
                     [ST_MODE,   modes,    dSEQMODE],
                     [ST_INO,    inodes,   dSEQINO],
                     [ST_DEV,    devs,     dSEQDEV],
                     [ST_NLINK,  nlinks,   dSEQNLINK],
                     [ST_UID,    uids,     dSEQUID],
                     [ST_GID,    gids,     dSEQGID],
                     [ST_ATIME,  atimes,   dSEQATIME],
                     [ST_CTIME,  ctimes,   dSEQCTIME],
                     [ST_MTIME,  mtimes,   dSEQMTIME],
                     [ST_SIZE,   sizes,    dSEQSIZE],
                   ]

        # Populate the data structures

        for cmdorder, target in enumerate(targs):

            try:
                fullname = os.path.abspath(target)
                basename = os.path.basename(target)
                stats    = os.stat(fullname)
            except (IOError, OSError) as e:
                ErrorMsg(eFILEOPEN % (target, e.args[1]))
                sys.exit(1)

            # Incrementally build lists of keys that will later be
            # used to create sequence renaming tokens

            for statfield, bucket, label in SeqTypes:

                # Handle os.stat() values

                if statfield != TGTSEQFLG:
                    statval = stats[statfield]

                # Handle non os.stat() stuff

                else:
                    statval = TGTSEQFLG

                bucket.setdefault(statval, []).append(fullname)

            self.RenNames[fullname] = [basename, stats, cmdorder]

        # Create the various sorted views we may need
        # for sequence renaming tokens

        for statfield, view, debugmsg in SeqTypes:

            vieworder = sorted(view)

            # Sort alphabetically when multiple filenames
            # map to the same key, creating overall
            # ordering as we go.

            ordered = []
            for key in vieworder:
                view[key].sort()
                ordered.extend(view[key])

            # Record where each name first appears in the overall
            # ordering so the rank lookup below is a dictionary access
            # rather than a repeated linear search.

            firstpos = {}
            for pos, name in enumerate(ordered):
                firstpos.setdefault(name, pos)

            # Now store the ascending and descending order in
            # the master dictionary

            tblz = len(ordered)

            for name in ordered:
                pos = firstpos[name]
                self.RenNames[name].append(pos)             # Ascending index
                self.RenNames[name].append(tblz - pos - 1)  # Descending index

            if ProgramOptions["DEBUG"]:
                for item in vieworder:
                    DumpList(DebugMsg, debugmsg, item, view[item])

        if ProgramOptions["DEBUG"]:

            SEPARATOR = dSEPCHAR * MAXLINELEN
            DebugMsg(SEPARATOR)
            DebugMsg(dDUMPOBJ % str(self))
            DebugMsg(SEPARATOR)

            # Dump abspath, basename, & stat information

            for name in self.RenNames:
                DumpList(DebugMsg, name, "", self.RenNames[name])

            DebugMsg(SEPARATOR)

        # The working dictionaries are locals and are released
        # when this method returns.

# End of class 'RenameTargets'
    

#----------------------------------------------------------#
#             Supporting Function Definitions              #
#----------------------------------------------------------#


#####
# Turn A List Into Columns With Space Padding
#####

def ColumnPad(list, padchar=PADCHAR, padwidth=PADWIDTH):

    """ Return the str() of every item in 'list' joined into a single
        string, each item followed by enough copies of 'padchar' to
        bring it up to 'padwidth' characters.  Items already that long
        or longer are left unpadded and are not truncated.
    """

    cells = []
    for entry in list:
        text = str(entry)
        cells.append(text + padchar * (padwidth - len(text)))

    return "".join(cells)

# End of 'ColumnPad()'


#####
# Condition Line Length With Fancy Wrap And Formatting
#####

def ConditionLine(msg,
                  padchar=PADCHAR,
                  padwidth=PADWIDTH,
                  wrapindent=WRAPINDENT):

    """ Break 'msg' into a list of lines no longer than MAXLINELEN.

        The first line is the leading MAXLINELEN characters of 'msg'.
        Whatever is left over is repeatedly prefixed with
        (padwidth + wrapindent) copies of 'padchar' and cut at
        MAXLINELEN again until nothing remains.  The returned list
        always has at least one entry, even for an empty 'msg'.
    """

    lines     = [msg[:MAXLINELEN]]
    remainder = msg[MAXLINELEN:]
    indent    = padchar * (padwidth + wrapindent)

    while remainder:
        remainder = indent + remainder
        lines.append(remainder[:MAXLINELEN])
        remainder = remainder[MAXLINELEN:]

    return lines

# End of 'ConditionLine()'


#####
# Print A Debug Message
#####

def DebugMsg(msg):

    """ Write 'msg' to stderr as one or more wrapped lines, each one
        prefixed with the program name and the debug tag.
    """

    lines  = ConditionLine(msg)
    prefix = PROGNAME + " " + dDEBUG + ": "

    for chunk in lines:
        PrintStderr(prefix + chunk)

# End of 'DebugMsg()'


#####
# Debug Dump Of A List
#####

def DumpList(handler, msg, listname, content):

    """ Report 'content' through 'handler' as two messages: first the
        heading 'msg', then a line holding 'listname' padded to LSTPAD
        columns followed by 'content'.
    """

    handler(msg)

    label  = ColumnPad([listname, " "], padwidth=LSTPAD)
    detail = " %s %s" % (label, content)

    handler(ColumnPad([" ", detail]))

# End of 'DumpList()'


#####
# Dump The State Of The Program
#####

def DumpState():

    """ Write every entry of ProgramOptions to the debug output, sorted
        by option name and framed by separator lines.
    """

    SEPARATOR = dSEPCHAR * MAXLINELEN
    DebugMsg(SEPARATOR)
    DebugMsg(dCURSTATE)
    DebugMsg(SEPARATOR)

    # sorted() is used rather than keys() followed by an in-place
    # sort() because keys() only returns a sortable list on Python 2.

    for o in sorted(ProgramOptions):
        DebugMsg(ColumnPad([o, ProgramOptions[o]]))

    DebugMsg(SEPARATOR)


# End of 'DumpState()'


#####
# Print An Error Message
#####

def ErrorMsg(emsg):

    """ Write 'emsg' to stderr as one or more wrapped lines, each one
        prefixed with the program name and the error tag.
    """

    lines  = ConditionLine(emsg)
    prefix = PROGNAME + " " + eERROR + ": "

    for chunk in lines:
        PrintStderr(prefix + chunk)

# End of 'ErrorMsg()'

#####
# Split -r Argument Into Separate Old And New Strings
#####

def GetOldNew(arg):

    """ Split a -r argument into its old and new strings.

        'arg' is scanned for occurrences of RENSEP.  An occurrence
        counts as a separator unless the character immediately before
        it is ESC.  Exactly one separator must be present; otherwise an
        error is reported and the program exits with status 1.

        Returns the list [old, new], in which every ESC + RENSEP pair
        has been replaced by a plain RENSEP.
    """

    seplen   = len(RENSEP)
    found    = []           # Positions of the unescaped separators
    position = 0

    while position < len(arg):

        # Anything that is not a separator is stepped over one
        # character at a time

        if not arg.startswith(RENSEP, position):
            position += 1
            continue

        # A separator only counts when it is not escaped

        if position == 0 or arg[position - 1] != ESC:
            found.append(position)

        position += seplen

    if len(found) != 1:
        ErrorMsg(eBADNEWOLD % arg)
        sys.exit(1)

    cut     = found[0]
    escaped = ESC + RENSEP

    return [arg[:cut].replace(escaped, RENSEP),
            arg[cut + seplen:].replace(escaped, RENSEP)]

# End of 'GetOldNew()'


#####
# Print To stderr
#####

def PrintStderr(msg, trailing="\n"):

    """ Write 'msg' immediately followed by 'trailing' to stderr. """

    text = msg + trailing
    sys.stderr.write(text)

# End of 'PrintStderr()'


#####
# Print To stdout
#####

def PrintStdout(msg, trailing="\n"):

    """ Write 'msg' immediately followed by 'trailing' to stdout. """

    text = msg + trailing
    sys.stdout.write(text)

# End of 'PrintStdout'


#####
# Process Include Files On The Command Line
#####

def ProcessIncludes(OPTIONS):

    """ Return a copy of the argument list 'OPTIONS' in which every
        include reference has been replaced, in place, by the arguments
        read from the named file.

        An include is either the argument INCL followed by a filename
        argument, or a single argument that starts with INCL and has
        the filename attached.  Included files may contain further
        includes.  In an included file everything from COMMENT to the
        end of the line is ignored and what is left is split on
        whitespace.

        Arguments that merely contain the text of INCL somewhere after
        their first character, and arguments containing whitespace, are
        passed through untouched.  Empty arguments are discarded.

        An include with no filename, more than MAXINCLUDES includes, or
        an include file that cannot be opened is reported with
        ErrorMsg() and the program exits with status 1.
    """

    # Resolve include file references allowing for nested includes.
    # This has to be done here separate from the command line options so
    # that getopt() processing below will "see" the included statements.
    #
    # The arguments still to be examined are kept as a stack with the
    # next argument on top, so the contents of an include file can be
    # pushed back and scanned for nested includes.

    pending     = [arg for arg in reversed(OPTIONS) if arg]
    resolved    = []
    numincludes = 0

    while pending:

        arg = pending.pop()

        if not arg.startswith(INCL):
            resolved.append(arg)
            continue

        # The filename is either attached to the option or is the
        # next argument.  It is an error for the next argument to be
        # missing.

        if arg != INCL:
            filename = arg[len(INCL):]

        elif pending:
            filename = pending.pop()

        else:
            ErrorMsg(eBADARG % eBADINCL)
            sys.exit(1)

        # Keep track of and limit the number of includes allowed.
        # This is an easy way to stop circular (infinite) includes.

        numincludes += 1
        if numincludes > MAXINCLUDES:
            ErrorMsg(eTOOMANYINC)
            sys.exit(1)

        # Read the file's arguments, dropping comments within lines.

        try:
            included = []
            with open(filename) as f:
                for line in f:
                    included += line.split(COMMENT)[0].split()

        except IOError as e:
            ErrorMsg(eFILEOPEN % (filename, e.args[1]))
            sys.exit(1)

        if ProgramOptions["DEBUG"]:
            DebugMsg(dINCLUDING % filename)

        # Replace the include option with that file's contents by
        # pushing them back so they are examined next, in file order.

        pending.extend(reversed(included))

    return resolved

# End of 'ProcessIncludes()'


#####
# Print Usage Information
#####

def Usage():

    """ Print each line of the uTable help text to stdout. """

    emit = PrintStdout

    for helpline in uTable:
        emit(helpline)

# End of 'Usage()'


#----------------------------------------------------------#
#                    Program Entry Point                   #
#----------------------------------------------------------#

#####
# Command Line Preprocessing
# 
# Some things have to be done *before* the command line
# options can actually be processed.  This includes:
#
#  1) Prepending any options specified in the environment variable.
#
#  2) Resolving any include file references
#
#  3) Separating the command line into [options ... filenames ..]
#     groupings so that user can interweave multiple options
#     and names on the command line.
#
#  4) Building the data structures that depend on the file/dir names
#     specified for renaming.  We have to do this first, because
#     -r renaming operations specified on the command line will
#     need this information if they make use of renaming tokens.
#
#####

# Process any options set in the environment first, and then those
# given on the command line


OPTIONS = sys.argv[1:]

# Options found in the environment variable are placed ahead of
# the ones typed on the command line

envopt = os.getenv(PROGENV)
if envopt:
    OPTIONS = envopt.split() + OPTIONS


# Check for debug manually to see if we want
# debug info about includes

if DEBUGFLAG in OPTIONS:
    ProgramOptions["DEBUG"] = True

# Deal with include files

OPTIONS = ProcessIncludes(OPTIONS)

# Check for debug manually again before we process the options
# to get debug info on command line expansion.  The debug flag
# may have arrived by way of an include file.

if DEBUGFLAG in OPTIONS:
    ProgramOptions["DEBUG"] = True

# Each entry is [old, new, snapshot of ProgramOptions when -r was seen]

RenRequests = []

# Only options that are actually handled below are listed in the
# option string.  Includes were already expanded by ProcessIncludes().

try:
    opts, args = getopt.getopt(OPTIONS, '1abCcdEefghl:qR:r:tvw:Xx')
except getopt.GetoptError as e:
    ErrorMsg(eBADARG % e.args[0])
    sys.exit(1)

# Now process the options

for opt, val in opts:

    if opt == "-1":
        ProgramOptions["GLOBAL"] = False

    elif opt == "-a":
        ProgramOptions["TARGET"] = ALL

    elif opt == "-b":
        ProgramOptions["TARGET"] = NAM

    elif opt == "-C":
        ProgramOptions["CASESENSITIVE"] = True

    elif opt == "-c":
        ProgramOptions["CASESENSITIVE"] = False

    elif opt == "-d":
        DumpState()

    elif opt == "-E":
        ProgramOptions["ERRORCONTINUE"] = True

    elif opt == "-e":
        ProgramOptions["TARGET"] = EXT

    elif opt == "-f":
        ProgramOptions["FORCERENAM"] = True

    elif opt == "-g":
        ProgramOptions["GLOBAL"] = True

    elif opt == "-h":
        Usage()
        sys.exit(0)

    elif opt == "-l":
        EXTDELIM = val

    elif opt == "-q":
        ProgramOptions["QUIET"] = True

    elif opt == "-R":
        RENSEP = val

    # A rename request captures the options in force at the point it
    # appears, so options given later do not affect it

    elif opt == "-r":
        old, new = GetOldNew(val)
        RenRequests.append([old, new, copy.deepcopy(ProgramOptions)])

    elif opt == "-t":
        ProgramOptions["TESTMODE"] = True

    elif opt == "-v":
        PrintStdout(RCSID)
        sys.exit(0)

    elif opt == "-w":

        # int() raises ValueError for text that is not an integer

        try:
            linelen = int(val)
        except ValueError:
            ErrorMsg(eBADLEN % val)
            sys.exit(1)

        if linelen < MINLEN:
            ErrorMsg(eLINELEN)
            sys.exit(1)

        MAXLINELEN = linelen

    elif opt == "-X":
        ProgramOptions["REGEX"] = False

    elif opt == "-x":
        ProgramOptions["REGEX"] = True


if ProgramOptions["DEBUG"]:

    # Dump what we know about the command line

    DumpList(DebugMsg, dCMDLINE, "", sys.argv)
    DumpList(DebugMsg, dPROGENV, "", envopt)
    DumpList(DebugMsg, dRESOLVEDOPTS, "", OPTIONS)

# Create and populate an object with rename targets.  We have to
# do this *before* we process any renaming requests because they may
# make reference to renaming tokens that only can be resolved with the
# contents of the 'targs' data structure.

targs = None
if args:
    targs = RenameTargets(args)

# Display outstanding renaming requests if we're debugging

if ProgramOptions["DEBUG"]:
    for i in RenRequests:
        DumpList(DebugMsg, dRENREQ , "",  i)

# Release the target container if we created one

if targs:
    del targs