# -*- coding: iso-8859-1 -*-
# vim: set ft=python ts=3 sw=3 expandtab:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
#              C E D A R
#          S O L U T I O N S       "Software done right."
#           S O F T W A R E
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# Copyright (c) 2002-2003 Kenneth J. Pronovici.
# All rights reserved.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License,
# Version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Copies of the GNU General Public License are available from
# the Free Software Foundation website, http://www.gnu.org/.
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# Author   : Kenneth J. Pronovici <pronovic@ieee.org>
# Language : Python (>= 2.2)
# Project  : Cedar Backup
# Revision : $Id: filesystem.py,v 1.11 2002/09/20 01:41:38 pronovic Exp $
# Purpose  : Provides filesystem-related functionality for the project
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

# This file was created with a width of 132 characters, and NO tabs.

########
# Notes
########

# This file is not intended to be an executable script.  Instead, it
# is a Python module file that provides common functionality for Cedar
# Backup scripts.  Import this file to use the functionality.


######################
# Pydoc documentation
######################

"""
Provides filesystem-related functionality.

Functions that start with _ should be considered private to this
module, and should not be used by code outside this module.
"""

__author__  = "Kenneth J. Pronovici"


########################################################################
# Imported modules
########################################################################

# System modules
import os
import re
import popen2
import string
import tempfile
import time
import pwd
import grp
import shutil
import sha
import pickle
import exceptions

# Cedar Backup modules
from CedarBackup.exceptions import CedarBackupError


#######################################################################
# Module-wide configuration and constants
#######################################################################

# Names of the external programs this module runs
BZIP2_PROGRAM = 'bzip2'
COMPRESS_PROGRAM = 'compress'
DU_PROGRAM = 'du'
GZIP_PROGRAM = 'gzip'
MOUNT_PROGRAM = 'mount'
TAR_PROGRAM = 'tar'
UNMOUNT_PROGRAM = 'umount'

# Largest number of file arguments appended to an archive by one tar invocation
TAR_MAX_ARGS = 500

# Time conversion constants (integers)
SECONDS_PER_MINUTE = 60
MINUTES_PER_HOUR = 60
HOURS_PER_DAY = 24
SECONDS_PER_DAY = HOURS_PER_DAY * MINUTES_PER_HOUR * SECONDS_PER_MINUTE

# Size conversion constants.  These are floats, except KBYTES_PER_SECTOR
# which is an integer; a sector is taken to be 2 kbytes.
BYTES_PER_KBYTE = 1024.0
KBYTES_PER_MBYTE = 1024.0
BYTES_PER_MBYTE = KBYTES_PER_MBYTE * BYTES_PER_KBYTE
BYTES_PER_SECTOR = 2 * BYTES_PER_KBYTE
KBYTES_PER_SECTOR = 2

# Name of the marker file that causes a directory to be ignored
DEF_IGNORE_FILE = ".cbignore"


#######################################################################
# Public functions
#######################################################################

#############################
# execute_command() function
#############################

def execute_command(command):

   """
   Executes a command string without handing it to a shell (UNIX-specific).

   This function exists to replace direct calls to os.popen() in the Cedar
   Backup code.  Since os.popen() takes a command string, this function does,
   too.

   It's not safe to call a function such as os.popen() with untrusted
   arguments, since the shell will interpret variables and other constructs
   in the string (imagine that the argument is $WHATEVER, but $WHATEVER
   contains something like "; rm -fR ~/; echo" in the current environment).

   It's safer to pass popen2 a list rather than a string.  When called that
   way, the list's first item is used as the command and the remaining items
   are passed to it as arguments, untouched.

   This function splits the command string into such a list and runs it with
   popen2.Popen4.  The string is split on spaces, except that a double-quoted
   section is kept together as a single field (the quotes themselves are
   removed).  For instance, the string 'tar -cvf "/boot" /stuff "/more stuff"'
   is split into the list ['tar', '-cvf', '/boot', '/stuff', '/more stuff'].

   Note: Only spaces and double quotes are special.  Single quotes do not
   group (so 'more stuff' is split into two arguments), commas are ordinary
   characters, and an empty quoted string ("") does not produce a field.

   Note: Output redirection (i.e. 2>&1, 2>/dev/null, etc.) does not work when
   using this function.  The redirection string will be passed to the command
   just like any other argument.

   Arguments:

      - **command** : Command to execute, in same form as passed to os.popen()

   Returns a tuple '(result, lines)' where 'result' is the wait-encoded return
   value and 'lines' is the combined stdout and stderr output of the command,
   as a list of lines.
   """

   # A field is either a run of characters that are neither space nor double
   # quote, or a non-empty double-quoted string.  Quotes only group; they are
   # stripped before the field is handed to the command.
   fields = re.findall('[^ "]+|"[^"]+"', command)
   fields = [field.replace('"', '') for field in fields]

   pipe = popen2.Popen4(fields)

   # We never write to the child, so close its input right away; that way it
   # cannot block waiting for us.
   pipe.tochild.close()

   # Read everything up to EOF, and always release our end of the pipe.
   try:
      lines = pipe.fromchild.readlines()
   finally:
      pipe.fromchild.close()

   result = pipe.wait()

   return (result, lines)


############################
# copy_local_dir() function
############################

def copy_local_dir(sdir, ddir, user=None, group=None):

   """
   Copies the regular files found directly in a source directory to a
   destination directory.

   Note: this function is *not* recursive.  Subdirectories, symbolic links
   and anything else that is not a regular file are skipped.

   Arguments:

      - **sdir** : Source directory (must be an absolute path to a readable directory)

      - **ddir** : Destination directory

      - **user** : User to change ownership to

      - **group** : Group to change ownership to

   Returns a dictionary of results:

      - 'errors' : List of error messages or None

   If the 'errors' entry is None, the call was successful.  The function
   stops at the first problem it encounters, so files copied before that
   point are left in place.

   Ownership of the copied files is changed only when both 'user' and 'group'
   are given; if just one of them is given, ownership is left alone.
   """

   problems = [ ]

   # Ownership is only touched when both halves of the pair were supplied
   set_owner = user is not None and group is not None

   # Requirements on the source directory, checked in this order
   requirements = [
      (os.path.isabs, "Error: source directory '%s' is not an absolute path.\n"),
      (os.path.isdir, "Error: source directory '%s' is not a directory.\n"),
      (lambda path: os.access(path, os.R_OK), "Error: source directory '%s' is not readable.\n"),
   ]

   # CedarBackupError is used purely as a "stop now" signal; the message has
   # always been recorded in 'problems' before it is raised.
   try:

      for (satisfied, complaint) in requirements:
         if not satisfied(sdir):
            problems.append(complaint % sdir)
            raise CedarBackupError()

      if set_owner:
         try:
            uid = pwd.getpwnam(user)[2]
            gid = grp.getgrnam(group)[2]
         except:
            problems.append("Error: passed-in user '%s' group '%s' combination is invalid.\n" % ( user, group))
            raise CedarBackupError()

      for entry in os.listdir(sdir):
         source = os.path.join(sdir, entry)
         target = os.path.join(ddir, entry)

         # Only plain files are copied; links and directories are passed over
         if os.path.islink(source) or not os.path.isfile(source):
            continue

         try:
            shutil.copy(source, target)
         except:
            problems.append("Error: unable to copy file '%s' to '%s' using Python built-ins.\n" % (source, target))
            raise CedarBackupError()

         if not set_owner:
            continue

         try:
            os.chown(target, uid, gid)
         except:
            problems.append("Error: unable to change ownership of '%s' to '%s:%s' using Python built-ins.\n" % (
                            target, user, group))
            raise CedarBackupError()

   except CedarBackupError:
      pass

   # An empty problem list is reported as None
   return { 'errors' : problems or None }


#############################
# copy_remote_dir() function
#############################

def copy_remote_dir(rcp, rhost, sdir, ddir, luser=None, ruser=None):

   """
   Copies all files in a directory on a remote host to the local host.

   The remote source is given to the copy command as 'sdir/*', so only what
   that pattern matches directly inside the directory is requested.

   Arguments:

      - **rcp** : Remote copy command

      - **rhost** : Remote host to copy from

      - **sdir** : Source directory (on the remote host)

      - **ddir** : Destination directory (on the local host)

      - **luser** : Local username

      - **ruser** : Remote username

   Returns a dictionary of results:

      - 'errors' : List of error messages or None

      - 'warnings' : List of warning messages or None (this function never adds any)

      - 'output' : Output from the copy command, as a list of strings, or None

   If the 'errors' entry is None, the call was successful.

   If 'ruser' is not None, the remote source is written in the
   *user@host:path* form.  If 'luser' is not None, the whole copy command is
   wrapped in *su luser -c "..."* so that it runs as that local user; this is
   refused (with an error) unless os.getuid() reports 0, i.e. the process is
   running as root.
   """

   output   = [ ]
   errors   = [ ]
   warnings = [ ]

   # CedarBackupError is only a "stop now" signal; the details are in 'errors'
   try:

      # Check the su precondition before building anything
      if luser is not None and os.getuid() != 0:
         errors.append("Error: only the root user may do remote copies as a specific user.\n")
         raise CedarBackupError()

      if ruser is None:
         remote_source = "%s:%s/*" % (rhost, sdir)
      else:
         remote_source = "%s@%s:%s/*" % (ruser, rhost, sdir)

      rcp_cmd = "%s %s %s" % (rcp, remote_source, ddir)
      if luser is not None:
         # The double quotes make execute_command() pass the copy to su as one argument
         rcp_cmd = 'su %s -c "%s"' % (luser, rcp_cmd)

      (status, lines) = execute_command(rcp_cmd)
      output.extend(lines)
      if status != 0:
         errors.append("Error executing remote copy command.\n")
         errors.append("Command was [%s].\n" % rcp_cmd)
         raise CedarBackupError()

   except CedarBackupError:
      pass

   # Empty lists are reported as None
   return { 'output'   : output or None,
            'errors'   : errors or None,
            'warnings' : warnings or None }


##############################
# copy_remote_file() function
##############################

def copy_remote_file(rcp, rhost, sfile, dfile, luser=None, ruser=None):

   """
   Copies a single file from a remote host to the local host.

   Arguments:

      - **rcp** : Remote copy command

      - **rhost** : Remote host to copy from

      - **sfile** : Source file (on the remote host)

      - **dfile** : Destination file (on the local host)

      - **luser** : Local username

      - **ruser** : Remote username

   Returns a dictionary of results:

      - 'errors' : List of error messages or None

      - 'warnings' : List of warning messages or None (this function never adds any)

      - 'output' : Output from the copy command, as a list of strings, or None

   If the 'errors' entry is None, the call was successful.

   If 'ruser' is not None, the remote source is written in the
   *user@host:path* form.  If 'luser' is not None, the whole copy command is
   wrapped in *su luser -c "..."* so that it runs as that local user; this is
   refused (with an error) unless os.getuid() reports 0, i.e. the process is
   running as root.
   """

   output   = [ ]
   errors   = [ ]
   warnings = [ ]

   # CedarBackupError is only a "stop now" signal; the details are in 'errors'
   try:

      # Check the su precondition before building anything
      if luser is not None and os.getuid() != 0:
         errors.append("Error: Only the root user may do remote copies as a specific user.\n")
         raise CedarBackupError()

      if ruser is None:
         remote_source = "%s:%s" % (rhost, sfile)
      else:
         remote_source = "%s@%s:%s" % (ruser, rhost, sfile)

      rcp_cmd = "%s %s %s" % (rcp, remote_source, dfile)
      if luser is not None:
         # The double quotes make execute_command() pass the copy to su as one argument
         rcp_cmd = 'su %s -c "%s"' % (luser, rcp_cmd)

      (status, lines) = execute_command(rcp_cmd)
      output.extend(lines)
      if status != 0:
         errors.append("Error executing remote copy command.\n")
         errors.append("Command was [%s].\n" % rcp_cmd)
         raise CedarBackupError()

   except CedarBackupError:
      pass

   # Empty lists are reported as None
   return { 'output'   : output or None,
            'errors'   : errors or None,
            'warnings' : warnings or None }


####################################
# local_indicator_exists() function
####################################

def local_indicator_exists(indicator):

   """
   Tells whether an indicator file exists on the local machine.

   Arguments:

      - **indicator** : Absolute path of indicator file

   The result is true only when 'indicator' is an absolute path and names an
   existing regular file; a relative path is reported as not existing without
   looking at the disk.
   """

   absolute = os.path.isabs(indicator)
   if not absolute:
      return absolute

   return os.path.isfile(indicator)


#####################################
# remote_indicator_exists() function
#####################################

def remote_indicator_exists(rcp, rhost, indicator, working_dir, luser=None, ruser=None):

   """
   Tells whether the indicator file exists on the remote machine.

   The check is done by attempting to copy the indicator file from the remote
   host into a temporary file using copy_remote_file().  If the copy reports
   any error, for whatever reason, the indicator is considered not to exist.
   The temporary file is removed again before returning.

   Arguments:

      - **rcp** : Remote copy command

      - **rhost** : Remote host to check indicator on

      - **indicator** : Absolute path of indicator file on remote host

      - **working_dir** : Working directory that can be used for temporary storage

      - **luser** : Local username

      - **ruser** : Remote username

   Returns 1 if the indicator could be copied and 0 otherwise.  A relative
   'indicator' path yields 0 without contacting the remote host.

   Note: as a side effect, the module-wide 'tempfile.tempdir' setting is
   changed to 'working_dir'.
   """

   if not os.path.isabs(indicator):
      return 0

   # Only a name is generated here; the file itself is created (if at all)
   # by the remote copy command.
   tempfile.tempdir = working_dir
   temp_file = tempfile.mktemp()

   call = copy_remote_file(rcp, rhost, indicator, temp_file, luser, ruser)

   # Best-effort cleanup: when the copy failed there is normally nothing to
   # remove.  Only filesystem errors are ignored, so that an interrupt is not
   # swallowed here.
   try:
      os.remove(temp_file)
   except OSError:
      pass

   if call['errors'] is not None:
      return 0

   return 1


#############################
# write_indicator() function
#############################

def write_indicator(indicator, user=None, group=None):

   """
   Creates the local indicator file as specified.

   The indicator is created as an empty file; an existing file of the same
   name is truncated.

   Arguments:

      - **indicator** : Absolute path of indicator file

      - **user** : User to change ownership to

      - **group** : Group to change ownership to

   Returns a dictionary of results:

      - 'errors' : List of error messages or None

   If the 'errors' entry is None, the call was successful.

   Use 'user' and 'group' (together, not separately) to indicate what the
   ownership of the indicator file should be set to.  If only one of them is
   given, ownership is left alone.  If the ownership change fails, the
   (already created) indicator file is left in place and an error is
   reported.
   """

   errors = [ ]

   try:
      handle = open(indicator, "w")
      try:
         handle.write("")
      finally:
         # Close explicitly rather than relying on garbage collection
         handle.close()
   except (IOError, OSError):
      errors.append("Unable to create file '%s'.\n" % indicator)
   else:
      if user is not None and group is not None:
         try:
            # The lookups raise KeyError for an unknown name; chown raises OSError
            uid = pwd.getpwnam(user)[2]
            gid = grp.getgrnam(group)[2]
            os.chown(indicator, uid, gid)
         except (KeyError, OSError):
            errors.append("Unable to change ownership on file '%s' using Python built-ins.\n" % indicator)

   results = { 'errors' : None }
   if len(errors) > 0:
      results['errors'] = errors

   return results


#######################
# tree_size() function
#######################

def tree_size(dir):

   """
   Returns the size of a directory tree, as reported by du(1).

   Arguments:

      - **dir** : Directory to get the size of

   Upon error (du exits with a non-zero status, produces no output, or
   produces output that does not start with a number), None is returned.

   Otherwise the size is returned as a dictionary with these keys:

      - 'bytes' : Size in bytes (float)

      - 'kbytes' : Size in kbytes (integer, as printed by du)

      - 'blocks' : Size in 2-kbyte blocks (kbytes divided by KBYTES_PER_SECTOR)

      - 'mbytes' : Size in megabytes (float)

   Unlike the expand_tree() function, this function doesn't take into
   account exclusions.
   """

   # We use du to get the size.  It's assumed that the -k option (to return
   # the size in kbytes) is available.  The first line of output is expected
   # to start with the size.
   du_pattern = re.compile(r"^([0-9]+)")

   # The directory is double-quoted so that execute_command() keeps a path
   # containing spaces together as a single argument.
   du_cmd = '%s -sk "%s"' % (DU_PROGRAM, dir)
   (result, lines) = execute_command(du_cmd)
   if result != 0 or len(lines) == 0:
      return None

   parsed = du_pattern.search(lines[0])
   if not parsed:
      return None

   kbytes = int(parsed.group(1))

   size =  { 'bytes'  : kbytes * BYTES_PER_KBYTE,
             'kbytes' : kbytes,
             'blocks' : kbytes / KBYTES_PER_SECTOR,
             'mbytes' : kbytes / KBYTES_PER_MBYTE }

   return size


#########################
# expand_tree() function
#########################

def expand_tree(search_dir, change_digest=None, ignore_file=DEF_IGNORE_FILE, exclude_list=None):

   """
   Returns an expanded list of files in a tree, optionally excluding some files.

   Arguments:

      - **search_dir** : Directory to expand

      - **change_digest** : Dictionary used to track incremental changes

      - **ignore_file** : Filename which indicates to ignore a directory

      - **exclude_list** : List of files and/or directories to exclude (None means no exclusions)

   The tree is traversed with os.path.walk(), and the decision about what
   goes into the list is made by the _visit_expand_tree() visit function,
   which receives the arguments above through a control dictionary.  The
   intended contract is as follows.

   Any file or directory in the excluded list is recursively excluded
   from the expanded list.  Any directory that contains the ignore file
   is also recursively excluded from the expanded list.

   If the 'change_digest' dictionary is passed in (is not None) then the
   expanded tree will contain only files that have changed, as indicated
   by their digest value in the change_digest dictionary.  This is the
   way that incremental backups can be implemented.

   The 'change_digest' dictionary itself will be updated to include
   the new digest value for any files that have changed.  If the
   change_digest dictionary is not None, but does not contain a digest
   value for a particular file, that file will always be added to the
   list, and the 'change_digest' dictionary will be updated to include a
   digest value for the file.

   If anything was collected, 'search_dir' itself is placed at the front of
   the returned list; if nothing was collected, the returned list is empty.
   """

   # Use a fresh list for each call rather than one default list object
   # shared by every call, since the list is handed on to the visit function.
   if exclude_list is None:
      exclude_list = [ ]

   # Everything the visit function needs, and the place it puts its results
   control = { 'change_digest' : change_digest,
               'exclude_list'  : exclude_list,
               'file_list'     : [ ],
               'ignore_file'   : ignore_file }

   # Build the file list using the visit function
   os.path.walk(search_dir, _visit_expand_tree, control)

   # Add the search dir to the list (if there are no other files, keep the list empty)
   if len(control['file_list']) > 0:
      control['file_list'].insert(0, search_dir)

   return control['file_list']


########################
# aged_files() function
########################

def aged_files(dir, age):

   """
   Provides a list of files last written to more than a certain number of days ago.

   Arguments:

      - **dir** : Directory to walk

      - **age** : Age threshold, handed to the visit function as control['age']

   The tree is walked with os.path.walk(); the _visit_aged_files() visit
   function decides which entries are added to the returned list.
   """

   state = { 'age' : age, 'list' : [ ] }
   os.path.walk(dir, _visit_aged_files, state)

   return state['list']


########################
# empty_dirs() function
########################

def empty_dirs(dir):

   """
   Provides a list of all empty directories within another directory.

   Arguments:

      - **dir** : Directory to walk (also handed to the visit function as control['dir'])

   The tree is walked with os.path.walk(); the _visit_empty_dirs() visit
   function decides which entries are added to the returned list.
   """

   state = { 'dir' : dir, 'list' : [ ] }
   os.path.walk(dir, _visit_empty_dirs, state)

   return state['list']


######################
# tar_tree() function
######################

def tar_tree(dir, tarfile,
             working_dir=None, method="tar",
             digest_file=None, reset_digest=False,
             ignore_file=DEF_IGNORE_FILE, exclude_list=None,
             user=None, group=None):

   """
   Creates a tarfile based on the passed-in directory.

   Note: this function relies on GNU tar functionality that is probably
   not available in other versions of tar.

   Arguments:

      - **dir** : Directory to be tar'd up (absolute path)

      - **tarfile** : Final name of file to be created (absolute path)

      - **working_dir** : Working directory to be used for temp files

      - **method** : Tar method (see below)

      - **digest_file** : Path to incremental change digest file on disk

      - **reset_digest** : Indicates whether to reset (clear) change digest

      - **ignore_file** : Filename which indicates to ignore a directory

      - **exclude_list** : List of files and/or directories to exclude (None means no exclusions)

      - **user** : User that tarfile should be owned by

      - **group** : Group that tarfile should be owned by

   Returns a dictionary of results:

      - 'errors' : List of error messages or None

      - 'warnings' : List of warning messages or None

      - 'output' : Output from external command(s), as a list of strings, or None

   If the 'errors' entry is None, the call was successful.  Note that when
   there is nothing to back up, no tarfile is created; this is reported as a
   warning, not as an error.

   Available tar methods are:

      - *tar*: Simple tar, no compression (i.e. 'file.tar')

      - *targz*: Gzipped tarfile (i.e. 'file.tar.gz')

      - *tarbz2*: Bzipped tarfile (i.e. 'file.tar.bz2')

      - *tarz*: Compressed tarfile (i.e. 'file.tar.Z')

   Note that to use a compressed tar method, you must have the
   appropriate compressor installed.

   Optionally, use user and group (together, not separately) to indicate
   what the ownership of the tarfile should be set to after being
   created.  Passing just one of the two is an error.

   The 'digest_file' argument is the name of the change digest file on
   disk for this directory.  If a 'digest_file' is passed in (i.e. is not
   None), then an incremental backup will be performed.  The change
   digest will be loaded from disk (if it already exists) or created
   from scratch (if it does not exist), and is handed to expand_tree(),
   which uses it to decide which files go into the tarfile.  If the digest
   file exists but cannot be read, written or loaded, a warning is issued
   and all files are backed up as if no 'digest_file' had been given.  If
   the 'reset_digest' argument is True and the 'digest_file' argument is not
   None, the function will act as if any existing digest is empty, and
   consequently all files will be backed up (having this flag makes it easy
   for higher-level functionality to decide when to stop doing incremental
   backups: weekly, month, etc.).

   Note: as a side effect, the module-wide 'tempfile.tempdir' setting is
   changed to 'working_dir'.
   """

   #######################
   # Initialize variables
   #######################

   output              = [ ]
   errors              = [ ]
   warnings            = [ ]

   temp_file           = ""
   result_file         = ""

   # Use a fresh list for each call rather than one shared default list object
   if exclude_list is None:
      exclude_list = [ ]


   #####################################################
   # Use a big try statement, for easier error-handling
   #####################################################
   # CedarBackupError is used purely as a "stop now" signal; the details
   # have always been recorded in 'errors' or 'warnings' before it is raised.

   try:

      try:

         #################
         # Validate input
         #################

         if not os.path.isabs(dir):
            errors.append("Error: collect directory '%s' is not an absolute path.\n" % dir)
            raise CedarBackupError()

         if not os.path.isdir(dir):
            errors.append("Error: collect directory '%s' is not a directory.\n" % dir)
            raise CedarBackupError()

         if not os.path.isabs(tarfile):
            errors.append("Error: tarfile '%s' is not an absolute path.\n" % tarfile)
            raise CedarBackupError()

         # NOTE: mktemp() only generates a name; the file itself is created
         # later by tar, so the name is not reserved in the meantime.
         tempfile.tempdir = working_dir
         temp_file = tempfile.mktemp()

         if method not in ['tar', 'targz', 'tarbz2', 'tarz']:
            errors.append("Error: method '%s' is not one of [tar, targz, tarbz2, tarz].\n" % method)
            raise CedarBackupError()

         # Exactly one of user/group was given
         if (user is None) != (group is None):
            errors.append("Error: to reset ownership on file, pass in both user and group (only one was given).\n")
            raise CedarBackupError()

         if user is not None and group is not None:
            try:
               uid = pwd.getpwnam(user)[2]
               gid = grp.getgrnam(group)[2]
            except KeyError:
               errors.append("Error: passed-in user '%s' group '%s' combination is invalid.\n" % ( user, group))
               raise CedarBackupError()


         #########################
         # Load the change digest
         #########################
         # If the file is there and we can't load it, or if the file is there and we
         # wouldn't be able to write to it, then we'll ignore it and pretend that we're
         # not doing an incremental backup.

         if digest_file is None:
            change_digest = None
         elif reset_digest:
            change_digest = { }
         elif not os.path.isfile(digest_file):
            change_digest = { }
         elif not (os.access(digest_file, os.W_OK) and os.access(digest_file, os.R_OK)):
            warnings.append("Warning: change digest '%s' is not accessible; backing up all files.\n" % digest_file)
            change_digest = None
         else:
            # NOTE: unpickling executes whatever the file describes, so the
            # digest file must not be writable by untrusted users.
            # Unpickling a damaged file can fail in many different ways, so
            # the handler is deliberately broad.
            try:
               digest_handle = open(digest_file, "r")
               try:
                  change_digest = pickle.load(digest_handle)
               finally:
                  digest_handle.close()
            except Exception:
               warnings.append("Warning: unable to load change digest '%s' via pickle; backing up all files.\n" % digest_file)
               change_digest = None


         ########################################
         # Get the list of files to be backed up
         ########################################

         filepath_list = expand_tree(dir, change_digest, ignore_file, exclude_list)


         ###########################################
         # Get out if there are no files to back up
         ###########################################

         if len(filepath_list) == 0:
            warnings.append("Warning: no files to back up in directory %s.\n" % dir)
            raise CedarBackupError()


         #############################
         # Create the initial tarfile
         #############################
         # What we want to be able to do is stick our entire list of files
         # on the end of the command and tell tar(1) "put all of these in
         # the archive".  However, there's a limit on the length of the
         # command line, and we'll likely exceed it for some directories.
         #
         # What we do instead is create an initial archive holding the
         # first entry, and then append at most TAR_MAX_ARGS entries at a
         # time until we run out of files.
         #
         # --exclude is used to make sure we don't accidentally include our
         # tarfile in the archive, if we happen to be archiving the same
         # directory the tarfile will be written to.  --no-recursion is used
         # because we'll be passing both files and directories to tar(1)
         # as arguments - we don't want to include the same file twice,
         # as would happen if we passed 'dir/' and 'dir/file' to tar(1)
         # without specifying --no-recursion.
         #
         # We have to make sure to put "s around the names of the files, in
         # case they include spaces - otherwise execute_command() would
         # split them into several arguments.

         tar_cmd = '%s --no-recursion --exclude %s -cvf %s "%s"' % (TAR_PROGRAM, temp_file, temp_file, filepath_list[0])
         (result, lines) = execute_command(tar_cmd)
         output += lines
         if result != 0:
            errors.append("Error creating initial tarfile '%s'.  See results for more details.\n" % temp_file)
            errors.append("Command was [%s].\n" % tar_cmd)
            raise CedarBackupError()


         ##################################
         # Append onto the initial tarfile
         ##################################
         # Entry 0 is already in the archive; the rest goes in chunks.

         for start in range(1, len(filepath_list), TAR_MAX_ARGS):

            chunk = filepath_list[start:start + TAR_MAX_ARGS]
            filepath_list_string = ' '.join(['"%s"' % filepath for filepath in chunk])

            tar_cmd = "%s --no-recursion --exclude %s -rvf %s %s" % (TAR_PROGRAM, temp_file, temp_file, filepath_list_string)
            (result, lines) = execute_command(tar_cmd)
            output += lines
            if result != 0:
               errors.append("Error appending to tarfile '%s'.  See results for more details.\n" % temp_file)
               errors.append("Command was [%s].\n" % tar_cmd)
               raise CedarBackupError()


         #########################################
         # Build and execute the compress command
         #########################################
         # The compressor is expected to replace temp_file with a file of
         # the same name plus the usual extension.

         if method == "tar":
            result_file = temp_file

         else:
            if method == "targz":
               compress_program = GZIP_PROGRAM
               result_file = "%s.gz" % temp_file
            elif method == "tarbz2":
               compress_program = BZIP2_PROGRAM
               result_file = "%s.bz2" % temp_file
            elif method == "tarz":
               compress_program = COMPRESS_PROGRAM
               result_file = "%s.Z" % temp_file

            compress_cmd = "%s %s" % (compress_program, temp_file)
            (result, lines) = execute_command(compress_cmd)
            output += lines
            if result != 0:
               errors.append("Error compressing tarfile '%s'.  See results for more details.\n" % temp_file)
               errors.append("Command was [%s].\n" % compress_cmd)
               # Note: temp_file is removed in the finally block below
               if len(result_file) > 0 and os.path.isfile(result_file):
                  try:
                     os.remove(result_file)
                  except OSError:
                     errors.append("Error removing temporary file '%s' using Python built-in.\n" % result_file)
               raise CedarBackupError()


         ##############################################################
         # Move the tarfile to the requested name, and reset ownership
         ##############################################################
         # os.rename() won't work across filesystem boundaries.  Copying
         # from old->new and then removing old works across boundaries,
         # but wastes space while both exist.  As a compromise, try
         # os.rename() first and fall back on the copy if that fails.

         try:
            os.rename(result_file, tarfile)
         except OSError:
            try:
               shutil.copyfile(result_file, tarfile)
            except (IOError, os.error, OSError):
               errors.append("Error renaming file '%s' to '%s' using Python built-in.\n" % (result_file, tarfile))
               try:
                  os.remove(result_file)
               except OSError:
                  errors.append("Error removing temporary file '%s' using Python built-in.\n" % result_file)
               raise CedarBackupError()
            try:
               os.remove(result_file)
            except OSError:
               errors.append("Error removing temporary file '%s' using Python built-in.\n" % result_file)
               raise CedarBackupError()

         # Ownership will only be reset if they pass in both a user and group
         if user is not None and group is not None:
            try:
               os.chown(tarfile, uid, gid)
            except OSError:
               errors.append("Error changing file ownership of '%s' to '%s:%s'.\n" % (tarfile, uid, gid))
               # Don't leave behind a tarfile with the wrong ownership
               try:
                  os.remove(tarfile)
               except OSError:
                  errors.append("Error removing temporary file '%s'.\n" % tarfile)
               raise CedarBackupError()


         ################################
         # Write the digest back to disk
         ################################
         # We never write the digest out until we're sure we've succeeded
         # in creating the tarfile. Why?  That way if the backup fails
         # today, at least the same files will be backed up tomorrow,
         # even if they don't change again before tomorrow.

         if change_digest is not None and digest_file is not None:
            try:
               digest_handle = open(digest_file, "w")
               try:
                  pickle.dump(change_digest, digest_handle)
               finally:
                  digest_handle.close()
            except Exception:
               warnings.append("Warning: unable to write change digest %s back to disk via pickle.\n" % digest_file)


      #####################################
      # Handle all Cedar Backup exceptions
      #####################################

      except CedarBackupError:
         pass


   ###########
   # Clean up
   ###########
   # This is done in a finally block because we want to make sure we
   # always clean up this temporary file, even if an unexpected exception
   # is on its way out of the function.

   finally:

      if len(temp_file) > 0 and os.path.isfile(temp_file):
         try:
            os.remove(temp_file)
         except OSError:
            errors.append("Error removing temporary file '%s'.\n" % temp_file)


   #####################
   # Return the results
   #####################
   # Empty lists are reported as None

   results             =  { }
   results['output']   = None
   results['errors']   = None
   results['warnings'] = None

   if len(output) > 0:
      results['output'] = output
   if len(errors) > 0:
      results['errors'] = errors
   if len(warnings) > 0:
      results['warnings'] = warnings

   return results


###############################
# consistency_check() function
###############################

def consistency_check(working_dir, device, source_dir, date_list):

   ######################
   # Pydoc documentation
   ######################

   """
   Performs a consistency check on data that has been written to disc.

   It seems that sometimes, it's possible to create a corrupted
   multisession disc (i.e. one that cannot be read) although no errors
   were encountered while writing the disc.  This consistency check
   makes sure that the data read from disc matches the data that was
   used to create the disc.

   Arguments:

      - **working_dir** : Working directory to be used for temporary files

      - **device** : Filesystem device name (i.e. '/dev/cdrw')

      - **source_dir** : Source directory to check against

      - **date_list** : List of backup dates to do checks for (format i.e. '2002/08/20')

   Returns a dictionary of results:

      - 'errors' : List of error messages and/or differences, or None

      - 'warnings' : List of warning messages or None

      - 'output' : Output from external command(s), as a list of strings, or None

   If the "errors" entry is None, the function call was successful and
   there were no problems with the consistency check.

   The function mounts the device at a temporary mount point in the
   working directory, and then compares the indicated daily directory
   (i.e. '2002/08/20') in the source directory and at the mount point.
   The two directories are compared in two passes: the first pass checks
   to see whether the list of files in one tree is the same as the list
   in the other, and then the second pass does a digest on each of the
   files, to make sure they match.

   Only regular files that are not symbolic links take part in the
   comparison.  As a side effect, tempfile.tempdir is set to the working
   directory.  Exceptions other than CedarBackupError (for instance, one
   raised while creating the mount point) are not handled here; they
   propagate to the caller once the cleanup step has been attempted.
   """

   #######################
   # Initialize variables
   #######################

   results             =  { }
   results['output']   = None
   results['errors']   = None
   results['warnings'] = None

   output              = [ ]
   errors              = [ ]
   warnings            = [ ]

   mounted             = False

   # Bound up front so that the cleanup code below can always look at it,
   # even when we fail before a mount point name has been generated.
   mount_point         = ""


   #####################################################
   # Use a big try statement, for easier error-handling
   #####################################################
   # The try statements are nested because this code targets Python
   # versions in which except and finally cannot share one try.

   try:

      try:

         ###################
         # Mount the device
         ###################
         # First, we create the mount point.
         # Hrm.  This is probably kinda Linux specific?
         #
         # NOTE(review): tempfile.mktemp() only generates a name, so there
         # is a window between choosing the name and the os.mkdir() call.
         # tempfile.mkdtemp() would close it, but requires Python 2.3+.

         tempfile.tempdir = working_dir
         mount_point = tempfile.mktemp()
         os.mkdir(mount_point)

         mount_cmd = '%s -tiso9660 %s %s' % (MOUNT_PROGRAM, device, mount_point)
         (result, lines) = execute_command(mount_cmd)
         output += lines
         if result != 0:
            errors.append("Error mounting device '%s' at temporary mount point '%s'.  See results for more details.\n" % (
                          device, mount_point))
            errors.append("Command was [%s].\n" % mount_cmd)
            raise CedarBackupError()

         mounted = True


         ################################
         # Check for each passed-in date
         ################################

         # Here, we first report on directories that have different files in
         # them, and then if the directories have the same files in them, we
         # report any of the individual files that are different.
         #
         # Note that even if one directory is bad, we continue checking the
         # rest, and even if one file is bad within a directory, we
         # continue checking the rest.  In this case, the more information
         # we can give back to the user, the better.

         difference = False

         for date in date_list:

            ######################################
            # Figure out which directories to use
            ######################################

            disc_dir = os.path.join(mount_point, date)
            daily_dir = os.path.join(source_dir, date)


            #########################################################
            # Compare the list of files and directories in each tree
            #########################################################
            # This might get a bit inefficient if the trees are big.
            # Paths are made relative to their own tree so that the two
            # sorted lists can be compared directly.

            disc_list = []
            lst = expand_tree(disc_dir)
            for f in lst:
               if f != disc_dir and os.path.isfile(f) and not os.path.islink(f):
                  disc_list.append(f.replace("%s/" % disc_dir, "", 1))
            disc_list.sort()

            daily_list = []
            lst = expand_tree(daily_dir)
            for f in lst:
               if f != daily_dir and os.path.isfile(f) and not os.path.islink(f):
                  daily_list.append(f.replace("%s/" % daily_dir, "", 1))
            daily_list.sort()

            if disc_list != daily_list:
               errors.append("Directory listings differ for '%s' (use 'find' by hand to see differences).\n" % date)
               difference = True
               continue    # just go on to next date - the next test would fail anyway


            ##########################################
            # Compare the digest values for each file
            ##########################################
            # Note that we can use whichever list we want as the basis for our search.
            #
            # NOTE(review): _file_digest() is documented by its callers as
            # possibly returning an empty string; two files that both yield
            # an empty digest compare as equal here.

            for f in disc_list:
               disc_file = os.path.join(disc_dir, f)
               daily_file = os.path.join(daily_dir, f)
               if _file_digest(disc_file) != _file_digest(daily_file):
                  errors.append("File '%s' differs.\n" % os.path.join(date, f))
                  difference = True

         if not difference:
            output.append("Consistency check completed for '%s'.  No problems found.\n" % source_dir)
         else:
            output.append("Consistency check completed for '%s'.  Some differences found.\n" % source_dir)
            raise CedarBackupError()


      #####################################
      # Handle all Cedar Backup exceptions
      #####################################
      # The details have already been recorded in the errors list by
      # whoever raised the exception, so there is nothing more to do.

      except CedarBackupError:
         pass


   ###############################################
   # Unmount and remove the temporary mount point
   ###############################################
   # This is done in a 'finally' block so we can try to make sure
   # it always happens.  It wouldn't be good to leave the CD-R drive
   # stuck mounted off some odd directory no one could get to.

   finally:

      if mounted:
         unmount_cmd = '%s %s' % (UNMOUNT_PROGRAM, mount_point)
         (result, lines) = execute_command(unmount_cmd)
         output += lines
         if result != 0:
            errors.append("Error unmounting temporary mount point '%s'.  See results for more details.\n" % mount_point)
            # Report the command that actually failed (the unmount, not the mount).
            errors.append("Command was [%s].\n" % unmount_cmd)

         mounted = False

      if len(mount_point) > 0 and os.path.isdir(mount_point):
         try:
            os.rmdir(mount_point)
         except OSError:
            errors.append("Error removing temporary mount point '%s'.\n" % mount_point)


   #####################
   # Return the results
   #####################
   # Entries for which nothing was collected are left as None.

   if len(output) > 0:
      results['output'] = output
   if len(errors) > 0:
      results['errors'] = errors
   if len(warnings) > 0:
      results['warnings'] = warnings

   return results


#######################################################################
# Private functions
#######################################################################

##########################
# _file_digest() function
##########################

def _file_digest(filepath):

   """
   Returns an SHA message digest (in ASCII-safe form) for a particular file.

   The SHA digest value can be used to determine whether a given file
   has changed.  Note that this might not be all that fast if you have
   to run it for lots of files, but it's a better option than doing a
   stat on a file and looking for an mtime value to see if it changed.
   """

   try:
      digest = sha.new(open(filepath).read()).hexdigest()
      return digest
   except:
      return ""


###############################
# _visit_aged_files() function
###############################

def _visit_aged_files(control, dir, entries):
   """
   Visitor for os.path.walk(), used by the aged_files() function.

   For each name in entries that is a regular file within dir, the age
   of the file in days is worked out from element 9 of its os.stat()
   result.  Files whose age is at least control['age'] have their full
   path appended to control['list'].
   """
   for name in entries:
      candidate = os.path.join(dir, name)
      if not os.path.isfile(candidate):
         continue
      elapsed_days = (time.time() - os.stat(candidate)[9]) / SECONDS_PER_DAY
      if elapsed_days >= control['age']:
         control['list'].append(candidate)


###############################
# _visit_empty_dirs() function
###############################

def _visit_empty_dirs(control, dir, entries):
   """
   Visitor for os.path.walk(), used by the empty_dirs() function.

   Appends dir to control['list'] when entries is empty, unless dir is
   equal to control['dir'].
   """
   if dir == control['dir']:
      return
   if len(entries) == 0:
      control['list'].append(dir)


################################
# _visit_expand_tree() function
################################

def _visit_expand_tree(control, dir, entries):

   """
   Target for os.path.walk() in the expand_tree() function.

   The control dictionary is read and updated as follows:

      - 'ignore_file' : if an entry with this name is in the directory,
        the directory is added to 'exclude_list' and its entries are
        discarded

      - 'exclude_list' : entries whose full path is in this list are
        removed from entries and are not recorded

      - 'change_digest' : None, or a dictionary mapping path to digest;
        when not None, a path is recorded only if it is new to the
        dictionary or its digest differs from the stored one, and the
        dictionary is updated with the current digest

      - 'file_list' : list that recorded paths are appended to

   The entries list is modified in place.
   """

   # This is structured so that (hopefully) we do as little work as
   # possible.  For instance, we only walk through a directory if the
   # ignore file is not in the directory, we only calculate digests on
   # files if there is a need to, etc.  Note that _file_digest() might
   # return an empty string.  We don't care for the purposes of this
   # function.

   if control['ignore_file'] in entries:
      # Clear the caller's list in place (rather than rebinding the name)
      # so that the walk sees nothing left to descend into.
      del entries[0:]
      control['exclude_list'].append(dir)
      return

   # Note: it's very important that the loop on the entries use the
   # entries[:] form to get a *copy* of the list, because entries is
   # modified inside the loop.

   for entry in entries[:]:
      entrypath = os.path.join(dir, entry)

      if entrypath in control['exclude_list']:
         while entry in entries:
            entries.remove(entry)
         continue

      change_digest = control['change_digest']
      if change_digest is None:
         # No digest tracking requested: every entry is recorded.
         control['file_list'].append(entrypath)
      elif entrypath in change_digest:
         # Known path: record it only when its digest has changed.
         digest = _file_digest(entrypath)
         if digest != change_digest[entrypath]:
            change_digest[entrypath] = digest
            control['file_list'].append(entrypath)
      else:
         # Path not seen before: remember its digest and record it.
         change_digest[entrypath] = _file_digest(entrypath)
         control['file_list'].append(entrypath)


########################################################################
# Module entry point
########################################################################

# This file is a library module.  If it is executed directly as a script
# (rather than imported), all it does is print a short notice.
if __name__ == '__main__':
   print("Module just exists to be imported, sorry.")


