Package CedarBackup2 :: Package actions :: Module collect
[hide private]
[frames | no frames]

Source Code for Module CedarBackup2.actions.collect

  1  # -*- coding: iso-8859-1 -*- 
  2  # vim: set ft=python ts=3 sw=3 expandtab: 
  3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  4  # 
  5  #              C E D A R 
  6  #          S O L U T I O N S       "Software done right." 
  7  #           S O F T W A R E 
  8  # 
  9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 10  # 
 11  # Copyright (c) 2004-2008 Kenneth J. Pronovici. 
 12  # All rights reserved. 
 13  # 
 14  # This program is free software; you can redistribute it and/or 
 15  # modify it under the terms of the GNU General Public License, 
 16  # Version 2, as published by the Free Software Foundation. 
 17  # 
 18  # This program is distributed in the hope that it will be useful, 
 19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
 21  # 
 22  # Copies of the GNU General Public License are available from 
 23  # the Free Software Foundation website, http://www.gnu.org/. 
 24  # 
 25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 26  # 
 27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
 28  # Language : Python (>= 2.3) 
 29  # Project  : Cedar Backup, release 2 
 30  # Revision : $Id: collect.py 858 2008-03-17 02:46:45Z pronovic $ 
 31  # Purpose  : Implements the standard 'collect' action. 
 32  # 
 33  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
 34   
 35  ######################################################################## 
 36  # Module documentation 
 37  ######################################################################## 
 38   
 39  """ 
 40  Implements the standard 'collect' action. 
 41  @sort: executeCollect 
 42  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
 43  """ 
 44   
 45   
 46  ######################################################################## 
 47  # Imported modules 
 48  ######################################################################## 
 49   
 50  # System modules 
 51  import os 
 52  import logging 
 53  import pickle 
 54   
 55  # Cedar Backup modules 
 56  from CedarBackup2.filesystem import BackupFileList 
 57  from CedarBackup2.util import isStartOfWeek, changeOwnership, displayBytes, buildNormalizedPath 
 58  from CedarBackup2.actions.constants import DIGEST_EXTENSION, COLLECT_INDICATOR 
 59  from CedarBackup2.actions.util import writeIndicatorFile 
 60   
 61   
 62  ######################################################################## 
 63  # Module-wide constants and variables 
 64  ######################################################################## 
 65   
 66  logger = logging.getLogger("CedarBackup2.log.actions.collect") 
 67   
 68   
 69  ######################################################################## 
 70  # Public functions 
 71  ######################################################################## 
 72   
 73  ############################ 
 74  # executeCollect() function 
 75  ############################ 
 76   
def executeCollect(configPath, options, config):
   """
   Runs the 'collect' backup action.

   Each configured collect file and collect directory is examined in turn.
   An item is backed up today when a full backup was requested, when its
   collect mode is C{daily} or C{incr}, or when its collect mode is C{weekly}
   and today is the configured start of the week.  Existing digests are
   ignored (reset) for a full backup and on the start of the week.

   @note: Once every item has been processed, a collect indicator is written
   to the collect target directory, so it's obvious that the collect action
   has completed.  The stage process uses this indicator to decide whether a
   peer is ready to be staged.

   @param configPath: Path to configuration file on disk (not referenced by
   this function's body).
   @type configPath: String representing a path on disk.

   @param options: Program command-line options.
   @type options: Options object.

   @param config: Program configuration.
   @type config: Config object.

   @raise ValueError: Under many generic error conditions
   @raise TarError: If there is a problem creating a tar file
   """
   logger.debug("Executing the 'collect' action.")
   if config.options is None or config.collect is None:
      raise ValueError("Collect configuration is not properly filled in.")

   noFiles = config.collect.collectFiles is None or len(config.collect.collectFiles) < 1
   noDirs = config.collect.collectDirs is None or len(config.collect.collectDirs) < 1
   if noFiles and noDirs:
      raise ValueError("There must be at least one collect file or collect directory.")

   fullBackup = options.full
   logger.debug("Full backup flag is [%s]" % fullBackup)
   todayIsStart = isStartOfWeek(config.options.startingDay)
   resetDigest = fullBackup or todayIsStart
   logger.debug("Reset digest flag is [%s]" % resetDigest)

   # Individual files
   if config.collect.collectFiles is not None:
      for fileItem in config.collect.collectFiles:
         logger.debug("Working with collect file [%s]" % fileItem.absolutePath)
         # The getters log as they go, so they are called in a fixed order.
         mode = _getCollectMode(config, fileItem)
         archive = _getArchiveMode(config, fileItem)
         digest = _getDigestPath(config, fileItem)
         tarfile = _getTarfilePath(config, fileItem, archive)
         dueToday = fullBackup or mode in ('daily', 'incr') or (mode == 'weekly' and todayIsStart)
         if not dueToday:
            logger.debug("File will not be backed up, per collect mode.")
         else:
            logger.debug("File meets criteria to be backed up today.")
            _collectFile(config, fileItem.absolutePath, tarfile,
                         mode, archive, resetDigest, digest)
         logger.info("Completed collecting file [%s]" % fileItem.absolutePath)

   # Directories
   if config.collect.collectDirs is not None:
      for dirItem in config.collect.collectDirs:
         logger.debug("Working with collect directory [%s]" % dirItem.absolutePath)
         mode = _getCollectMode(config, dirItem)
         archive = _getArchiveMode(config, dirItem)
         ignore = _getIgnoreFile(config, dirItem)
         depth = _getLinkDepth(dirItem)
         digest = _getDigestPath(config, dirItem)
         tarfile = _getTarfilePath(config, dirItem, archive)
         (skipPaths, skipPatterns) = _getExclusions(config, dirItem)
         dueToday = fullBackup or mode in ('daily', 'incr') or (mode == 'weekly' and todayIsStart)
         if not dueToday:
            logger.debug("Directory will not be backed up, per collect mode.")
         else:
            logger.debug("Directory meets criteria to be backed up today.")
            _collectDirectory(config, dirItem.absolutePath, tarfile,
                              mode, archive, ignore, depth,
                              resetDigest, digest, skipPaths, skipPatterns)
         logger.info("Completed collecting directory [%s]" % dirItem.absolutePath)

   writeIndicatorFile(config.collect.targetDir, COLLECT_INDICATOR,
                      config.options.backupUser, config.options.backupGroup)
   logger.info("Executed the 'collect' action successfully.")
144 145 146 ######################################################################## 147 # Private utility functions 148 ######################################################################## 149 150 ########################## 151 # _collectFile() function 152 ########################## 153
def _collectFile(config, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
   """
   Collects a single configured collect file into a tarfile.

   A one-entry backup list is built for the file and handed to
   L{_executeBackup}, which creates the tarfile and, for incremental
   collection, uses the digest path and honours the reset digest flag (the
   flag causes any existing digest to be ignored, but a new digest is always
   rewritten).

   The caller decides the collect and archive modes, since either can be set
   on the collect configuration or on the collect file itself.

   @param config: Config object.
   @param absolutePath: Absolute path of file to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   """
   singleFileList = BackupFileList()
   singleFileList.addFile(absolutePath)
   _executeBackup(config, singleFileList, absolutePath, tarfilePath,
                  collectMode, archiveMode, resetDigest, digestPath)
178 179 180 ############################### 181 # _collectDirectory() function 182 ############################### 183
def _collectDirectory(config, absolutePath, tarfilePath, collectMode, archiveMode,
                      ignoreFile, linkDepth, resetDigest, digestPath, excludePaths,
                      excludePatterns):
   """
   Collects a configured collect directory into a tarfile.

   A backup list is configured with the ignore file and the exclusions, then
   populated from the directory's contents and handed to L{_executeBackup}.
   For incremental collection, that function uses the digest path and
   honours the reset digest flag (the flag causes any existing digest to be
   ignored, but a new digest is always rewritten).

   The caller decides the collect and archive modes, since either can be set
   on the collect configuration or on the collect directory itself.

   @param config: Config object.
   @param absolutePath: Absolute path of directory to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param ignoreFile: Ignore file to use.
   @param linkDepth: Link depth value to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   @param excludePaths: List of absolute paths to exclude.
   @param excludePatterns: List of patterns to exclude.
   """
   dirContents = BackupFileList()
   # Filters are set before the directory is added to the list.
   dirContents.ignoreFile = ignoreFile
   dirContents.excludePaths = excludePaths
   dirContents.excludePatterns = excludePatterns
   dirContents.addDirContents(absolutePath, linkDepth=linkDepth)
   _executeBackup(config, dirContents, absolutePath, tarfilePath,
                  collectMode, archiveMode, resetDigest, digestPath)
217 218 219 ############################ 220 # _executeBackup() function 221 ############################ 222
def _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
   """
   Execute the backup process for the indicated backup list.

   This function exists mainly to consolidate functionality between the
   L{_collectFile} and L{_collectDirectory} functions.  Those functions build
   the backup list; this function causes the backup to execute properly and
   also manages usage of the digest file on disk as explained in their
   comments.

   For any collect mode other than C{incr}, no digest is read or written and
   the backup list is archived as-is.  For C{incr}, unchanged files are
   removed from the backup list based on the old digest (an empty digest when
   C{resetDigest} is set), the remaining files are archived, and the newly
   captured digest is written to C{digestPath}.

   For collect files, the digest file will always just contain the single file
   that is being backed up.  This might be a little wasteful in terms of the
   number of files that we keep around, but it's consistent and easy to
   understand.

   @param config: Config object.
   @param backupList: List to execute backup for
   @param absolutePath: Absolute path of directory or file to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param collectMode: Collect mode to use.
   @param archiveMode: Archive mode to use.
   @param resetDigest: Reset digest flag.
   @param digestPath: Path to digest file on disk, if needed.
   """
   if collectMode != 'incr':
      logger.debug("Collect mode is [%s]; no digest will be used." % collectMode)
      _writeBackupTarfile(config, backupList, absolutePath, tarfilePath, archiveMode)
      return

   if resetDigest:
      logger.debug("Based on resetDigest flag, digest will be cleared.")
      oldDigest = {}
   else:
      logger.debug("Based on resetDigest flag, digest will loaded from disk.")
      oldDigest = _loadDigest(digestPath)
   (removed, newDigest) = backupList.removeUnchanged(oldDigest, captureDigest=True)
   logger.debug("Removed %d unchanged files based on digest values." % removed)
   _writeBackupTarfile(config, backupList, absolutePath, tarfilePath, archiveMode)
   # The digest is rewritten even when nothing was left to archive.
   _writeDigest(config, newDigest, digestPath)


def _writeBackupTarfile(config, backupList, absolutePath, tarfilePath, archiveMode):
   """
   Logs the size of a backup list and archives it if it is not empty.

   The tarfile is only generated (and its ownership changed to the configured
   backup user and group) when the backup list contains at least one entry.

   @param config: Config object.
   @param backupList: List to execute backup for
   @param absolutePath: Absolute path of directory or file to collect.
   @param tarfilePath: Path to tarfile that should be created.
   @param archiveMode: Archive mode to use.
   """
   if len(backupList) == 1 and backupList[0] == absolutePath:  # special case for individual file
      logger.info("Backing up file [%s] (%s)." % (absolutePath, displayBytes(backupList.totalSize())))
   else:
      logger.info("Backing up %d files in [%s] (%s)." % (len(backupList), absolutePath, displayBytes(backupList.totalSize())))
   if len(backupList) > 0:
      backupList.generateTarfile(tarfilePath, archiveMode, True)
      changeOwnership(tarfilePath, config.options.backupUser, config.options.backupGroup)
272 273 274 ######################### 275 # _loadDigest() function 276 ######################### 277
def _loadDigest(digestPath):
   """
   Loads the indicated digest path from disk into a dictionary.

   If we can't load the digest successfully (either because it doesn't exist or
   for some other reason), then an empty dictionary will be returned - but the
   condition will be logged.  KeyboardInterrupt and SystemExit are not
   treated as load failures; they propagate to the caller.

   @note: The digest is a pickle.  Unpickling can execute arbitrary code, so
   the digest file must only be writable by trusted users.

   @param digestPath: Path to the digest file on disk.

   @return: Dictionary representing contents of digest path.
   """
   if not os.path.isfile(digestPath):
      logger.debug("Digest [%s] does not exist on disk." % digestPath)
      return {}
   try:
      # Mode "r" is kept to match the text mode the digest is written in.
      handle = open(digestPath, "r")
      try:
         digest = pickle.load(handle)
      finally:
         handle.close()  # always release the file handle, even on a bad pickle
      logger.debug("Loaded digest [%s] from disk: %d entries." % (digestPath, len(digest)))
   except (KeyboardInterrupt, SystemExit):
      # Never turn an interrupt or exit request into a silently-empty digest.
      raise
   except Exception:
      digest = {}
      logger.error("Failed loading digest [%s] from disk." % digestPath)
   return digest
301 302 303 ########################## 304 # _writeDigest() function 305 ########################## 306
def _writeDigest(config, digest, digestPath):
   """
   Writes the digest dictionary to the indicated digest path on disk.

   If we can't write the digest successfully for any reason, we'll log the
   condition but won't throw an exception.  KeyboardInterrupt and SystemExit
   are the exception to that rule; they propagate to the caller.

   The file is explicitly closed before its ownership is changed, so the
   pickled data has been flushed by the time C{changeOwnership} runs.

   @param config: Config object.
   @param digest: Digest dictionary to write to disk.
   @param digestPath: Path to the digest file on disk.
   """
   try:
      # Mode "w" is kept to match the text mode the digest is read back in.
      handle = open(digestPath, "w")
      try:
         pickle.dump(digest, handle)
      finally:
         handle.close()  # flush and release the handle even if dump fails
      changeOwnership(digestPath, config.options.backupUser, config.options.backupGroup)
      logger.debug("Wrote new digest [%s] to disk: %d entries." % (digestPath, len(digest)))
   except (KeyboardInterrupt, SystemExit):
      # Interrupts and exit requests must not be downgraded to a log line.
      raise
   except Exception:
      logger.error("Failed to write digest [%s] to disk." % digestPath)
324 325 326 ######################################################################## 327 # Private attribute "getter" functions 328 ######################################################################## 329 330 ############################ 331 # _getCollectMode() function 332 ############################ 333
def _getCollectMode(config, item):
   """
   Determines the collect mode for a collect directory or file.

   The mode set on the item itself wins; when the item has none (C{None}),
   the mode from the collect section of the configuration is used instead.
   The chosen mode is logged at debug level.

   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Collect mode to use.
   """
   chosenMode = item.collectMode
   if chosenMode is None:
      chosenMode = config.collect.collectMode
   logger.debug("Collect mode is [%s]" % chosenMode)
   return chosenMode
348 349 350 ############################# 351 # _getArchiveMode() function 352 ############################# 353
def _getArchiveMode(config, item):
   """
   Determines the archive mode for a collect directory or file.

   The mode set on the item itself wins; when the item has none (C{None}),
   the mode from the collect section of the configuration is used instead.
   The chosen mode is logged at debug level.

   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Archive mode to use.
   """
   chosenMode = item.archiveMode
   if chosenMode is None:
      chosenMode = config.collect.archiveMode
   logger.debug("Archive mode is [%s]" % chosenMode)
   return chosenMode
368 369 370 ############################ 371 # _getIgnoreFile() function 372 ############################ 373
def _getIgnoreFile(config, item):
   """
   Determines the ignore file for a collect directory or file.

   The ignore file set on the item itself wins; when the item has none
   (C{None}), the one from the collect section of the configuration is used
   instead.  The chosen value is logged at debug level.

   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Ignore file to use.
   """
   chosenFile = item.ignoreFile
   if chosenFile is None:
      chosenFile = config.collect.ignoreFile
   logger.debug("Ignore file is [%s]" % chosenFile)
   return chosenFile
388 389 390 ############################ 391 # _getLinkDepth() function 392 ############################ 393
def _getLinkDepth(item):
   """
   Determines the link depth for a collect directory.

   The value set on the directory is used when there is one; otherwise the
   link depth defaults to 0 (zero).  The chosen value is logged at debug
   level.

   @param item: C{CollectDir} object
   @return: Link depth to use.
   """
   depth = item.linkDepth
   if depth is None:
      depth = 0
   logger.debug("Link depth is [%d]" % depth)
   return depth
407 408 409 ############################ 410 # _getDigestPath() function 411 ############################ 412
def _getDigestPath(config, item):
   """
   Builds the digest path associated with a collect directory or file.

   The digest lives in the configured working directory.  Its file name is
   the result of C{buildNormalizedPath} on the item's absolute path, followed
   by a dot and C{DIGEST_EXTENSION}.  The resulting path is logged at debug
   level.

   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @return: Absolute path to the digest associated with the collect directory or file.
   """
   digestName = "%s.%s" % (buildNormalizedPath(item.absolutePath), DIGEST_EXTENSION)
   fullPath = os.path.join(config.options.workingDir, digestName)
   logger.debug("Digest path is [%s]" % fullPath)
   return fullPath
425 426 427 ############################# 428 # _getTarfilePath() function 429 ############################# 430
def _getTarfilePath(config, item, archiveMode):
   """
   Gets the tarfile path (including correct extension) associated with a
   collect directory or file.

   The tarfile lives in the configured collect target directory.  Its file
   name is the result of C{buildNormalizedPath} on the item's absolute path,
   followed by an extension chosen from the archive mode: C{tar} maps to
   C{.tar}, C{targz} to C{.tar.gz} and C{tarbz2} to C{.tar.bz2}.

   @param config: Config object.
   @param item: C{CollectFile} or C{CollectDir} object
   @param archiveMode: Archive mode to use for this tarfile.
   @return: Absolute path to the tarfile associated with the collect directory or file.
   @raise ValueError: If the archive mode is not one of C{tar}, C{targz} or C{tarbz2}.
   """
   if archiveMode == 'tar':
      extension = "tar"
   elif archiveMode == 'targz':
      extension = "tar.gz"
   elif archiveMode == 'tarbz2':
      extension = "tar.bz2"
   else:
      # Without this branch an unknown mode left 'extension' unbound and the
      # function failed below with an obscure UnboundLocalError.
      raise ValueError("Unknown archive mode [%s]; expected 'tar', 'targz' or 'tarbz2'." % (archiveMode,))
   normalized = buildNormalizedPath(item.absolutePath)
   filename = "%s.%s" % (normalized, extension)
   tarfilePath = os.path.join(config.collect.targetDir, filename)
   logger.debug("Tarfile path is [%s]" % tarfilePath)
   return tarfilePath
450 451 452 ############################ 453 # _getExclusions() function 454 ############################ 455
def _getExclusions(config, collectDir):
   """
   Gathers the exclusions (paths and patterns) that apply to a collect directory.

   The paths value is a list of absolute paths to be excluded from the backup
   for the directory.  It is built, in order, from the collect
   configuration's absolute exclude paths, the collect directory's absolute
   exclude paths, and the collect directory's relative exclude paths joined
   onto the directory's own absolute path.

   The patterns value is a list of patterns to be excluded from the backup
   for the directory.  It is built from the collect configuration's patterns
   followed by the collect directory's own patterns.

   Any source that is C{None} is skipped.  Both lists are logged at debug
   level.

   @param config: Config object.
   @param collectDir: Collect directory object.

   @return: Tuple (files, patterns) indicating what to exclude.
   """
   excludedPaths = []
   for absoluteSource in (config.collect.absoluteExcludePaths, collectDir.absoluteExcludePaths):
      if absoluteSource is not None:
         excludedPaths.extend(absoluteSource)
   if collectDir.relativeExcludePaths is not None:
      # Relative entries are anchored at the collect directory itself.
      excludedPaths.extend([os.path.join(collectDir.absolutePath, relative)
                            for relative in collectDir.relativeExcludePaths])

   excludedPatterns = []
   for patternSource in (config.collect.excludePatterns, collectDir.excludePatterns):
      if patternSource is not None:
         excludedPatterns.extend(patternSource)

   logger.debug("Exclude paths: %s" % excludedPaths)
   logger.debug("Exclude patterns: %s" % excludedPatterns)
   return (excludedPaths, excludedPatterns)
490