Package CedarBackup2 :: Module filesystem
[hide private]
[frames] | no frames]

Source Code for Module CedarBackup2.filesystem

   1  # -*- coding: iso-8859-1 -*- 
   2  # vim: set ft=python ts=3 sw=3 expandtab: 
   3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   4  # 
   5  #              C E D A R 
   6  #          S O L U T I O N S       "Software done right." 
   7  #           S O F T W A R E 
   8  # 
   9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  10  # 
  11  # Copyright (c) 2004-2008 Kenneth J. Pronovici. 
  12  # All rights reserved. 
  13  # 
  14  # This program is free software; you can redistribute it and/or 
  15  # modify it under the terms of the GNU General Public License, 
  16  # Version 2, as published by the Free Software Foundation. 
  17  # 
  18  # This program is distributed in the hope that it will be useful, 
  19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
  21  # 
  22  # Copies of the GNU General Public License are available from 
  23  # the Free Software Foundation website, http://www.gnu.org/. 
  24  # 
  25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  26  # 
  27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
  28  # Language : Python (>= 2.3) 
  29  # Project  : Cedar Backup, release 2 
  30  # Revision : $Id: filesystem.py 858 2008-03-17 02:46:45Z pronovic $ 
  31  # Purpose  : Provides filesystem-related objects. 
  32  # 
  33  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  34   
  35  ######################################################################## 
  36  # Module documentation 
  37  ######################################################################## 
  38   
  39  """ 
  40  Provides filesystem-related objects. 
  41  @sort: FilesystemList, BackupFileList, PurgeItemList 
  42  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
  43  """ 
  44   
  45   
  46  ######################################################################## 
  47  # Imported modules 
  48  ######################################################################## 
  49   
  50  # System modules 
  51  import sys 
  52  import os 
  53  import re 
  54  import sha 
  55  import logging 
  56  import tarfile 
  57   
  58  # Cedar Backup modules 
  59  from CedarBackup2.knapsack import firstFit, bestFit, worstFit, alternateFit 
  60  from CedarBackup2.util import AbsolutePathList, ObjectTypeList, UnorderedList, RegexList 
  61  from CedarBackup2.util import removeKeys, displayBytes, calculateFileAge, encodePath 
  62   
  63   
  64  ######################################################################## 
  65  # Module-wide variables 
  66  ######################################################################## 
  67   
  68  logger = logging.getLogger("CedarBackup2.log.filesystem") 
  69   
  70   
  71  ######################################################################## 
  72  # FilesystemList class definition 
  73  ######################################################################## 
  74   
75 -class FilesystemList(list):
76 77 ###################### 78 # Class documentation 79 ###################### 80 81 """ 82 Represents a list of filesystem items. 83 84 This is a generic class that represents a list of filesystem items. Callers 85 can add individual files or directories to the list, or can recursively add 86 the contents of a directory. The class also allows for up-front exclusions 87 in several forms (all files, all directories, all items matching a pattern, 88 all items whose basename matches a pattern, or all directories containing a 89 specific "ignore file"). Symbolic links are typically backed up 90 non-recursively, i.e. the link to a directory is backed up, but not the 91 contents of that link (we don't want to deal with recursive loops, etc.). 92 93 The custom methods such as L{addFile} will only add items if they exist on 94 the filesystem and do not match any exclusions that are already in place. 95 However, since a FilesystemList is a subclass of Python's standard list 96 class, callers can also add items to the list in the usual way, using 97 methods like C{append()} or C{insert()}. No validations apply to items 98 added to the list in this way; however, many list-manipulation methods deal 99 "gracefully" with items that don't exist in the filesystem, often by 100 ignoring them. 101 102 Once a list has been created, callers can remove individual items from the 103 list using standard methods like C{pop()} or C{remove()} or they can use 104 custom methods to remove specific types of entries or entries which match a 105 particular pattern. 106 107 @note: Regular expression patterns that apply to paths are assumed to be 108 bounded at front and back by the beginning and end of the string, i.e. they 109 are treated as if they begin with C{^} and end with C{$}. This is true 110 whether we are matching a complete path or a basename. 111 112 @note: Some platforms, like Windows, do not support soft links. 
On those 113 platforms, the ignore-soft-links flag can be set, but it won't do any good 114 because the operating system never reports a file as a soft link. 115 116 @sort: __init__, addFile, addDir, addDirContents, removeFiles, removeDirs, 117 removeLinks, removeMatch, removeInvalid, normalize, verify, 118 excludeFiles, excludeDirs, excludeLinks, excludePaths, 119 excludePatterns, excludeBasenamePatterns, ignoreFile 120 """ 121 122 123 ############## 124 # Constructor 125 ############## 126
def __init__(self):
   """Creates an empty list that has no exclusions configured."""
   list.__init__(self)
   # Seed the raw backing attributes before any property is assigned.
   self._excludeFiles = self._excludeDirs = self._excludeLinks = False
   self._excludePaths = self._excludePatterns = None
   self._excludeBasenamePatterns = self._ignoreFile = None
   # The public defaults are assigned through the properties, so the
   # setter methods get a chance to normalize each value.
   self.excludeFiles = False
   self.excludeLinks = False
   self.excludeDirs = False
   self.excludePaths = []
   self.excludePatterns = RegexList()
   self.excludeBasenamePatterns = RegexList()
   self.ignoreFile = None
144 145 146 ############# 147 # Properties 148 ############# 149
150 - def _setExcludeFiles(self, value):
151 """ 152 Property target used to set the exclude files flag. 153 No validations, but we normalize the value to C{True} or C{False}. 154 """ 155 if value: 156 self._excludeFiles = True 157 else: 158 self._excludeFiles = False
159
160 - def _getExcludeFiles(self):
161 """ 162 Property target used to get the exclude files flag. 163 """ 164 return self._excludeFiles
165
166 - def _setExcludeDirs(self, value):
167 """ 168 Property target used to set the exclude directories flag. 169 No validations, but we normalize the value to C{True} or C{False}. 170 """ 171 if value: 172 self._excludeDirs = True 173 else: 174 self._excludeDirs = False
175
176 - def _getExcludeDirs(self):
177 """ 178 Property target used to get the exclude directories flag. 179 """ 180 return self._excludeDirs
181 191 197
def _setExcludePaths(self, value):
   """
   Property target used to set the exclude paths list.

   The stored value is always a fresh C{AbsolutePathList}; passing C{None}
   leaves that list empty.  Elements do not have to exist on disk at the
   time of assignment.

   @param value: Iterable of paths, or C{None}.
   @raise ValueError: If any list element is not an absolute path (the
                      check is made by C{AbsolutePathList} when extended).
   """
   # The attribute is replaced before extending, so a failing extend()
   # still leaves a new list in place rather than the previous one.
   target = self._absoluteExcludePaths = AbsolutePathList()
   if value is None:
      return
   target.extend(value)
208
209 - def _getExcludePaths(self):
210 """ 211 Property target used to get the absolute exclude paths list. 212 """ 213 return self._absoluteExcludePaths
214
def _setExcludePatterns(self, value):
   """
   Property target used to set the exclude patterns list.

   The stored value is always a fresh C{RegexList}; passing C{None} leaves
   that list empty.

   @param value: Iterable of regular expression patterns, or C{None}.
   """
   # The attribute is replaced before extending, so a failing extend()
   # still leaves a new list in place rather than the previous one.
   target = self._excludePatterns = RegexList()
   if value is None:
      return
   target.extend(value)
223
224 - def _getExcludePatterns(self):
225 """ 226 Property target used to get the exclude patterns list. 227 """ 228 return self._excludePatterns
229
def _setExcludeBasenamePatterns(self, value):
   """
   Property target used to set the exclude basename patterns list.

   The stored value is always a fresh C{RegexList}; passing C{None} leaves
   that list empty.

   @param value: Iterable of regular expression patterns, or C{None}.
   """
   # The attribute is replaced before extending, so a failing extend()
   # still leaves a new list in place rather than the previous one.
   target = self._excludeBasenamePatterns = RegexList()
   if value is None:
      return
   target.extend(value)
238
240 """ 241 Property target used to get the exclude basename patterns list. 242 """ 243 return self._excludeBasenamePatterns
244
245 - def _setIgnoreFile(self, value):
246 """ 247 Property target used to set the ignore file. 248 The value must be a non-empty string if it is not C{None}. 249 @raise ValueError: If the value is an empty string. 250 """ 251 if value is not None: 252 if len(value) < 1: 253 raise ValueError("The ignore file must be a non-empty string.") 254 self._ignoreFile = value
255
256 - def _getIgnoreFile(self):
257 """ 258 Property target used to get the ignore file. 259 """ 260 return self._ignoreFile
# Public attributes are properties wired to the private accessor methods;
# none of them defines a deleter.
excludeFiles = property(fget=_getExcludeFiles, fset=_setExcludeFiles,
                        doc="Boolean indicating whether files should be excluded.")
excludeDirs = property(fget=_getExcludeDirs, fset=_setExcludeDirs,
                       doc="Boolean indicating whether directories should be excluded.")
excludeLinks = property(fget=_getExcludeLinks, fset=_setExcludeLinks,
                        doc="Boolean indicating whether soft links should be excluded.")
excludePaths = property(fget=_getExcludePaths, fset=_setExcludePaths,
                        doc="List of absolute paths to be excluded.")
excludePatterns = property(fget=_getExcludePatterns, fset=_setExcludePatterns,
                           doc="List of regular expression patterns (matching complete path) to be excluded.")
excludeBasenamePatterns = property(fget=_getExcludeBasenamePatterns, fset=_setExcludeBasenamePatterns,
                                   doc="List of regular expression patterns (matching basename) to be excluded.")
ignoreFile = property(fget=_getIgnoreFile, fset=_setIgnoreFile,
                      doc="Name of file which will cause directory contents to be ignored.")


##############
# Add methods
##############

def addFile(self, path):
   """
   Adds a file to the list.

   The path must exist and must be a file or a link to an existing file.  It
   will be added to the list subject to any exclusions that are in place.
   Exclusions are checked in this order: C{excludeLinks}, C{excludeFiles},
   C{excludePaths}, C{excludePatterns} (against the complete path) and
   C{excludeBasenamePatterns} (against the basename).

   @note: Each pattern is wrapped in a non-capturing group before it is
   anchored with C{^} and C{$}, so a pattern containing a top-level
   alternation such as C{a|b} is bounded as a whole.

   @param path: File path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list (zero if excluded, else one).

   @raise ValueError: If path is not a file or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = encodePath(path)
   if not os.path.exists(path) or not os.path.isfile(path):
      logger.debug("Path [%s] is not a file or does not exist on disk." % path)
      raise ValueError("Path is not a file or does not exist on disk.")
   if self.excludeLinks and os.path.islink(path):
      logger.debug("Path [%s] is excluded based on excludeLinks." % path)
      return 0
   if self.excludeFiles:
      logger.debug("Path [%s] is excluded based on excludeFiles." % path)
      return 0
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return 0
   for pattern in self.excludePatterns:  # safe to assume all are valid due to RegexList
      # (?:...) keeps "^" and "$" bound to the whole pattern, not just to
      # the first and last branch of an alternation.
      if re.compile(r"^(?:%s)$" % pattern).match(path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, pattern))
         return 0
   basename = os.path.basename(path)
   for pattern in self.excludeBasenamePatterns:  # safe to assume all are valid due to RegexList
      if re.compile(r"^(?:%s)$" % pattern).match(basename):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, pattern))
         return 0
   self.append(path)
   logger.debug("Added file to list: [%s]" % path)
   return 1
316
def addDir(self, path):
   """
   Adds a directory to the list.

   The path must exist and must be a directory or a link to an existing
   directory.  It will be added to the list subject to any exclusions that
   are in place.  The L{ignoreFile} does not apply to this method, only to
   L{addDirContents}.  Exclusions are checked in this order:
   C{excludeLinks}, C{excludeDirs}, C{excludePaths}, C{excludePatterns}
   (against the complete path) and C{excludeBasenamePatterns} (against the
   basename).

   @note: Each pattern is wrapped in a non-capturing group before it is
   anchored with C{^} and C{$}, so a pattern containing a top-level
   alternation such as C{a|b} is bounded as a whole.

   @param path: Directory path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list (zero if excluded, else one).

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = encodePath(path)
   path = normalizeDir(path)
   if not os.path.exists(path) or not os.path.isdir(path):
      logger.debug("Path [%s] is not a directory or does not exist on disk." % path)
      raise ValueError("Path is not a directory or does not exist on disk.")
   if self.excludeLinks and os.path.islink(path):
      logger.debug("Path [%s] is excluded based on excludeLinks." % path)
      return 0
   if self.excludeDirs:
      logger.debug("Path [%s] is excluded based on excludeDirs." % path)
      return 0
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return 0
   for pattern in self.excludePatterns:  # safe to assume all are valid due to RegexList
      # (?:...) keeps "^" and "$" bound to the whole pattern, not just to
      # the first and last branch of an alternation.
      if re.compile(r"^(?:%s)$" % pattern).match(path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, pattern))
         return 0
   basename = os.path.basename(path)
   for pattern in self.excludeBasenamePatterns:  # safe to assume all are valid due to RegexList
      if re.compile(r"^(?:%s)$" % pattern).match(basename):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, pattern))
         return 0
   self.append(path)
   logger.debug("Added directory to list: [%s]" % path)
   return 1
359
def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0):
   """
   Adds the contents of a directory to the list.

   The path must exist and must be a directory or a link to a directory.
   The contents of the directory (as well as the directory path itself) will
   be recursively added to the list, subject to any exclusions that are in
   place.  If you only want the directory and its immediate contents to be
   added, then pass in C{recursive=False}.

   @note: If a directory's absolute path matches an exclude pattern or path,
   or if the directory contains the configured ignore file, then the
   directory and all of its contents will be recursively excluded from the
   list.

   @note: If the passed-in directory happens to be a soft link, it will be
   recursed.  However, the linkDepth parameter controls whether any soft
   links I{within} the directory will be recursed.  The link depth is the
   maximum depth of the tree at which soft links should be followed.  So, a
   depth of 0 does not follow any soft links, a depth of 1 follows only
   links within the passed-in directory, a depth of 2 follows the links at
   the next level down, etc.

   @note: Any invalid soft links (i.e. soft links that point to
   non-existent items) will be silently ignored.

   @note: The L{excludeDirs} flag only controls whether any given directory
   path itself is added to the list once it has been discovered.  It does
   I{not} modify any behavior related to directory recursion.

   @param path: Directory path whose contents should be added to the list
   @type path: String representing a path on disk

   @param recursive: Indicates whether directory contents should be added recursively.
   @type recursive: Boolean value

   @param addSelf: Indicates whether the directory itself should be added to the list.
   @type addSelf: Boolean value

   @param linkDepth: Maximum depth of the tree at which soft links should be followed
   @type linkDepth: Integer value, where zero means not to follow any soft links

   @return: Number of items recursively added to the list

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   # Encode first, then normalize; all of the real work is delegated.  Note
   # that addSelf is passed in the internal method's includePath position.
   normalized = normalizeDir(encodePath(path))
   return self._addDirContentsInternal(normalized, addSelf, recursive, linkDepth)
410
def _addDirContentsInternal(self, path, includePath=True, recursive=True, linkDepth=0):
   """
   Internal implementation of C{addDirContents}.

   This internal implementation exists due to some refactoring.  Basically,
   some subclasses have a need to add the contents of a directory, but not
   the directory itself.  This is different than the standard C{FilesystemList}
   behavior and actually ends up making a special case out of the first
   call in the recursive chain.  Since I don't want to expose the modified
   interface, C{addDirContents} ends up being wholly implemented in terms
   of this method.

   The linkDepth parameter controls whether soft links are followed when we
   are adding the contents recursively.  Any recursive calls reduce the
   value by one.  If the value is zero or less, then soft links will just be
   added as directories, but will not be followed.

   Recursive calls rely on the defaults for C{includePath} and C{recursive},
   so every nested directory is added along with its contents.

   @note: Each exclude pattern is wrapped in a non-capturing group before it
   is anchored with C{^} and C{$}, so a pattern containing a top-level
   alternation such as C{a|b} is bounded as a whole.

   @param path: Directory path whose contents should be added to the list.
   @param includePath: Indicates whether to include the path as well as contents.
   @param recursive: Indicates whether directory contents should be added recursively.
   @param linkDepth: Depth of soft links that should be followed

   @return: Number of items recursively added to the list

   @raise ValueError: If path is not a directory or does not exist.
   """
   added = 0
   if not os.path.exists(path) or not os.path.isdir(path):
      logger.debug("Path [%s] is not a directory or does not exist on disk." % path)
      raise ValueError("Path is not a directory or does not exist on disk.")
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return added
   for pattern in self.excludePatterns:  # safe to assume all are valid due to RegexList
      # (?:...) keeps "^" and "$" bound to the whole pattern, not just to
      # the first and last branch of an alternation.
      if re.compile(r"^(?:%s)$" % pattern).match(path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, pattern))
         return added
   basename = os.path.basename(path)
   for pattern in self.excludeBasenamePatterns:  # safe to assume all are valid due to RegexList
      if re.compile(r"^(?:%s)$" % pattern).match(basename):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, pattern))
         return added
   if self.ignoreFile is not None and os.path.exists(os.path.join(path, self.ignoreFile)):
      logger.debug("Path [%s] is excluded based on ignore file." % path)
      return added
   if includePath:
      added += self.addDir(path)  # addDir applies its own exclusions, so this may add nothing
   for entry in os.listdir(path):
      entrypath = os.path.join(path, entry)
      if os.path.isfile(entrypath):
         added += self.addFile(entrypath)
      elif os.path.isdir(entrypath):
         if os.path.islink(entrypath):
            # A linked directory is only descended into while link depth remains.
            if recursive and linkDepth > 0:
               added += self._addDirContentsInternal(entrypath, linkDepth=linkDepth - 1)
            else:
               added += self.addDir(entrypath)
         else:
            if recursive:
               added += self._addDirContentsInternal(entrypath, linkDepth=linkDepth - 1)
            else:
               added += self.addDir(entrypath)
      # Anything else (e.g. a dangling soft link) is silently skipped.
   return added
475 476 477 ################# 478 # Remove methods 479 ################# 480
def removeFiles(self, pattern=None):
   """
   Removes file entries from the list.

   With no C{pattern} (or C{None}), every entry that is a file on disk is
   removed.  With a pattern, only file entries that the pattern matches
   (using C{match}, i.e. from the start of the entry) are removed.  Entries
   that do not exist on disk are left alone (use L{removeInvalid} to purge
   those entries).

   This method might be fairly slow for large lists, since it must check the
   type of each item in the list.  If you know ahead of time that you want
   to exclude all files, then you will be better off setting L{excludeFiles}
   to C{True} before adding items to the list.

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   compiled = None
   if pattern is not None:
      try:
         compiled = re.compile(pattern)
      except re.error:
         raise ValueError("Pattern is not a valid regular expression.")
   removed = 0
   for entry in list(self):  # iterate over a snapshot, since the list shrinks as we go
      if not (os.path.exists(entry) and os.path.isfile(entry)):
         continue
      if compiled is not None and not compiled.match(entry):
         continue
      self.remove(entry)
      logger.debug("Removed path [%s] from list." % entry)
      removed += 1
   logger.debug("Removed a total of %d entries." % removed)
   return removed
520
def removeDirs(self, pattern=None):
   """
   Removes directory entries from the list.

   With no C{pattern} (or C{None}), every entry that is a directory on disk
   is removed.  With a pattern, only directory entries that the pattern
   matches (using C{match}, i.e. from the start of the entry) are removed.
   Entries that do not exist on disk are left alone (use L{removeInvalid}
   to purge those entries).

   This method might be fairly slow for large lists, since it must check the
   type of each item in the list.  If you know ahead of time that you want
   to exclude all directories, then you will be better off setting
   L{excludeDirs} to C{True} before adding items to the list (note that this
   will not prevent you from recursively adding the I{contents} of
   directories).

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   compiled = None
   if pattern is not None:
      try:
         compiled = re.compile(pattern)
      except re.error:
         raise ValueError("Pattern is not a valid regular expression.")
   removed = 0
   for entry in list(self):  # iterate over a snapshot, since the list shrinks as we go
      if not (os.path.exists(entry) and os.path.isdir(entry)):
         continue
      if compiled is None:
         self.remove(entry)
         logger.debug("Removed path [%s] from list." % entry)
      elif compiled.match(entry):
         self.remove(entry)
         logger.debug("Removed path [%s] from list based on pattern [%s]." % (entry, pattern))
      else:
         continue
      removed += 1
   logger.debug("Removed a total of %d entries." % removed)
   return removed
562 602
def removeMatch(self, pattern):
   """
   Removes from the list all entries matching a pattern.

   Every entry that the passed-in C{pattern} matches is removed, whatever
   kind of filesystem object it is.  Since there is no need to check the
   type of each entry, it is faster to call this method than to call the
   L{removeFiles}, L{removeDirs} or L{removeLinks} methods individually.  If
   you know which patterns you will want to remove ahead of time, you may
   be better off setting L{excludePatterns} or L{excludeBasenamePatterns}
   before adding items to the list.

   @note: Unlike when using the exclude lists, the pattern here is I{not}
   bounded at the front and the back of the string.  You can use any pattern
   you want.

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed.
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   try:
      compiled = re.compile(pattern)
   except re.error:
      raise ValueError("Pattern is not a valid regular expression.")
   # Collect the matches from a snapshot first, then drop them one by one.
   matched = [entry for entry in self[:] if compiled.match(entry)]
   for entry in matched:
      self.remove(entry)
      logger.debug("Removed path [%s] from list based on pattern [%s]." % (entry, pattern))
   removed = len(matched)
   logger.debug("Removed a total of %d entries." % removed)
   return removed
636
def removeInvalid(self):
   """
   Removes from the list all entries that do not exist on disk.

   An entry is dropped when C{os.path.exists} reports that it does not
   currently exist.  No attention is paid to whether the entries are files
   or directories.

   @return: Number of entries removed.
   """
   # Collect the missing entries from a snapshot first, then drop them.
   missing = [entry for entry in self[:] if not os.path.exists(entry)]
   for entry in missing:
      self.remove(entry)
      logger.debug("Removed path [%s] from list." % entry)
   removed = len(missing)
   logger.debug("Removed a total of %d entries." % removed)
   return removed
655 656 657 ################## 658 # Utility methods 659 ################## 660
def normalize(self):
   """
   Normalizes the list, ensuring that each entry is unique.

   The list is sorted in place and then rewritten so that each run of
   equal entries is reduced to a single entry.  The number of entries
   removed is written to the debug log.
   """
   orig = len(self)
   self.sort()
   unique = []
   for entry in self:
      # After sorting, duplicates are adjacent, so comparing against the
      # last entry kept is enough.
      if not unique or unique[-1] != entry:
         unique.append(entry)
   self[:] = unique
   new = len(self)
   # Report orig-new so that the "removed" count is not negative.
   logger.debug("Completed normalizing list; removed %d items (%d originally, %d now)." % (orig-new, orig, new))
670
def verify(self):
   """
   Verifies that all entries in the list exist on disk.

   Checking stops at the first entry that does not exist.

   @return: C{True} if all entries exist, C{False} otherwise.
   """
   for entry in self:
      if os.path.exists(entry):
         continue
      logger.debug("Path [%s] is invalid; list is not valid." % entry)
      return False
   logger.debug("All entries in list are valid.")
   return True
682 683 684 ######################################################################## 685 # SpanItem class definition 686 ######################################################################## 687
class SpanItem(object):
   """
   Item returned by L{BackupFileList.generateSpan}.

   A plain value holder; the constructor stores its arguments unchanged.
   """

   def __init__(self, fileList, size, capacity, utilization):
      """
      Create object.
      @param fileList: List of files
      @param size: Size (in bytes) of files
      @param capacity: Capacity that the files were fitted into
      @param utilization: Utilization, as a percentage (0-100)
      """
      self.fileList, self.size = fileList, size
      self.capacity, self.utilization = capacity, utilization
703 704 705 ######################################################################## 706 # BackupFileList class definition 707 ######################################################################## 708
709 -class BackupFileList(FilesystemList):
710 711 ###################### 712 # Class documentation 713 ###################### 714 715 """ 716 List of files to be backed up. 717 718 A BackupFileList is a L{FilesystemList} containing a list of files to be 719 backed up. It only contains files, not directories (soft links are treated 720 like files). On top of the generic functionality provided by 721 L{FilesystemList}, this class adds functionality to keep a hash (checksum) 722 for each file in the list, and it also provides a method to calculate the 723 total size of the files in the list and a way to export the list into tar 724 form. 725 726 @sort: __init__, addDir, totalSize, generateSizeMap, generateDigestMap, 727 generateFitted, generateTarfile, removeUnchanged 728 """ 729 730 ############## 731 # Constructor 732 ############## 733
def __init__(self):
   """
   Creates an empty backup file list with no exclusions configured.

   All of the setup is done by the L{FilesystemList} constructor.
   """
   FilesystemList.__init__(self)
737 738 739 ################################ 740 # Overridden superclass methods 741 ################################ 742
def addDir(self, path):
   """
   Adds a directory to the list.

   Note that this class does not allow directories to be added by themselves
   (a backup list contains only files).  However, since links to directories
   are technically files, we allow them to be added.

   A path that is a real (non-link) directory is skipped and zero is
   returned.  Every other path is handed to the superclass method, so all
   of the superclass's existing validations and restrictions apply to it.

   @param path: Directory path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list.

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = normalizeDir(encodePath(path))
   if not os.path.isdir(path) or os.path.islink(path):
      return FilesystemList.addDir(self, path)
   return 0
770 771 772 ################## 773 # Utility methods 774 ################## 775
def totalSize(self):
   """
   Returns the total size among all files in the list.

   Only regular files are counted.  Soft links that point at files are
   ignored, and so are entries which do not exist on disk.

   @return: Total size, in bytes (as a float)
   """
   sizes = [float(os.stat(entry).st_size)
            for entry in self
            if os.path.isfile(entry) and not os.path.islink(entry)]
   # Start from 0.0 so that an empty list still yields a float.
   return sum(sizes, 0.0)
789
def generateSizeMap(self):
   """
   Generates a mapping from file to file size in bytes.

   Soft links are included in the mapping and are listed with size zero.
   Regular files are listed with their size on disk.  Anything else,
   including entries which do not exist on disk, is left out.

   @return: Dictionary mapping file to file size (as a float)
   """
   table = {}
   for entry in self:
      if os.path.islink(entry):
         size = 0.0
      elif os.path.isfile(entry):
         size = float(os.stat(entry).st_size)
      else:
         continue
      table[entry] = size
   return table
804
def generateDigestMap(self, stripPrefix=None):
   """
   Generates a mapping from file to file digest.

   The digest for each file is whatever C{BackupFileList._generateDigest}
   returns for it.

   Entries which do not exist on disk are ignored.

   Soft links are ignored.  We would end up generating a digest for the file
   that the soft link points at, which doesn't make any sense.

   If C{stripPrefix} is passed in, then that prefix will be stripped from
   each key when the map is generated.  This can be useful in generating two
   "relative" digest maps to be compared to one another.  The prefix is only
   removed from entries that actually start with it; other entries keep
   their full path as the key.

   @param stripPrefix: Common prefix to be stripped from paths
   @type stripPrefix: String with any contents

   @return: Dictionary mapping file to digest value
   @see: L{removeUnchanged}
   """
   table = {}
   for entry in self:
      if os.path.isfile(entry) and not os.path.islink(entry):
         key = entry
         # Strip only a leading match; a plain replace() would also cut the
         # text out of the middle of a path that does not start with it.
         if stripPrefix is not None and entry.startswith(stripPrefix):
            key = entry[len(stripPrefix):]
         table[key] = BackupFileList._generateDigest(entry)
   return table
839
def _generateDigest(path):
   """
   Generates an SHA digest for a given file on disk.

   The original code for this function used this simplistic implementation,
   which requires reading the entire file into memory at once in order to
   generate a digest value::

      sha.new(open(path).read()).hexdigest()

   Not surprisingly, this isn't an optimal solution.  The U{Simple file
   hashing <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/259109>}
   Python Cookbook recipe describes how to incrementally generate a hash
   value by reading in chunks of data rather than reading the file all at
   once.  The recipe relies on the C{update()} method of the various
   Python hashing algorithms.

   In my tests using a 110 MB file on CD, the original implementation
   requires 111 seconds.  This implementation requires only 40-45 seconds,
   which is a pretty substantial speed-up.

   Practice shows that reading in around 4kB (4096 bytes) at a time yields
   the best performance.  Smaller reads are quite a bit slower, and larger
   reads don't make much of a difference.  The 4kB number makes me a little
   suspicious, and I think it might be related to the size of a filesystem
   read at the hardware level.  However, I've decided to just hardcode 4096
   until I have evidence that shows it's worthwhile making the read size
   configurable.

   The file is always closed again, even if reading it fails part-way.

   @param path: Path to generate digest for.

   @return: ASCII-safe SHA digest for the file.
   @raise IOError: If the file cannot be opened or read.
   """
   s = sha.new()
   f = open(path, mode="rb")  # in case platform cares about binary reads
   try:
      while True:
         readString = f.read(4096)  # see notes above
         if not readString:
            break
         s.update(readString)
   finally:
      f.close()
   digest = s.hexdigest()
   logger.debug("Generated digest [%s] for file [%s]." % (digest, path))
   return digest
_generateDigest = staticmethod(_generateDigest)

def generateFitted(self, capacity, algorithm="worst_fit"):
   """
   Generates a list of items that fit in the indicated capacity.

   Sometimes, callers would like to include every item in a list, but are
   unable to because not all of the items fit in the space available.  This
   method returns a copy of the list, containing only the items that fit in
   a given capacity.  A copy is returned so that we don't lose any
   information if for some reason the fitted list is unsatisfactory.

   The fitting is done using the functions in the knapsack module.  By
   default, the worst fit algorithm is used, but you can also choose
   from first fit, best fit and alternate fit.

   @param capacity: Maximum capacity among the files in the new list
   @type capacity: Integer, in bytes

   @param algorithm: Knapsack (fit) algorithm to use
   @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"

   @return: Copy of list with total size no larger than indicated capacity
   @raise ValueError: If the algorithm is invalid.
   """
   # Resolve the algorithm first so an invalid name fails before any file is examined
   function = BackupFileList._getKnapsackFunction(algorithm)
   table = self._getKnapsackTable()
   return function(table, capacity)[0]
913
def generateSpan(self, capacity, algorithm="worst_fit"):
   """
   Splits the list of items into sub-lists that fit in a given capacity.

   Sometimes, callers need to split a backup file list into a set of smaller
   lists.  For instance, you could use this to "span" the files across a set
   of discs.

   The fitting is done using the functions in the knapsack module.  By
   default, the worst fit algorithm is used, but you can also choose
   from first fit, best fit and alternate fit.

   The fit function is applied repeatedly: each pass produces one span, and
   the items placed in that span are removed from the table before the next
   pass.  The process ends when the table is empty.

   @note: If any of your items are larger than the capacity, then it won't
   be possible to find a solution.  In this case, a value error will be
   raised.

   @param capacity: Maximum capacity among the files in the new list
   @type capacity: Integer, in bytes

   @param algorithm: Knapsack (fit) algorithm to use
   @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"

   @return: List of L{SpanItem} objects.

   @raise ValueError: If the algorithm is invalid.
   @raise ValueError: If it's not possible to fit some items
   """
   spanItems = []
   function = BackupFileList._getKnapsackFunction(algorithm)
   table = self._getKnapsackTable(capacity)
   iteration = 0
   while len(table) > 0:
      iteration += 1
      fit = function(table, capacity)
      if len(fit[0]) == 0:
         # Should never happen due to validations in _getKnapsackTable(), but let's be safe
         raise ValueError("After iteration %d, unable to add any new items." % iteration)
      removeKeys(table, fit[0])
      if float(capacity) == 0.0:
         # A zero capacity can only hold zero-sized entries (soft links); avoid dividing by zero
         utilization = 0.0
      else:
         utilization = (float(fit[1])/float(capacity))*100.0
      item = SpanItem(fit[0], fit[1], capacity, utilization)
      spanItems.append(item)
   return spanItems
956
957 - def _getKnapsackTable(self, capacity=None):
958 """ 959 Converts the list into the form needed by the knapsack algorithms. 960 @return: Dictionary mapping file name to tuple of (file path, file size). 961 """ 962 table = { } 963 for entry in self: 964 if os.path.islink(entry): 965 table[entry] = (entry, 0.0) 966 elif os.path.isfile(entry): 967 size = float(os.stat(entry).st_size) 968 if capacity is not None: 969 if size > capacity: 970 raise ValueError("File [%s] cannot fit in capacity %s." % (entry, displayBytes(capacity))) 971 table[entry] = (entry, size) 972 return table
973
974 - def _getKnapsackFunction(algorithm):
975 """ 976 Returns a reference to the function associated with an algorithm name. 977 Algorithm name must be one of "first_fit", "best_fit", "worst_fit", "alternate_fit" 978 @param algorithm: Name of the algorithm 979 @return: Reference to knapsack function 980 @raise ValueError: If the algorithm name is unknown. 981 """ 982 if algorithm == "first_fit": 983 return firstFit 984 elif algorithm == "best_fit": 985 return bestFit 986 elif algorithm == "worst_fit": 987 return worstFit 988 elif algorithm == "alternate_fit": 989 return alternateFit 990 else: 991 raise ValueError("Algorithm [%s] is invalid." % algorithm);
992 _getKnapsackFunction = staticmethod(_getKnapsackFunction) 993
def generateTarfile(self, path, mode='tar', ignore=False, flat=False):
   """
   Creates a tar file containing the files in the list.

   By default, this method will create uncompressed tar files.  If you pass
   in mode C{'targz'}, then it will create gzipped tar files, and if you
   pass in mode C{'tarbz2'}, then it will create bzipped tar files.

   The tar file will be created as a GNU tar archive, which enables extended
   file name lengths, etc.  Since GNU tar is so prevalent, I've decided that
   the extra functionality out-weighs the disadvantage of not being
   "standard".

   If you pass in C{flat=True}, then a "flat" archive will be created, and
   all of the files will be added to the root of the archive.  So, the file
   C{/tmp/something/whatever.txt} would be added as just C{whatever.txt}.

   By default, the whole method call fails if there are problems adding any
   of the files to the archive, resulting in an exception.  Under these
   circumstances, callers are advised that they might want to call
   L{removeInvalid()} and then attempt to generate the tar file a second
   time, since the most common cause of failures is a missing file (a file
   that existed when the list was built, but is gone again by the time the
   tar file is built).

   If you want to, you can pass in C{ignore=True}, and the method will
   ignore errors encountered when adding individual files to the archive
   (but not errors opening and closing the archive itself).

   We'll always attempt to remove the tarfile from disk if an exception will
   be thrown.  For exceptions other than C{TarError}, the file is only
   removed if this method managed to open (and hence truncate) it.

   @note: No validation is done as to whether the entries in the list are
   files, since only files or soft links should be in an object like this.
   However, to be safe, everything is explicitly added to the tar archive
   non-recursively so it's safe to include soft links to directories.

   @note: The Python C{tarfile} module, which is used internally here, is
   supposed to deal properly with long filenames and links.  In my testing,
   I have found that it appears to be able to add really long filenames
   to archives, but doesn't do a good job reading them back out, even out of
   an archive it created.  Fortunately, all Cedar Backup does is add files
   to archives.

   @param path: Path of tar file to create on disk
   @type path: String representing a path on disk

   @param mode: Tar creation mode
   @type mode: One of either C{'tar'}, C{'targz'} or C{'tarbz2'}

   @param ignore: Indicates whether to ignore certain errors.
   @type ignore: Boolean

   @param flat: Creates "flat" archive by putting all items in root
   @type flat: Boolean

   @raise ValueError: If mode is not valid
   @raise ValueError: If list is empty
   @raise ValueError: If the path could not be encoded properly.
   @raise TarError: If there is a problem creating the tar file
   """
   import sys  # local import; used to fetch the active exception in a version-neutral way

   def discardArchive(archive):
      """Best-effort cleanup: closes the archive if it was opened, then deletes the file."""
      if archive is not None:
         try:
            archive.close()
         except Exception:
            pass  # we are already failing; a close error must not mask the real problem
      if os.path.exists(path):
         try:
            os.remove(path)
         except OSError:
            pass  # nothing more we can do about a file we cannot delete

   path = encodePath(path)
   if len(self) == 0:
      raise ValueError("Empty list cannot be used to generate tarfile.")
   if mode == 'tar':
      tarmode = "w:"
   elif mode == 'targz':
      tarmode = "w:gz"
   elif mode == 'tarbz2':
      tarmode = "w:bz2"
   else:
      raise ValueError("Mode [%s] is not valid." % mode)
   tar = None  # stays None if tarfile.open() itself fails, so cleanup knows not to close it
   try:
      tar = tarfile.open(path, tarmode)
      tar.posix = False    # make a GNU-compatible archive without file length limits
      for entry in self:
         try:
            if flat:
               tar.add(entry, arcname=os.path.basename(entry), recursive=False)
            else:
               tar.add(entry, recursive=False)
         except tarfile.TarError:
            if not ignore:
               raise   # bare raise keeps the original traceback
            logger.info("Unable to add file [%s]; going on anyway." % entry)
         except OSError:
            if not ignore:
               # Callers only expect TarError from this method, so wrap the OS-level failure
               raise tarfile.TarError(sys.exc_info()[1])
            logger.info("Unable to add file [%s]; going on anyway." % entry)
      tar.close()
   except tarfile.ReadError:
      discardArchive(tar)
      raise tarfile.ReadError("Unable to open [%s]; maybe directory doesn't exist?" % path)
   except tarfile.TarError:
      # Cleanup runs in its own function, so the bare raise below still re-raises this error
      discardArchive(tar)
      raise
   except:
      # Anything else (I/O error, interrupt, ...) also leaves an incomplete archive behind.
      # Only discard it if we actually opened it, so a file we never touched is left alone.
      if tar is not None:
         discardArchive(tar)
      raise
1093
def removeUnchanged(self, digestMap, captureDigest=False):
   """
   Removes unchanged entries from the list.

   This method relies on a digest map as returned from L{generateDigestMap}.
   For each entry in C{digestMap}, if the entry also exists in the current
   list I{and} the entry in the current list has the same digest value as in
   the map, the entry in the current list will be removed.

   This method offers a convenient way for callers to filter unneeded
   entries from a list.  The idea is that a caller will capture a digest map
   from C{generateDigestMap} at some point in time (perhaps the beginning of
   the week), and will save off that map using C{pickle} or some other
   method.  Then, the caller could use this method sometime in the future to
   filter out any unchanged files based on the saved-off map.

   If C{captureDigest} is passed-in as C{True}, then digest information will
   be captured for the entire list before the removal step occurs using the
   same rules as in L{generateDigestMap}.  The check will involve a lookup
   into the complete digest map.

   If C{captureDigest} is passed in as C{False}, we will only generate a
   digest value for files we actually need to check, and we'll ignore any
   entry in the list which isn't a file that currently exists on disk.

   The return value varies depending on C{captureDigest}, as well.  To
   preserve backwards compatibility, if C{captureDigest} is C{False}, then
   we'll just return a single value representing the number of entries
   removed.  Otherwise, we'll return a tuple of C{(entries removed, digest
   map)}.  The returned digest map will be in exactly the form returned by
   L{generateDigestMap}.

   @note: For performance reasons, this method actually ends up rebuilding
   the list from scratch.  First, we build a temporary dictionary containing
   all of the items from the original list.  Then, we remove items as needed
   from the dictionary (which is faster than the equivalent operation on a
   list).  Finally, we replace the contents of the current list based on the
   keys left in the dictionary.  This should be transparent to the caller.

   @param digestMap: Dictionary mapping file name to digest value.
   @type digestMap: Map as returned from L{generateDigestMap}.

   @param captureDigest: Indicates that digest information should be captured.
   @type captureDigest: Boolean

   @return: Number of entries removed if C{captureDigest} is C{False},
   otherwise a tuple of C{(entries removed, digest map)}.
   """
   removed = 0
   captured = {}
   table = {}
   for entry in self:
      if captureDigest and os.path.isfile(entry) and not os.path.islink(entry):
         # Capture mode digests every regular file up front
         table[entry] = BackupFileList._generateDigest(entry)
         captured[entry] = table[entry]
      else:
         table[entry] = None
   for entry in digestMap.keys():
      if entry in table:
         if captureDigest:
            digest = table[entry]   # None means "not a regular file", so never a match
         elif os.path.isfile(entry) and not os.path.islink(entry):
            # Non-capture mode only digests the files we actually need to compare
            digest = BackupFileList._generateDigest(entry)
         else:
            digest = None
         if digest is not None and digest == digestMap[entry]:
            removed += 1
            del table[entry]
            logger.debug("Discarded unchanged file [%s]." % entry)
   self[:] = list(table.keys())
   if captureDigest:
      return (removed, captured)
   return removed
1176 1177 1178 ######################################################################## 1179 # PurgeItemList class definition 1180 ######################################################################## 1181
class PurgeItemList(FilesystemList):

   ######################
   # Class documentation
   ######################

   """
   List of files and directories to be purged.

   A PurgeItemList is a L{FilesystemList} containing a list of files and
   directories to be purged.  On top of the generic functionality provided by
   L{FilesystemList}, this class adds functionality to remove items that are
   too young to be purged, and to actually remove each item in the list from
   the filesystem.

   The other main difference is that when you add a directory's contents to a
   purge item list, the directory itself is not added to the list.  This way,
   if someone asks to purge within C{/opt/backup/collect}, that directory
   doesn't get removed once all of the files within it are gone.
   """

   ##############
   # Constructor
   ##############

   def __init__(self):
      """Initializes a list with no configured exclusions."""
      FilesystemList.__init__(self)


   ##############
   # Add methods
   ##############

   def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0):
      """
      Adds the contents of a directory to the list.

      The path must exist and must be a directory or a link to a directory.
      The contents of the directory (but I{not} the directory path itself) will
      be recursively added to the list, subject to any exclusions that are in
      place.  If you do not want the contents to be added recursively, then
      pass in C{recursive=False}.

      @note: If a directory's absolute path matches an exclude pattern or path,
      or if the directory contains the configured ignore file, then the
      directory and all of its contents will be recursively excluded from the
      list.

      @note: If the passed-in directory happens to be a soft link, it will be
      recursed.  However, the linkDepth parameter controls whether any soft
      links I{within} the directory will be recursed.  The link depth is
      maximum depth of the tree at which soft links should be followed.  So, a
      depth of 0 does not follow any soft links, a depth of 1 follows only
      links within the passed-in directory, a depth of 2 follows the links at
      the next level down, etc.

      @note: Any invalid soft links (i.e.  soft links that point to
      non-existent items) will be silently ignored.

      @note: The L{excludeDirs} flag only controls whether any given soft link
      path itself is added to the list once it has been discovered.  It does
      I{not} modify any behavior related to directory recursion.

      @note: The L{excludeDirs} flag only controls whether any given directory
      path itself is added to the list once it has been discovered.  It does
      I{not} modify any behavior related to directory recursion.

      @param path: Directory path whose contents should be added to the list
      @type path: String representing a path on disk

      @param recursive: Indicates whether directory contents should be added recursively.
      @type recursive: Boolean value

      @param addSelf: Ignored in this subclass.

      @param linkDepth: Depth of soft links that should be followed
      @type linkDepth: Integer value, where zero means not to follow any soft links

      @return: Number of items recursively added to the list

      @raise ValueError: If path is not a directory or does not exist.
      @raise ValueError: If the path could not be encoded properly.
      """
      path = encodePath(path)
      path = normalizeDir(path)
      # False is passed where the caller's addSelf would go: a purge list never holds the directory itself
      return super(PurgeItemList, self)._addDirContentsInternal(path, False, recursive, linkDepth)


   ##################
   # Utility methods
   ##################

   def removeYoungFiles(self, daysOld):
      """
      Removes from the list files younger than a certain age (in days).

      Any file whose "age" in days is less than (C{<}) the value of the
      C{daysOld} parameter will be removed from the list so that it will not be
      purged later when L{purgeItems} is called.  Directories and soft links
      will be ignored.

      The "age" of a file is the amount of time since the file was last used,
      per the most recent of the file's C{st_atime} and C{st_mtime} values.

      @note: Some people find the "sense" of this method confusing or
      "backwards".  Keep in mind that this method is used to remove items
      I{from the list}, not from the filesystem!  It removes from the list
      those items that you would I{not} want to purge because they are too
      young.  As an example, passing in C{daysOld} of zero (0) would remove
      from the list no files, which would result in purging all of the files
      later.  I would be happy to make a synonym of this method with an
      easier-to-understand "sense", if someone can suggest one.

      @param daysOld: Minimum age of files that are to be kept in the list.
      @type daysOld: Integer value >= 0.

      @return: Number of entries removed

      @raise ValueError: If C{daysOld} is negative or cannot be converted to an integer.
      """
      removed = 0
      daysOld = int(daysOld)
      if daysOld < 0:
         raise ValueError("Days old value must be an integer >= 0.")
      # Build the surviving list in one pass rather than calling list.remove() per entry
      kept = []
      for entry in self:
         young = False
         if os.path.isfile(entry) and not os.path.islink(entry):
            try:
               young = calculateFileAge(entry) < daysOld
            except OSError:
               pass  # age cannot be determined, so the entry stays in the list
         if young:
            removed += 1
         else:
            kept.append(entry)
      self[:] = kept
      return removed

   def purgeItems(self):
      """
      Purges all items in the list.

      Every item in the list will be purged.  Directories in the list will
      I{not} be purged recursively, and hence will only be removed if they are
      empty.  Errors will be ignored.

      To facilitate easy removal of directories that will end up being empty,
      the delete process happens in two passes: files first (including soft
      links), then directories.

      Soft links are removed whether or not their target still exists, so a
      link whose target was purged earlier in the same pass is purged as well.

      @return: Tuple containing count of (files, dirs) removed
      """
      files = 0
      dirs = 0
      for entry in self:
         # islink() is tested on its own because os.path.exists() is False for a dangling link
         if os.path.islink(entry) or os.path.isfile(entry):
            try:
               os.remove(entry)
               files += 1
               logger.debug("Purged file [%s]." % entry)
            except OSError:
               pass
      for entry in self:
         if os.path.isdir(entry) and not os.path.islink(entry):
            try:
               os.rmdir(entry)   # fails (and is ignored) if the directory is not empty
               dirs += 1
               logger.debug("Purged empty directory [%s]." % entry)
            except OSError:
               pass
      return (files, dirs)
1349 1350 1351 ######################################################################## 1352 # Public functions 1353 ######################################################################## 1354 1355 ########################## 1356 # normalizeDir() function 1357 ########################## 1358
def normalizeDir(path):
   """
   Normalizes a directory name.

   Normalizing means dropping a single trailing path separator, if there is
   one, so that C{/path/to/dir/} and C{/path/to/dir} show up in lists the
   same way.  A path consisting of just the separator is returned untouched.

   @param path: Path to be normalized.
   @type path: String representing a path on disk

   @return: Normalized path, which should be equivalent to the original.
   """
   if path == os.sep or not path.endswith(os.sep):
      return path
   return path[:-1]
1376 1377 1378 ############################# 1379 # compareContents() function 1380 ############################# 1381
def compareContents(path1, path2, verbose=False):
   """
   Compares the contents of two directories to see if they are equivalent.

   The two directories are recursively compared.  First, we check whether they
   contain exactly the same set of files.  Then, we check to see every given
   file has exactly the same contents in both directories.

   This is all relatively simple to implement through the magic of
   L{BackupFileList.generateDigestMap}, which knows how to strip a path prefix
   off the front of each entry in the mapping it generates.  This makes our
   comparison as simple as creating a list for each path, then generating a
   digest map for each path and comparing the two.

   If no exception is thrown, the two directories are considered identical.

   If the C{verbose} flag is C{True}, then an alternate (but slower) method is
   used so that any thrown exception can indicate exactly which file caused the
   comparison to fail.  The thrown C{ValueError} exception distinguishes
   between the directories containing different files, and containing the same
   files with differing content.

   @note: Symlinks are I{not} followed for the purposes of this comparison.

   @param path1: First path to compare.
   @type path1: String representing a path on disk

   @param path2: Second path to compare.
   @type path2: String representing a path on disk

   @param verbose: Indicates whether a verbose response should be given.
   @type verbose: Boolean

   @raise ValueError: If a directory doesn't exist or can't be read.
   @raise ValueError: If the two directories are not equivalent.
   @raise IOError: If there is an unusual problem reading the directories.
   """
   try:
      path1List = BackupFileList()
      path1List.addDirContents(path1)
      path1Digest = path1List.generateDigestMap(stripPrefix=normalizeDir(path1))
      path2List = BackupFileList()
      path2List.addDirContents(path2)
      path2Digest = path2List.generateDigestMap(stripPrefix=normalizeDir(path2))
      compareDigestMaps(path1Digest, path2Digest, verbose)
   except IOError:
      logger.error("I/O error encountered during consistency check.")
      raise   # bare raise keeps the traceback of the original failure
1430
def compareDigestMaps(digest1, digest2, verbose=False):
   """
   Compares two digest maps and throws an exception if they differ.

   In non-verbose mode the two maps are compared wholesale and a generic
   error is raised on any difference.  In verbose mode the key sets are
   compared first (ignoring order), and then the digests are compared key by
   key so that the error can name the first file found to differ.

   @param digest1: First digest to compare.
   @type digest1: Digest as returned from BackupFileList.generateDigestMap()

   @param digest2: Second digest to compare.
   @type digest2: Digest as returned from BackupFileList.generateDigestMap()

   @param verbose: Indicates whether a verbose response should be given.
   @type verbose: Boolean

   @raise ValueError: If the two directories are not equivalent.
   """
   if verbose:
      names1 = UnorderedList(digest1.keys())
      names2 = UnorderedList(digest2.keys())
      if names1 != names2:
         raise ValueError("Directories contain a different set of files.")
      for name in names1:
         if digest1[name] != digest2[name]:
            raise ValueError("File contents for [%s] vary between directories." % name)
      return
   if digest1 != digest2:
      raise ValueError("Consistency check failed.")
1457