Package CedarBackup2 :: Module filesystem
[hide private]
[frames] | no frames]

Source Code for Module CedarBackup2.filesystem

   1  # -*- coding: iso-8859-1 -*- 
   2  # vim: set ft=python ts=3 sw=3 expandtab: 
   3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   4  # 
   5  #              C E D A R 
   6  #          S O L U T I O N S       "Software done right." 
   7  #           S O F T W A R E 
   8  # 
   9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  10  # 
  11  # Copyright (c) 2004-2008 Kenneth J. Pronovici. 
  12  # All rights reserved. 
  13  # 
  14  # This program is free software; you can redistribute it and/or 
  15  # modify it under the terms of the GNU General Public License, 
  16  # Version 2, as published by the Free Software Foundation. 
  17  # 
  18  # This program is distributed in the hope that it will be useful, 
  19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
  21  # 
  22  # Copies of the GNU General Public License are available from 
  23  # the Free Software Foundation website, http://www.gnu.org/. 
  24  # 
  25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  26  # 
  27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
  28  # Language : Python (>= 2.3) 
  29  # Project  : Cedar Backup, release 2 
  30  # Revision : $Id: filesystem.py 879 2008-03-20 04:00:23Z pronovic $ 
  31  # Purpose  : Provides filesystem-related objects. 
  32  # 
  33  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  34   
  35  ######################################################################## 
  36  # Module documentation 
  37  ######################################################################## 
  38   
  39  """ 
  40  Provides filesystem-related objects. 
  41  @sort: FilesystemList, BackupFileList, PurgeItemList 
  42  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
  43  """ 
  44   
  45   
  46  ######################################################################## 
  47  # Imported modules 
  48  ######################################################################## 
  49   
  50  # System modules 
  51  import sys 
  52  import os 
  53  import re 
  54  import sha 
  55  import math 
  56  import logging 
  57  import tarfile 
  58   
  59  # Cedar Backup modules 
  60  from CedarBackup2.knapsack import firstFit, bestFit, worstFit, alternateFit 
  61  from CedarBackup2.util import AbsolutePathList, ObjectTypeList, UnorderedList, RegexList 
  62  from CedarBackup2.util import removeKeys, displayBytes, calculateFileAge, encodePath 
  63   
  64   
  65  ######################################################################## 
  66  # Module-wide variables 
  67  ######################################################################## 
  68   
  69  logger = logging.getLogger("CedarBackup2.log.filesystem") 
  70   
  71   
  72  ######################################################################## 
  73  # FilesystemList class definition 
  74  ######################################################################## 
  75   
76 -class FilesystemList(list):
77 78 ###################### 79 # Class documentation 80 ###################### 81 82 """ 83 Represents a list of filesystem items. 84 85 This is a generic class that represents a list of filesystem items. Callers 86 can add individual files or directories to the list, or can recursively add 87 the contents of a directory. The class also allows for up-front exclusions 88 in several forms (all files, all directories, all items matching a pattern, 89 all items whose basename matches a pattern, or all directories containing a 90 specific "ignore file"). Symbolic links are typically backed up 91 non-recursively, i.e. the link to a directory is backed up, but not the 92 contents of that link (we don't want to deal with recursive loops, etc.). 93 94 The custom methods such as L{addFile} will only add items if they exist on 95 the filesystem and do not match any exclusions that are already in place. 96 However, since a FilesystemList is a subclass of Python's standard list 97 class, callers can also add items to the list in the usual way, using 98 methods like C{append()} or C{insert()}. No validations apply to items 99 added to the list in this way; however, many list-manipulation methods deal 100 "gracefully" with items that don't exist in the filesystem, often by 101 ignoring them. 102 103 Once a list has been created, callers can remove individual items from the 104 list using standard methods like C{pop()} or C{remove()} or they can use 105 custom methods to remove specific types of entries or entries which match a 106 particular pattern. 107 108 @note: Regular expression patterns that apply to paths are assumed to be 109 bounded at front and back by the beginning and end of the string, i.e. they 110 are treated as if they begin with C{^} and end with C{$}. This is true 111 whether we are matching a complete path or a basename. 112 113 @note: Some platforms, like Windows, do not support soft links. 
On those 114 platforms, the ignore-soft-links flag can be set, but it won't do any good 115 because the operating system never reports a file as a soft link. 116 117 @sort: __init__, addFile, addDir, addDirContents, removeFiles, removeDirs, 118 removeLinks, removeMatch, removeInvalid, normalize, validate, 119 excludeFiles, excludeDirs, excludeLinks, excludePaths, 120 excludePatterns, excludeBasenamePatterns, ignoreFile 121 """ 122 123 124 ############## 125 # Constructor 126 ############## 127
def __init__(self):
   """
   Creates an empty list that has no exclusions configured.

   The private backing fields are created first.  After that, each public
   property is assigned, so the initial values pass through the same
   property setters that callers use later on.
   """
   list.__init__(self)

   # Private backing fields
   self._ignoreFile = None
   self._excludeBasenamePatterns = None
   self._excludePatterns = None
   self._excludePaths = None
   self._excludeLinks = False
   self._excludeDirs = False
   self._excludeFiles = False

   # Public properties (each assignment runs the matching setter)
   self.excludeFiles = False
   self.excludeLinks = False
   self.excludeDirs = False
   self.excludePaths = []
   self.excludePatterns = RegexList()
   self.excludeBasenamePatterns = RegexList()
   self.ignoreFile = None
145 146 147 ############# 148 # Properties 149 ############# 150
def _setExcludeFiles(self, value):
   """
   Property target used to set the exclude files flag.
   Nothing is validated; the truthiness of the value is stored as
   C{True} or C{False}.
   """
   self._excludeFiles = bool(value)
160
def _getExcludeFiles(self):
   """
   Property target used to get the exclude files flag.
   @return: Current value of the private C{_excludeFiles} attribute.
   """
   flag = self._excludeFiles
   return flag
166
def _setExcludeDirs(self, value):
   """
   Property target used to set the exclude directories flag.
   Nothing is validated; the truthiness of the value is stored as
   C{True} or C{False}.
   """
   self._excludeDirs = bool(value)
176
def _getExcludeDirs(self):
   """
   Property target used to get the exclude directories flag.
   @return: Current value of the private C{_excludeDirs} attribute.
   """
   flag = self._excludeDirs
   return flag
182 192 198
def _setExcludePaths(self, value):
   """
   Property target used to set the exclude paths list.

   The stored list is always replaced by a new C{AbsolutePathList}, which
   stays empty when C{value} is C{None}.  Elements do not have to exist on
   disk at the time of assignment.

   @raise ValueError: If any list element is not an absolute path.
   """
   self._absoluteExcludePaths = AbsolutePathList()
   if value is None:
      return
   self._absoluteExcludePaths.extend(value)
209
def _getExcludePaths(self):
   """
   Property target used to get the absolute exclude paths list.
   @return: The list held in the private C{_absoluteExcludePaths} attribute.
   """
   paths = self._absoluteExcludePaths
   return paths
215
def _setExcludePatterns(self, value):
   """
   Property target used to set the exclude patterns list.

   The stored list is always replaced by a new C{RegexList}, which stays
   empty when C{value} is C{None}.
   """
   self._excludePatterns = RegexList()
   if value is None:
      return
   self._excludePatterns.extend(value)
224
def _getExcludePatterns(self):
   """
   Property target used to get the exclude patterns list.
   @return: The list held in the private C{_excludePatterns} attribute.
   """
   patterns = self._excludePatterns
   return patterns
230
def _setExcludeBasenamePatterns(self, value):
   """
   Property target used to set the exclude basename patterns list.

   The stored list is always replaced by a new C{RegexList}, which stays
   empty when C{value} is C{None}.
   """
   self._excludeBasenamePatterns = RegexList()
   if value is None:
      return
   self._excludeBasenamePatterns.extend(value)
239
def _getExcludeBasenamePatterns(self):
   """
   Property target used to get the exclude basename patterns list.
   @return: The list held in the private C{_excludeBasenamePatterns} attribute.
   """
   return self._excludeBasenamePatterns
245
def _setIgnoreFile(self, value):
   """
   Property target used to set the ignore file.

   C{None} is stored as-is.  Any other value must have a length of at least
   one, i.e. it must be a non-empty string.

   @raise ValueError: If the value is an empty string.
   """
   if value is not None and len(value) < 1:
      raise ValueError("The ignore file must be a non-empty string.")
   self._ignoreFile = value
256
def _getIgnoreFile(self):
   """
   Property target used to get the ignore file.
   @return: Current value of the private C{_ignoreFile} attribute.
   """
   name = self._ignoreFile
   return name

# Public properties, each wired to the private getter/setter pair above.
# None of them supports deletion (fdel is None).
excludeFiles = property(fget=_getExcludeFiles, fset=_setExcludeFiles, fdel=None,
                        doc="Boolean indicating whether files should be excluded.")
excludeDirs = property(fget=_getExcludeDirs, fset=_setExcludeDirs, fdel=None,
                       doc="Boolean indicating whether directories should be excluded.")
excludeLinks = property(fget=_getExcludeLinks, fset=_setExcludeLinks, fdel=None,
                        doc="Boolean indicating whether soft links should be excluded.")
excludePaths = property(fget=_getExcludePaths, fset=_setExcludePaths, fdel=None,
                        doc="List of absolute paths to be excluded.")
excludePatterns = property(fget=_getExcludePatterns, fset=_setExcludePatterns, fdel=None,
                           doc="List of regular expression patterns (matching complete path) to be excluded.")
excludeBasenamePatterns = property(fget=_getExcludeBasenamePatterns, fset=_setExcludeBasenamePatterns, fdel=None,
                                   doc="List of regular expression patterns (matching basename) to be excluded.")
ignoreFile = property(fget=_getIgnoreFile, fset=_setIgnoreFile, fdel=None,
                      doc="Name of file which will cause directory contents to be ignored.")


##############
# Add methods
##############

def addFile(self, path):
   """
   Adds a single file to the list.

   The path has to exist on disk and be either a file or a link to an
   existing file.  Exclusions are checked in this order: soft links (when
   L{excludeLinks} is set), all files (when L{excludeFiles} is set), exact
   paths in L{excludePaths}, patterns in L{excludePatterns} matched against
   the complete path, and patterns in L{excludeBasenamePatterns} matched
   against the basename.  Every pattern is anchored with C{^} and C{$}.

   @param path: File path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list (zero if excluded, otherwise one).

   @raise ValueError: If path is not a file or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = encodePath(path)
   if not (os.path.exists(path) and os.path.isfile(path)):
      logger.debug("Path [%s] is not a file or does not exist on disk." % path)
      raise ValueError("Path is not a file or does not exist on disk.")
   if self.excludeLinks and os.path.islink(path):
      logger.debug("Path [%s] is excluded based on excludeLinks." % path)
      return 0
   if self.excludeFiles:
      logger.debug("Path [%s] is excluded based on excludeFiles." % path)
      return 0
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return 0
   # Both pattern lists are RegexList objects, so every pattern is assumed valid
   for pattern in self.excludePatterns:
      anchored = re.compile(r"^%s$" % pattern)
      if anchored.match(path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, pattern))
         return 0
   for pattern in self.excludeBasenamePatterns:
      anchored = re.compile(r"^%s$" % pattern)
      if anchored.match(os.path.basename(path)):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, pattern))
         return 0
   self.append(path)
   logger.debug("Added file to list: [%s]" % path)
   return 1
317
def addDir(self, path):
   """
   Adds a single directory to the list.

   The path has to exist on disk and be either a directory or a link to an
   existing directory.  Only the directory entry itself is added, never its
   contents.  Exclusions are checked in this order: soft links (when
   L{excludeLinks} is set), all directories (when L{excludeDirs} is set),
   exact paths in L{excludePaths}, patterns in L{excludePatterns} matched
   against the complete path, and patterns in L{excludeBasenamePatterns}
   matched against the basename.  The L{ignoreFile} does not apply to this
   method, only to L{addDirContents}.

   @param path: Directory path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list (zero if excluded, otherwise one).

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = normalizeDir(encodePath(path))
   if not (os.path.exists(path) and os.path.isdir(path)):
      logger.debug("Path [%s] is not a directory or does not exist on disk." % path)
      raise ValueError("Path is not a directory or does not exist on disk.")
   if self.excludeLinks and os.path.islink(path):
      logger.debug("Path [%s] is excluded based on excludeLinks." % path)
      return 0
   if self.excludeDirs:
      logger.debug("Path [%s] is excluded based on excludeDirs." % path)
      return 0
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return 0
   # Both pattern lists are RegexList objects, so every pattern is assumed valid
   for pattern in self.excludePatterns:
      anchored = re.compile(r"^%s$" % pattern)
      if anchored.match(path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, pattern))
         return 0
   for pattern in self.excludeBasenamePatterns:
      anchored = re.compile(r"^%s$" % pattern)
      if anchored.match(os.path.basename(path)):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, pattern))
         return 0
   self.append(path)
   logger.debug("Added directory to list: [%s]" % path)
   return 1
360
def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0):
   """
   Adds the contents of a directory to the list.

   The path must exist and must be a directory or a link to a directory.
   By default the directory itself and everything below it are added,
   subject to any exclusions that are in place.  Pass C{recursive=False} to
   add only the directory and its immediate contents, and C{addSelf=False}
   to leave the directory itself out.

   The path is encoded and normalized here; all remaining work is delegated
   to C{_addDirContentsInternal}.

   @note: If a directory's absolute path matches an exclude pattern or path,
   or if the directory contains the configured ignore file, then the
   directory and all of its contents will be recursively excluded from the
   list.

   @note: If the passed-in directory happens to be a soft link, it will be
   recursed.  The C{linkDepth} parameter only controls whether soft links
   I{within} the directory are recursed.  It is the maximum depth of the
   tree at which soft links are followed: 0 follows no soft links, 1
   follows only links directly inside the passed-in directory, 2 also
   follows links one level further down, and so on.

   @note: Any invalid soft links (i.e. soft links that point to
   non-existent items) will be silently ignored.

   @note: The L{excludeDirs} flag only controls whether a discovered
   directory path itself is added to the list.  It does I{not} change how
   directories are recursed.

   @param path: Directory path whose contents should be added to the list
   @type path: String representing a path on disk

   @param recursive: Indicates whether directory contents should be added recursively.
   @type recursive: Boolean value

   @param addSelf: Indicates whether the directory itself should be added to the list.
   @type addSelf: Boolean value

   @param linkDepth: Maximum depth of the tree at which soft links should be followed
   @type linkDepth: Integer value, where zero means not to follow any soft links

   @return: Number of items recursively added to the list

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   normalized = normalizeDir(encodePath(path))
   return self._addDirContentsInternal(normalized, addSelf, recursive, linkDepth)
411
def _addDirContentsInternal(self, path, includePath=True, recursive=True, linkDepth=0):
   """
   Internal implementation of C{addDirContents}.

   Some subclasses need to add the contents of a directory without adding
   the directory itself.  That makes the first call in the recursive chain
   a special case, which is expressed through C{includePath} here instead
   of changing the public interface.  C{addDirContents} is implemented
   wholly in terms of this method.

   The whole directory is skipped (zero is returned) when its path is in
   L{excludePaths}, matches an exclude pattern or basename pattern, or
   contains the configured ignore file.

   C{linkDepth} controls whether soft links to directories are followed.
   Every recursive call passes on the value reduced by one.  When the value
   is zero or less, a soft link to a directory is added as a directory but
   is not followed.

   @param path: Directory path whose contents should be added to the list.
   @param includePath: Indicates whether to include the path as well as contents.
   @param recursive: Indicates whether directory contents should be added recursively.
   @param linkDepth: Depth of soft links that should be followed

   @return: Number of items recursively added to the list

   @raise ValueError: If path is not a directory or does not exist.
   """
   added = 0
   if not (os.path.exists(path) and os.path.isdir(path)):
      logger.debug("Path [%s] is not a directory or does not exist on disk." % path)
      raise ValueError("Path is not a directory or does not exist on disk.")
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return added
   # Both pattern lists are RegexList objects, so every pattern is assumed valid
   for pattern in self.excludePatterns:
      anchored = re.compile(r"^%s$" % pattern)
      if anchored.match(path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, pattern))
         return added
   for pattern in self.excludeBasenamePatterns:
      anchored = re.compile(r"^%s$" % pattern)
      if anchored.match(os.path.basename(path)):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, pattern))
         return added
   if self.ignoreFile is not None and os.path.exists(os.path.join(path, self.ignoreFile)):
      logger.debug("Path [%s] is excluded based on ignore file." % path)
      return added
   if includePath:
      added += self.addDir(path)    # addDir applies its own exclusions, so this may add nothing
   for name in os.listdir(path):
      child = os.path.join(path, name)
      if os.path.isfile(child):
         added += self.addFile(child)
      elif os.path.isdir(child):
         # A real directory is entered whenever we recurse; a link to a
         # directory is entered only while there is link depth left.
         if os.path.islink(child):
            descend = recursive and linkDepth > 0
         else:
            descend = recursive
         if descend:
            added += self._addDirContentsInternal(child, linkDepth=linkDepth - 1)
         else:
            added += self.addDir(child)
      # anything else (e.g. a soft link whose target is missing) is skipped
   return added
476 477 478 ################# 479 # Remove methods 480 ################# 481
def removeFiles(self, pattern=None):
   """
   Removes file entries from the list.

   With no C{pattern} (or C{None}), every entry that is a file on disk is
   removed.  With a pattern, only file entries for which the pattern
   matches at the start of the path are removed.  Entries that do not exist
   on disk are left alone (use L{removeInvalid} to purge those entries).

   This method might be fairly slow for large lists, since it must check
   the type of each item in the list.  If you know ahead of time that you
   want to exclude all files, you are better off setting L{excludeFiles}
   to C{True} before adding items to the list.

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   removed = 0
   compiled = None
   if pattern is not None:
      try:
         compiled = re.compile(pattern)
      except re.error:
         raise ValueError("Pattern is not a valid regular expression.")
   for entry in self[:]:    # walk a copy, because the list shrinks as we go
      if not (os.path.exists(entry) and os.path.isfile(entry)):
         continue
      if compiled is not None and not compiled.match(entry):
         continue
      self.remove(entry)
      logger.debug("Removed path [%s] from list." % entry)
      removed += 1
   logger.debug("Removed a total of %d entries." % removed)
   return removed
521
def removeDirs(self, pattern=None):
   """
   Removes directory entries from the list.

   With no C{pattern} (or C{None}), every entry that is a directory on disk
   is removed.  With a pattern, only directory entries for which the
   pattern matches at the start of the path are removed.  Entries that do
   not exist on disk are left alone (use L{removeInvalid} to purge those
   entries).

   This method might be fairly slow for large lists, since it must check
   the type of each item in the list.  If you know ahead of time that you
   want to exclude all directories, you are better off setting
   L{excludeDirs} to C{True} before adding items to the list (note that
   this will not prevent you from recursively adding the I{contents} of
   directories).

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   removed = 0
   compiled = None
   if pattern is not None:
      try:
         compiled = re.compile(pattern)
      except re.error:
         raise ValueError("Pattern is not a valid regular expression.")
   for entry in self[:]:    # walk a copy, because the list shrinks as we go
      if not (os.path.exists(entry) and os.path.isdir(entry)):
         continue
      if compiled is None:
         self.remove(entry)
         logger.debug("Removed path [%s] from list." % entry)
         removed += 1
      elif compiled.match(entry):
         self.remove(entry)
         logger.debug("Removed path [%s] from list based on pattern [%s]." % (entry, pattern))
         removed += 1
   logger.debug("Removed a total of %d entries." % removed)
   return removed
563 603
def removeMatch(self, pattern):
   """
   Removes from the list all entries matching a pattern.

   Every entry for which C{pattern} matches is removed, without looking at
   the filesystem at all.  Since there is no need to check the type of each
   entry, this is faster than calling L{removeFiles}, L{removeDirs} or
   L{removeLinks} individually.  If you know the patterns ahead of time,
   you may be better off setting L{excludePatterns} or
   L{excludeBasenamePatterns} before adding items to the list.

   @note: Unlike when using the exclude lists, the pattern here is I{not}
   bounded at the front and the back of the string.  You can use any
   pattern you want.

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed.
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   try:
      compiled = re.compile(pattern)
   except re.error:
      raise ValueError("Pattern is not a valid regular expression.")
   removed = 0
   for entry in self[:]:    # walk a copy, because the list shrinks as we go
      if not compiled.match(entry):
         continue
      self.remove(entry)
      logger.debug("Removed path [%s] from list based on pattern [%s]." % (entry, pattern))
      removed += 1
   logger.debug("Removed a total of %d entries." % removed)
   return removed
637
def removeInvalid(self):
   """
   Removes from the list all entries that do not exist on disk.

   An entry is kept when C{os.path.exists()} reports that it exists.  No
   attention is paid to whether the entries are files or directories.

   @return: Number of entries removed.
   """
   removed = 0
   for entry in self[:]:    # walk a copy, because the list shrinks as we go
      if os.path.exists(entry):
         continue
      self.remove(entry)
      logger.debug("Removed path [%s] from list." % entry)
      removed += 1
   logger.debug("Removed a total of %d entries." % removed)
   return removed
656 657 658 ################## 659 # Utility methods 660 ################## 661
def normalize(self):
   """
   Normalizes the list, ensuring that each entry is unique.

   The list is sorted in place and then rewritten so that every run of
   equal entries is reduced to a single entry.  When the method returns,
   the list is sorted and contains no duplicates.
   """
   orig = len(self)
   self.sort()
   unique = []
   for entry in self:
      # After sorting, duplicates are adjacent, so comparing with the last
      # kept entry is enough to detect them.
      if len(unique) == 0 or unique[-1] != entry:
         unique.append(entry)
   self[:] = unique    # replace the contents in place, keeping the same list object
   new = len(self)
   # The number removed is the original size minus the new size
   logger.debug("Completed normalizing list; removed %d items (%d originally, %d now)." % (orig-new, orig, new))
671
def verify(self):
   """
   Verifies that all entries in the list exist on disk.

   Checking stops at the first entry that does not exist.

   @return: C{True} if all entries exist, C{False} otherwise.
   """
   for entry in self:
      if os.path.exists(entry):
         continue
      logger.debug("Path [%s] is invalid; list is not valid." % entry)
      return False
   logger.debug("All entries in list are valid.")
   return True
683 684 685 ######################################################################## 686 # SpanItem class definition 687 ######################################################################## 688
class SpanItem(object):
   """
   Item returned by L{BackupFileList.generateSpan}.

   This is a plain value holder; the four constructor arguments are stored
   unchanged as attributes of the same name.
   """
   def __init__(self, fileList, size, capacity, utilization):
      """
      Create object.
      @param fileList: List of files
      @param size: Size (in bytes) of files
      @param capacity: Capacity that the files were fitted into
      @param utilization: Utilization, as a percentage (0-100)
      """
      self.fileList, self.size = fileList, size
      self.capacity, self.utilization = capacity, utilization
704 705 706 ######################################################################## 707 # BackupFileList class definition 708 ######################################################################## 709
710 -class BackupFileList(FilesystemList):
711 712 ###################### 713 # Class documentation 714 ###################### 715 716 """ 717 List of files to be backed up. 718 719 A BackupFileList is a L{FilesystemList} containing a list of files to be 720 backed up. It only contains files, not directories (soft links are treated 721 like files). On top of the generic functionality provided by 722 L{FilesystemList}, this class adds functionality to keep a hash (checksum) 723 for each file in the list, and it also provides a method to calculate the 724 total size of the files in the list and a way to export the list into tar 725 form. 726 727 @sort: __init__, addDir, totalSize, generateSizeMap, generateDigestMap, 728 generateFitted, generateTarfile, removeUnchanged 729 """ 730 731 ############## 732 # Constructor 733 ############## 734
def __init__(self):
   """
   Creates an empty list that has no exclusions configured.

   All of the work is done by the L{FilesystemList} constructor.
   """
   FilesystemList.__init__(self)
738 739 740 ################################ 741 # Overridden superclass methods 742 ################################ 743
def addDir(self, path):
   """
   Adds a directory to the list.

   A backup list contains only files, so a real directory is never added
   by itself: for those, this method does nothing and returns zero.  A soft
   link to a directory is technically a file, so it is handed to the
   superclass method, with all of the superclass's validations and
   exclusions.  Any path that is not a directory at all is handed to the
   superclass method as well, which is what rejects it.

   @param path: Directory path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list.

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = normalizeDir(encodePath(path))
   if not os.path.isdir(path) or os.path.islink(path):
      return FilesystemList.addDir(self, path)
   return 0    # real directory: deliberately not part of a backup list
771 772 773 ################## 774 # Utility methods 775 ################## 776
def totalSize(self):
   """
   Returns the total size among all files in the list.

   Only entries that are files and are not soft links are counted.  Soft
   links that point at files, directories and entries which do not exist
   on disk are all ignored.

   @return: Total size, in bytes, as a floating point number
   """
   sizes = [ float(os.stat(entry).st_size)
             for entry in self
             if os.path.isfile(entry) and not os.path.islink(entry) ]
   return sum(sizes, 0.0)
790
def generateSizeMap(self):
   """
   Generates a mapping from file to file size in bytes.

   Soft links are included in the mapping and are always listed with size
   zero.  Other entries are included only if they are files.  Directories
   and entries which do not exist on disk are left out.

   @return: Dictionary mapping file to file size (as a floating point number)
   """
   sizes = { }
   for entry in self:
      if os.path.islink(entry):
         size = 0.0
      elif os.path.isfile(entry):
         size = float(os.stat(entry).st_size)
      else:
         continue    # neither a link nor a file, so not part of the map
      sizes[entry] = size
   return sizes
805
def generateDigestMap(self, stripPrefix=None):
   """
   Generates a mapping from file to file digest.

   The digest for each file is whatever C{BackupFileList._generateDigest}
   returns for it.

   Only entries that are files and are not soft links are included.
   Entries which do not exist on disk are ignored.  Soft links are ignored
   too, since the digest would be the one for the file the link points at,
   which doesn't make any sense.

   If C{stripPrefix} is passed in, it is removed from the front of every
   key that starts with it.  This can be useful in generating two
   "relative" digest maps to be compared to one another.  A path that does
   not start with the prefix is used as the key unchanged, even if the
   prefix text occurs somewhere later in the path.

   @param stripPrefix: Common prefix to be stripped from paths
   @type stripPrefix: String with any contents

   @return: Dictionary mapping file to digest value
   @see: L{removeUnchanged}
   """
   table = { }
   for entry in self:
      if os.path.isfile(entry) and not os.path.islink(entry):
         key = entry
         # Strip only a real prefix; a match in the middle of the path must
         # not be cut out, or the key would no longer identify the file.
         if stripPrefix is not None and entry.startswith(stripPrefix):
            key = entry[len(stripPrefix):]
         table[key] = BackupFileList._generateDigest(entry)
   return table
840
def _generateDigest(path):
   """
   Generates an SHA digest for a given file on disk.

   The file is hashed incrementally through the C{update()} method of the
   hash object, reading one chunk at a time, so that the whole file never
   has to be held in memory.  The original implementation was simply::

      sha.new(open(path).read()).hexdigest()

   The incremental approach comes from the U{Simple file hashing
   <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/259109>} Python
   Cookbook recipe.  In the author's tests using a 110 MB file on CD, the
   original implementation took 111 seconds and the incremental one took
   only 40-45 seconds.

   Practice shows that reading in around 4kB (4096 bytes) at a time yields
   the best performance.  Smaller reads are quite a bit slower, and larger
   reads don't make much of a difference.  The size is hardcoded until
   there is evidence that making it configurable is worthwhile.

   The C{hashlib} SHA-1 implementation is used when that module can be
   imported.  Otherwise the C{sha} module is used.  Both produce the same
   digest.

   The file is always closed, even if reading it fails part-way through.

   @param path: Path to generate digest for.

   @return: ASCII-safe SHA digest for the file.
   @raise IOError: If the file cannot be opened or read (C{IOError} is an
                   alias of C{OSError} on Python 3).
   """
   try:
      import hashlib          # not available on the oldest supported interpreters
      s = hashlib.sha1()
   except ImportError:
      s = sha.new()           # the sha module is imported at the top of the file
   f = open(path, mode="rb")  # in case platform cares about binary reads
   try:
      while True:
         chunk = f.read(4096)   # see notes above
         if not chunk:          # an empty read means end of file
            break
         s.update(chunk)
   finally:
      f.close()
   digest = s.hexdigest()
   logger.debug("Generated digest [%s] for file [%s]." % (digest, path))
   return digest
_generateDigest = staticmethod(_generateDigest)

def generateFitted(self, capacity, algorithm="worst_fit"):
   """
   Generates a list of items that fit in the indicated capacity.

   Sometimes, callers would like to include every item in a list, but are
   unable to because not all of the items fit in the space available.  This
   method leaves the current list untouched and returns only the items that
   the chosen knapsack function selected, so no information is lost if the
   fitted result turns out to be unsatisfactory.

   The fitting is done using the functions in the knapsack module.  By
   default, the worst fit algorithm is used, but you can also choose from
   first fit, best fit and alternate fit.

   @param capacity: Maximum capacity among the files in the new list
   @type capacity: Integer, in bytes

   @param algorithm: Knapsack (fit) algorithm to use
   @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"

   @return: Items selected by the knapsack function (first element of its result)
   @raise ValueError: If the algorithm is invalid.
   """
   knapsack = self._getKnapsackTable()
   fitFunction = BackupFileList._getKnapsackFunction(algorithm)
   result = fitFunction(knapsack, capacity)
   return result[0]
914
def generateSpan(self, capacity, algorithm="worst_fit"):
   """
   Splits the list of items into sub-lists that fit in a given capacity.

   Sometimes, callers need to split a backup file list into a set of smaller
   lists.  For instance, you could use this to "span" the files across a set
   of discs.

   The fitting is done using the functions in the knapsack module.  By
   default, the worst fit algorithm is used, but you can also choose from
   first fit, best fit and alternate fit.  The knapsack function is applied
   repeatedly: whatever it selects on each pass becomes one span and is
   taken out of the pool before the next pass.

   @note: If any of your items are larger than the capacity, then it won't
   be possible to find a solution.  In this case, a value error will be
   raised.

   @param capacity: Maximum capacity among the files in the new list
   @type capacity: Integer, in bytes

   @param algorithm: Knapsack (fit) algorithm to use
   @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"

   @return: List of L{SpanItem} objects.

   @raise ValueError: If the algorithm is invalid.
   @raise ValueError: If it's not possible to fit some items
   """
   fitFunction = BackupFileList._getKnapsackFunction(algorithm)
   remaining = self._getKnapsackTable(capacity)
   spans = []
   passNumber = 0
   while remaining:
      passNumber += 1
      fit = fitFunction(remaining, capacity)
      fitted = fit[0]
      if len(fitted) == 0:
         # Should never happen because _getKnapsackTable() rejects oversized files, but let's be safe
         raise ValueError("After iteration %d, unable to add any new items." % passNumber)
      removeKeys(remaining, fitted)
      used = fit[1]
      percentUsed = (float(used)/float(capacity))*100.0
      spans.append(SpanItem(fitted, used, capacity, percentUsed))
   return spans
957
def _getKnapsackTable(self, capacity=None):
   """
   Converts the list into the form needed by the knapsack algorithms.

   Soft links are recorded with a size of zero.  Regular files are recorded
   with their size on disk.  Entries that are neither a soft link nor a
   regular file are left out of the table.

   @param capacity: Optional capacity; when given, every regular file must fit within it.

   @return: Dictionary mapping file name to tuple of (file path, file size).
   @raise ValueError: If C{capacity} is given and some regular file is larger than it.
   """
   knapsack = { }
   for path in self:
      if os.path.islink(path):
         knapsack[path] = (path, 0.0)
         continue
      if not os.path.isfile(path):
         continue
      byteCount = float(os.stat(path).st_size)
      if capacity is not None and byteCount > capacity:
         raise ValueError("File [%s] cannot fit in capacity %s." % (path, displayBytes(capacity)))
      knapsack[path] = (path, byteCount)
   return knapsack
974
def _getKnapsackFunction(algorithm):
   """
   Looks up the knapsack function that goes with an algorithm name.
   Algorithm name must be one of "first_fit", "best_fit", "worst_fit", "alternate_fit"
   @param algorithm: Name of the algorithm
   @return: Reference to knapsack function
   @raise ValueError: If the algorithm name is unknown.
   """
   known = (
      ("first_fit", firstFit),
      ("best_fit", bestFit),
      ("worst_fit", worstFit),
      ("alternate_fit", alternateFit),
   )
   # Scan with == (rather than a dict lookup) so that odd inputs are
   # rejected with ValueError instead of failing to hash.
   for name, function in known:
      if algorithm == name:
         return function
   raise ValueError("Algorithm [%s] is invalid." % algorithm)
_getKnapsackFunction = staticmethod(_getKnapsackFunction)
995 - def generateTarfile(self, path, mode='tar', ignore=False, flat=False):
996 """ 997 Creates a tar file containing the files in the list. 998 999 By default, this method will create uncompressed tar files. If you pass 1000 in mode C{'targz'}, then it will create gzipped tar files, and if you 1001 pass in mode C{'tarbz2'}, then it will create bzipped tar files. 1002 1003 The tar file will be created as a GNU tar archive, which enables extended 1004 file name lengths, etc. Since GNU tar is so prevalent, I've decided that 1005 the extra functionality out-weighs the disadvantage of not being 1006 "standard". 1007 1008 If you pass in C{flat=True}, then a "flat" archive will be created, and 1009 all of the files will be added to the root of the archive. So, the file 1010 C{/tmp/something/whatever.txt} would be added as just C{whatever.txt}. 1011 1012 By default, the whole method call fails if there are problems adding any 1013 of the files to the archive, resulting in an exception. Under these 1014 circumstances, callers are advised that they might want to call 1015 L{removeInvalid()} and then attempt to extract the tar file a second 1016 time, since the most common cause of failures is a missing file (a file 1017 that existed when the list was built, but is gone again by the time the 1018 tar file is built). 1019 1020 If you want to, you can pass in C{ignore=True}, and the method will 1021 ignore errors encountered when adding individual files to the archive 1022 (but not errors opening and closing the archive itself). 1023 1024 We'll always attempt to remove the tarfile from disk if an exception will 1025 be thrown. 1026 1027 @note: No validation is done as to whether the entries in the list are 1028 files, since only files or soft links should be in an object like this. 1029 However, to be safe, everything is explicitly added to the tar archive 1030 non-recursively so it's safe to include soft links to directories. 
1031 1032 @note: The Python C{tarfile} module, which is used internally here, is 1033 supposed to deal properly with long filenames and links. In my testing, 1034 I have found that it appears to be able to add long really long filenames 1035 to archives, but doesn't do a good job reading them back out, even out of 1036 an archive it created. Fortunately, all Cedar Backup does is add files 1037 to archives. 1038 1039 @param path: Path of tar file to create on disk 1040 @type path: String representing a path on disk 1041 1042 @param mode: Tar creation mode 1043 @type mode: One of either C{'tar'}, C{'targz'} or C{'tarbz2'} 1044 1045 @param ignore: Indicates whether to ignore certain errors. 1046 @type ignore: Boolean 1047 1048 @param flat: Creates "flat" archive by putting all items in root 1049 @type flat: Boolean 1050 1051 @raise ValueError: If mode is not valid 1052 @raise ValueError: If list is empty 1053 @raise ValueError: If the path could not be encoded properly. 1054 @raise TarError: If there is a problem creating the tar file 1055 """ 1056 path = encodePath(path) 1057 if len(self) == 0: raise ValueError("Empty list cannot be used to generate tarfile.") 1058 if(mode == 'tar'): tarmode = "w:" 1059 elif(mode == 'targz'): tarmode = "w:gz" 1060 elif(mode == 'tarbz2'): tarmode = "w:bz2" 1061 else: raise ValueError("Mode [%s] is not valid." % mode) 1062 try: 1063 tar = tarfile.open(path, tarmode) 1064 tar.posix = False # make a GNU-compatible archive without file length limits 1065 for entry in self: 1066 try: 1067 if flat: 1068 tar.add(entry, arcname=os.path.basename(entry), recursive=False) 1069 else: 1070 tar.add(entry, recursive=False) 1071 except tarfile.TarError, e: 1072 if not ignore: 1073 raise e 1074 logger.info("Unable to add file [%s]; going on anyway." % entry) 1075 except OSError, e: 1076 if not ignore: 1077 raise tarfile.TarError(e) 1078 logger.info("Unable to add file [%s]; going on anyway." 
% entry) 1079 tar.close() 1080 except tarfile.ReadError, e: 1081 try: tar.close() 1082 except: pass 1083 if os.path.exists(path): 1084 try: os.remove(path) 1085 except: pass 1086 raise tarfile.ReadError("Unable to open [%s]; maybe directory doesn't exist?" % path) 1087 except tarfile.TarError, e: 1088 try: tar.close() 1089 except: pass 1090 if os.path.exists(path): 1091 try: os.remove(path) 1092 except: pass 1093 raise e
1094
def removeUnchanged(self, digestMap, captureDigest=False):
   """
   Removes unchanged entries from the list.

   This method relies on a digest map as returned from L{generateDigestMap}.
   For each entry in C{digestMap}, if the entry also exists in the current
   list I{and} the entry in the current list has the same digest value as in
   the map, the entry in the current list will be removed.

   This gives callers a convenient way to filter unneeded entries from a
   list.  The idea is that a caller captures a digest map from
   C{generateDigestMap} at some point in time (perhaps the beginning of the
   week) and saves it off using C{pickle} or some other method.  Later, the
   caller uses this method to filter out any files that are unchanged
   relative to the saved-off map.

   If C{captureDigest} is C{True}, a digest is generated up front for every
   entry in the list that is a regular file and not a soft link, and the
   removal step then looks values up in that complete set of digests.

   If C{captureDigest} is C{False}, a digest is generated only for entries
   that actually appear in C{digestMap}, and only if they are regular files
   (not soft links) that currently exist on disk.

   The return value varies depending on C{captureDigest}, as well.  To
   preserve backwards compatibility, if C{captureDigest} is C{False}, then
   we'll just return a single value representing the number of entries
   removed.  Otherwise, we'll return a tuple of C{(entries removed, digest
   map)}, where the digest map holds the digests captured up front.

   @note: For performance reasons, this method actually ends up rebuilding
   the list from scratch.  The entries are first loaded into a temporary
   dictionary, unchanged entries are deleted from the dictionary (which is
   faster than the equivalent operation on a list), and finally the list
   contents are replaced with the keys left in the dictionary.

   @param digestMap: Dictionary mapping file name to digest value.
   @type digestMap: Map as returned from L{generateDigestMap}.

   @param captureDigest: Indicates that digest information should be captured.
   @type captureDigest: Boolean

   @return: Number of entries removed, or tuple of C{(entries removed, digest map)}
   """
   removed = 0
   table = {}
   if not captureDigest:
      for item in self:
         table[item] = None
      for known in digestMap.keys():
         if known not in table:
            continue
         if not os.path.isfile(known) or os.path.islink(known):
            continue
         if BackupFileList._generateDigest(known) == digestMap[known]:
            removed += 1
            del table[known]
            logger.debug("Discarded unchanged file [%s]." % known)
      self[:] = table.keys()
      return removed
   captured = {}
   for item in self:
      if os.path.isfile(item) and not os.path.islink(item):
         current = BackupFileList._generateDigest(item)
         table[item] = current
         captured[item] = current
      else:
         table[item] = None  # marks entries that have no digest to compare
   for known in digestMap.keys():
      if known not in table:
         continue
      current = table[known]
      if current is None:  # equivalent to the file/link check in the other mode
         continue
      if current == digestMap[known]:
         removed += 1
         del table[known]
         logger.debug("Discarded unchanged file [%s]." % known)
   self[:] = table.keys()
   return (removed, captured)
1177 1178 1179 ######################################################################## 1180 # PurgeItemList class definition 1181 ######################################################################## 1182
class PurgeItemList(FilesystemList):

   ######################
   # Class documentation
   ######################

   """
   List of files and directories to be purged.

   A PurgeItemList is a L{FilesystemList} containing a list of files and
   directories to be purged.  On top of the generic functionality provided by
   L{FilesystemList}, this class adds functionality to remove items that are
   too young to be purged, and to actually remove each item in the list from
   the filesystem.

   The other main difference is that when you add a directory's contents to a
   purge item list, the directory itself is not added to the list.  This way,
   if someone asks to purge within C{/opt/backup/collect}, that directory
   doesn't get removed once all of the files within it are gone.
   """

   ##############
   # Constructor
   ##############

   def __init__(self):
      """Initializes a list with no configured exclusions."""
      FilesystemList.__init__(self)


   ##############
   # Add methods
   ##############

   def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0):
      """
      Adds the contents of a directory to the list.

      The path must exist and must be a directory or a link to a directory.
      The contents of the directory (but I{not} the directory path itself) will
      be recursively added to the list, subject to any exclusions that are in
      place.  If you do not want the contents to be added recursively, then
      pass in C{recursive=False}.

      @note: If a directory's absolute path matches an exclude pattern or path,
      or if the directory contains the configured ignore file, then the
      directory and all of its contents will be recursively excluded from the
      list.

      @note: If the passed-in directory happens to be a soft link, it will be
      recursed.  However, the linkDepth parameter controls whether any soft
      links I{within} the directory will be recursed.  The link depth is
      maximum depth of the tree at which soft links should be followed.  So, a
      depth of 0 does not follow any soft links, a depth of 1 follows only
      links within the passed-in directory, a depth of 2 follows the links at
      the next level down, etc.

      @note: Any invalid soft links (i.e.  soft links that point to
      non-existent items) will be silently ignored.

      @note: The L{excludeLinks} flag only controls whether any given soft link
      path itself is added to the list once it has been discovered.  It does
      I{not} modify any behavior related to directory recursion.

      @note: The L{excludeDirs} flag only controls whether any given directory
      path itself is added to the list once it has been discovered.  It does
      I{not} modify any behavior related to directory recursion.

      @param path: Directory path whose contents should be added to the list
      @type path: String representing a path on disk

      @param recursive: Indicates whether directory contents should be added recursively.
      @type recursive: Boolean value

      @param addSelf: Ignored in this subclass.

      @param linkDepth: Depth of soft links that should be followed
      @type linkDepth: Integer value, where zero means not to follow any soft links

      @return: Number of items recursively added to the list

      @raise ValueError: If path is not a directory or does not exist.
      @raise ValueError: If the path could not be encoded properly.
      """
      path = encodePath(path)
      path = normalizeDir(path)
      # The hard-coded False takes the place of addSelf: the directory itself is never added.
      return super(PurgeItemList, self)._addDirContentsInternal(path, False, recursive, linkDepth)


   ##################
   # Utility methods
   ##################

   def removeYoungFiles(self, daysOld):
      """
      Removes from the list files younger than a certain age (in days).

      Any file whose "age" in days is less than (C{<}) the value of the
      C{daysOld} parameter will be removed from the list so that it will not be
      purged later when L{purgeItems} is called.  Directories and soft links
      will be ignored.

      The "age" of a file is the amount of time since the file was last used,
      per the most recent of the file's C{st_atime} and C{st_mtime} values.

      @note: Some people find the "sense" of this method confusing or
      "backwards".  Keep in mind that this method is used to remove items
      I{from the list}, not from the filesystem!  It removes from the list
      those items that you would I{not} want to purge because they are too
      young.  As an example, passing in C{daysOld} of zero (0) would remove
      from the list no files, which would result in purging all of the files
      later.  I would be happy to make a synonym of this method with an
      easier-to-understand "sense", if someone can suggest one.

      @param daysOld: Minimum age of files that are to be kept in the list.
      @type daysOld: Integer value >= 0.

      @return: Number of entries removed
      @raise ValueError: If C{daysOld} is negative.
      """
      removed = 0
      daysOld = int(daysOld)
      if daysOld < 0:
         raise ValueError("Days old value must be an integer >= 0.")
      for entry in self[:]:  # iterate over a copy, since entries are removed along the way
         if os.path.isfile(entry) and not os.path.islink(entry):
            try:
               ageInDays = calculateFileAge(entry)
               ageInWholeDays = math.floor(ageInDays)
               if ageInWholeDays < daysOld:
                  removed += 1
                  self.remove(entry)
            except OSError:
               # An entry whose age cannot be determined is left in the list.
               pass
      return removed

   def purgeItems(self):
      """
      Purges all items in the list.

      Every item in the list will be purged.  Directories in the list will
      I{not} be purged recursively, and hence will only be removed if they are
      empty.  Errors will be ignored.

      To facilitate easy removal of directories that will end up being empty,
      the delete process happens in two passes: files first (including soft
      links), then directories.

      Soft links are removed even when they are dangling.  This matters
      because a link's target may itself have been purged earlier in the same
      pass, and a leftover link would keep its parent directory from being
      removed in the second pass.

      @return: Tuple containing count of (files, dirs) removed
      """
      files = 0
      dirs = 0
      for entry in self:
         # No os.path.exists() guard here: it follows links and so reports False
         # for dangling soft links, which still need to be removed.
         if os.path.isfile(entry) or os.path.islink(entry):
            try:
               os.remove(entry)
               files += 1
               logger.debug("Purged file [%s]." % entry)
            except OSError:
               pass
      for entry in self:
         if os.path.isdir(entry) and not os.path.islink(entry):
            try:
               os.rmdir(entry)
               dirs += 1
               logger.debug("Purged empty directory [%s]." % entry)
            except OSError:
               pass
      return (files, dirs)
1351 1352 1353 ######################################################################## 1354 # Public functions 1355 ######################################################################## 1356 1357 ########################## 1358 # normalizeDir() function 1359 ########################## 1360
def normalizeDir(path):
   """
   Normalizes a directory name.

   A directory name is normalized by dropping one trailing path separator,
   if there is one.  A path consisting of just the separator (the root) is
   returned untouched.  This matters because we want directories to appear
   within lists in a consistent way, although from the user's perspective
   passing in C{/path/to/dir/} and C{/path/to/dir} are equivalent.

   @param path: Path to be normalized.
   @type path: String representing a path on disk

   @return: Normalized path, which should be equivalent to the original.
   """
   if path == os.sep:
      return path
   if path[-1:] != os.sep:
      return path
   return path[:-1]
1378 1379 1380 ############################# 1381 # compareContents() function 1382 ############################# 1383
1384 -def compareContents(path1, path2, verbose=False):
1385 """ 1386 Compares the contents of two directories to see if they are equivalent. 1387 1388 The two directories are recursively compared. First, we check whether they 1389 contain exactly the same set of files. Then, we check to see every given 1390 file has exactly the same contents in both directories. 1391 1392 This is all relatively simple to implement through the magic of 1393 L{BackupFileList.generateDigestMap}, which knows how to strip a path prefix 1394 off the front of each entry in the mapping it generates. This makes our 1395 comparison as simple as creating a list for each path, then generating a 1396 digest map for each path and comparing the two. 1397 1398 If no exception is thrown, the two directories are considered identical. 1399 1400 If the C{verbose} flag is C{True}, then an alternate (but slower) method is 1401 used so that any thrown exception can indicate exactly which file caused the 1402 comparison to fail. The thrown C{ValueError} exception distinguishes 1403 between the directories containing different files, and containing the same 1404 files with differing content. 1405 1406 @note: Symlinks are I{not} followed for the purposes of this comparison. 1407 1408 @param path1: First path to compare. 1409 @type path1: String representing a path on disk 1410 1411 @param path2: First path to compare. 1412 @type path2: String representing a path on disk 1413 1414 @param verbose: Indicates whether a verbose response should be given. 1415 @type verbose: Boolean 1416 1417 @raise ValueError: If a directory doesn't exist or can't be read. 1418 @raise ValueError: If the two directories are not equivalent. 1419 @raise IOError: If there is an unusual problem reading the directories. 
1420 """ 1421 try: 1422 path1List = BackupFileList() 1423 path1List.addDirContents(path1) 1424 path1Digest = path1List.generateDigestMap(stripPrefix=normalizeDir(path1)) 1425 path2List = BackupFileList() 1426 path2List.addDirContents(path2) 1427 path2Digest = path2List.generateDigestMap(stripPrefix=normalizeDir(path2)) 1428 compareDigestMaps(path1Digest, path2Digest, verbose) 1429 except IOError, e: 1430 logger.error("I/O error encountered during consistency check.") 1431 raise e
1432
def compareDigestMaps(digest1, digest2, verbose=False):
   """
   Compares two digest maps and throws an exception if they differ.

   In the default (non-verbose) mode the two maps are simply compared as a
   whole.  In verbose mode the sets of keys are compared first, and then the
   digest values are compared key by key, so that the exception message can
   say what kind of difference was found.

   @param digest1: First digest to compare.
   @type digest1: Digest as returned from BackupFileList.generateDigestMap()

   @param digest2: Second digest to compare.
   @type digest2: Digest as returned from BackupFileList.generateDigestMap()

   @param verbose: Indicates whether a verbose response should be given.
   @type verbose: Boolean

   @raise ValueError: If the two directories are not equivalent.
   """
   if not verbose:
      if digest1 != digest2:
         raise ValueError("Consistency check failed.")
      return
   names1 = UnorderedList(digest1.keys())
   names2 = UnorderedList(digest2.keys())
   if names1 != names2:
      raise ValueError("Directories contain a different set of files.")
   for name in names1:
      if digest1[name] != digest2[name]:
         raise ValueError("File contents for [%s] vary between directories." % name)
1459