246 lines
		
	
	
		
			8.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			246 lines
		
	
	
		
			8.0 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
|   | # Copyright 2003 Dave Abrahams | ||
|  | # Copyright 2001, 2002 Vladimir Prus | ||
|  | # Copyright 2012 Jurko Gospodnetic | ||
|  | # Distributed under the Boost Software License, Version 1.0. | ||
|  | # (See accompanying file LICENSE.txt or copy at | ||
|  | # https://www.bfgroup.xyz/b2/LICENSE.txt) | ||
|  | 
 | ||
|  | ############################################################################### | ||
|  | # | ||
|  | # Based in part on an old Subversion tree.py source file (tools for comparing | ||
|  | # directory trees). See http://subversion.tigris.org for more information. | ||
|  | # | ||
|  | # Copyright (c) 2001 Sam Tobin-Hochstadt.  All rights reserved. | ||
|  | # | ||
|  | # This software is licensed as described in the file COPYING, which you should | ||
|  | # have received as part of this distribution. The terms are also available at | ||
|  | # http://subversion.tigris.org/license-1.html. If newer versions of this | ||
|  | # license are posted there, you may use a newer version instead, at your | ||
|  | # option. | ||
|  | # | ||
|  | ############################################################################### | ||
|  | 
 | ||
|  | from __future__ import print_function | ||
|  | 
 | ||
|  | import os | ||
|  | import os.path | ||
|  | import stat | ||
|  | import sys | ||
|  | 
 | ||
|  | 
 | ||
class TreeNode:
    """
    Basic building block for in-memory file system trees.

    A node whose CHILDREN attribute is None stands for a file; any other
    value is the list of nodes contained in the directory the node stands for.

    NAME is the bare file or directory name. CONTENTS holds the file's
    contents for file nodes. PATH starts out equal to NAME and is rewritten
    to include the parent's path when the node is attached via add_child().
    MTIME starts out as 0 and is expected to be filled in by whoever builds
    the tree.

    """

    def __init__(self, name, children=None, contents=None):
        # A node is either a directory (children) or a file (contents), never
        # both.
        assert children is None or contents is None
        self.name = name
        self.path = name
        self.mtime = 0
        self.children = children
        self.contents = contents

    def add_child(self, newchild):
        """
        Attach NEWCHILD to this directory node, merging on name clashes.

        If no child with the same name exists, NEWCHILD is appended and its
        path is set relative to this node. If one exists and NEWCHILD is a
        file, the existing child's contents and path are overwritten. If one
        exists and NEWCHILD is a directory, each of NEWCHILD's children is
        recursively added to the existing child.

        """
        assert not self.is_file()
        existing = next(
            (node for node in self.children if node.name == newchild.name),
            None)

        if existing is None:
            self.children.append(newchild)
            newchild.path = os.path.join(self.path, newchild.name)
            return

        if newchild.is_file():
            existing.contents = newchild.contents
            existing.path = os.path.join(self.path, newchild.name)
            return

        for grandchild in newchild.children:
            existing.add_child(grandchild)

    def get_child(self, name):
        """
        Return the first direct child called NAME, or None if there is no
        such child.

        """
        candidates = (node for node in self.children if node.name == name)
        return next(candidates, None)

    def is_file(self):
        """Tell whether this node represents a file (has no child list)."""
        return self.children is None

    def pprint(self):
        """Print a short human readable summary of this node."""
        print(" * Node name: %s" % self.name)
        print("    Path:     %s" % self.path)
        print("    Contents: %s" % self.contents)
        if not self.is_file():
            print("    Children: %d" % len(self.children))
        else:
            print("    Children: is a file.")
|  | 
 | ||
|  | 
 | ||
class TreeDifference:
    """
    Accumulates the differences found between two file system trees.

    Each attribute is a list of path strings; paths that name a directory
    end with a "/" character (see ignore_directories()).

    """

    def __init__(self):
        # Paths present only in the newer tree.
        self.added_files = []
        # Paths present only in the older tree.
        self.removed_files = []
        # Files whose contents differ between the two trees.
        self.modified_files = []
        # Files with identical contents but a different modification time.
        self.touched_files = []

    def append(self, other):
        """Merge all differences recorded in OTHER into this instance."""
        self.added_files.extend(other.added_files)
        self.removed_files.extend(other.removed_files)
        self.modified_files.extend(other.modified_files)
        self.touched_files.extend(other.touched_files)

    def ignore_directories(self):
        """Removes directories from our lists of found differences."""
        # endswith() rather than x[-1] so that an empty path string is simply
        # kept instead of raising IndexError.
        not_dir = lambda x: not x.endswith("/")
        self.added_files = list(filter(not_dir, self.added_files))
        self.removed_files = list(filter(not_dir, self.removed_files))
        self.modified_files = list(filter(not_dir, self.modified_files))
        self.touched_files = list(filter(not_dir, self.touched_files))

    def pprint(self, file=None):
        """
        Write a four line summary of the recorded differences to FILE.

        FILE is any object with a write() method. When it is omitted the
        current sys.stdout is used; it is looked up at call time so that a
        redirected standard output stream is honoured.

        """
        if file is None:
            file = sys.stdout
        file.write("Added files   : %s\n" % self.added_files)
        file.write("Removed files : %s\n" % self.removed_files)
        file.write("Modified files: %s\n" % self.modified_files)
        file.write("Touched files : %s\n" % self.touched_files)

    def empty(self):
        """Return True if no difference of any kind has been recorded."""
        return not (self.added_files or self.removed_files or
            self.modified_files or self.touched_files)
|  | 
 | ||
|  | 
 | ||
def build_tree(path):
    """
    Build a tree structure mirroring the files and folders found below the
    folder PATH.

    PATH is normalized before the file system is walked. The result is
    whatever _handle_dir() returns for the normalized path: the root tree
    node together with the largest modification timestamp seen under the
    folder.

    """
    normalized_path = os.path.normpath(path)
    return _handle_dir(normalized_path)
|  | 
 | ||
|  | 
 | ||
def tree_difference(a, b):
    """
    Return a TreeDifference describing how the tree rooted at TreeNode B
    differs from the tree rooted at TreeNode A.

    """
    # Start the recursive comparison at the roots with an empty parent path.
    return _do_tree_difference(a, b, parent_path="", root=True)
|  | 
 | ||
|  | 
 | ||
def _do_tree_difference(a, b, parent_path, root=False):
    """
    Recursive helper doing the actual work for tree_difference().

    A and B are the nodes to compare, PARENT_PATH is the "/" terminated path
    prefix of their parent (empty at the top) and ROOT tells whether A and B
    are the tree roots, whose own names are never reported. Returns a
    TreeDifference instance.

    """
    if root:
        # Root node names are intentionally left out of reported paths.
        assert not parent_path
        assert not a.is_file()
        assert not b.is_file()
        node_path = ""
    else:
        assert a.name == b.name
        node_path = parent_path + a.name
    diff = TreeDifference()

    a_is_file = a.is_file()
    b_is_file = b.is_file()

    # Two files: contents take precedence over the modification time.
    if a_is_file and b_is_file:
        if a.contents != b.contents:
            diff.modified_files.append(node_path)
        elif a.mtime != b.mtime:
            diff.touched_files.append(node_path)
        return diff

    # A directory has been replaced by a file.
    if b_is_file:
        diff.removed_files.extend(_traverse_tree(a, parent_path))
        diff.added_files.append(node_path)
        return diff

    # A file has been replaced by a directory.
    if a_is_file:
        diff.removed_files.append(node_path)
        diff.added_files.extend(_traverse_tree(b, parent_path))
        return diff

    # Two directories: compare their children by name.
    child_prefix = node_path + "/" if node_path else node_path
    matched = []  # Nodes from B that have a counterpart in A.
    for old_child in a.children:
        new_child = b.get_child(old_child.name)
        if not new_child:
            # Only the removed child's own name is recorded; its descendants
            # are not expanded and no trailing "/" is added.
            diff.removed_files.append(child_prefix + old_child.name)
            continue
        matched.append(new_child)
        diff.append(_do_tree_difference(old_child, new_child, child_prefix))
    for new_child in b.children:
        if new_child in matched:
            continue
        # Added children are reported together with everything below them.
        diff.added_files.extend(_traverse_tree(new_child, child_prefix))
    return diff
|  | 
 | ||
|  | 
 | ||
|  | def _traverse_tree(t, parent_path): | ||
|  |     """Returns a list of all names in a tree.""" | ||
|  |     assert not parent_path or parent_path[-1] == "/" | ||
|  |     full_node_name = parent_path + t.name | ||
|  |     if t.is_file(): | ||
|  |         result = [full_node_name] | ||
|  |     else: | ||
|  |         name_prefix = full_node_name + "/" | ||
|  |         result = [name_prefix] | ||
|  |         for i in t.children: | ||
|  |             result.extend(_traverse_tree(i, name_prefix)) | ||
|  |     return result | ||
|  | 
 | ||
|  | 
 | ||
|  | def _get_text(path): | ||
|  |     """Return a string with the textual contents of a file at PATH.""" | ||
|  |     fp = open(path, 'rb') | ||
|  |     try: | ||
|  |         return fp.read() | ||
|  |     finally: | ||
|  |         fp.close() | ||
|  | 
 | ||
|  | 
 | ||
def _handle_dir(path):
    """
    Recursive worker behind build_tree().

    PATH is a normalized folder path. Returns a two element tuple: the newly
    created directory tree node for PATH and the largest modification time
    found among the folder itself and everything below it.

    """
    dir_node = TreeNode(os.path.basename(path), children=[])
    dir_node.mtime = os.stat(path).st_mtime
    newest = dir_node.mtime

    # Sort the folder's entries into sub-folders and regular files; anything
    # that is neither is ignored.
    subdir_paths = []
    file_paths = []
    for entry_name in os.listdir(path):
        entry_path = os.path.join(path, entry_name)
        if os.path.isdir(entry_path):
            subdir_paths.append(entry_path)
            continue
        if os.path.isfile(entry_path):
            file_paths.append(entry_path)

    # Files become leaf nodes carrying their contents and modification time.
    for file_path in file_paths:
        data = _get_text(file_path)
        file_node = TreeNode(os.path.basename(file_path), contents=data)
        file_node.mtime = os.stat(file_path).st_mtime
        if file_node.mtime > newest:
            newest = file_node.mtime
        dir_node.add_child(file_node)

    # Sub-folders are walked recursively and attached after the files.
    for subdir_path in subdir_paths:
        subdir_node, subdir_newest = _handle_dir(subdir_path)
        if subdir_newest > newest:
            newest = subdir_newest
        dir_node.add_child(subdir_node)

    return dir_node, newest