246 lines
		
	
	
		
			8.0 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			246 lines
		
	
	
		
			8.0 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
	
	
	
| # Copyright 2003 Dave Abrahams
 | |
| # Copyright 2001, 2002 Vladimir Prus
 | |
| # Copyright 2012 Jurko Gospodnetic
 | |
| # Distributed under the Boost Software License, Version 1.0.
 | |
| # (See accompanying file LICENSE.txt or copy at
 | |
| # https://www.bfgroup.xyz/b2/LICENSE.txt)
 | |
| 
 | |
| ###############################################################################
 | |
| #
 | |
| # Based in part on an old Subversion tree.py source file (tools for comparing
 | |
| # directory trees). See http://subversion.tigris.org for more information.
 | |
| #
 | |
| # Copyright (c) 2001 Sam Tobin-Hochstadt.  All rights reserved.
 | |
| #
 | |
| # This software is licensed as described in the file COPYING, which you should
 | |
| # have received as part of this distribution. The terms are also available at
 | |
| # http://subversion.tigris.org/license-1.html. If newer versions of this
 | |
| # license are posted there, you may use a newer version instead, at your
 | |
| # option.
 | |
| #
 | |
| ###############################################################################
 | |
| 
 | |
| from __future__ import print_function
 | |
| 
 | |
| import os
 | |
| import os.path
 | |
| import stat
 | |
| import sys
 | |
| 
 | |
| 
 | |
| class TreeNode:
 | |
|     """
 | |
|       Fundamental data type used to build file system tree structures.
 | |
| 
 | |
|       If CHILDREN is None, then the node represents a file. Otherwise, CHILDREN
 | |
|     is a list of the nodes representing that directory's children.
 | |
| 
 | |
|       NAME is simply the name of the file or directory. CONTENTS is a string
 | |
|     holding the file's contents (if a file).
 | |
| 
 | |
|     """
 | |
| 
 | |
|     def __init__(self, name, children=None, contents=None):
 | |
|         assert children is None or contents is None
 | |
|         self.name = name
 | |
|         self.mtime = 0
 | |
|         self.children = children
 | |
|         self.contents = contents
 | |
|         self.path = name
 | |
| 
 | |
|     def add_child(self, newchild):
 | |
|         assert not self.is_file()
 | |
|         for a in self.children:
 | |
|             if a.name == newchild.name:
 | |
|                 if newchild.is_file():
 | |
|                     a.contents = newchild.contents
 | |
|                     a.path = os.path.join(self.path, newchild.name)
 | |
|                 else:
 | |
|                     for i in newchild.children:
 | |
|                         a.add_child(i)
 | |
|                 break
 | |
|         else:
 | |
|             self.children.append(newchild)
 | |
|             newchild.path = os.path.join(self.path, newchild.name)
 | |
| 
 | |
|     def get_child(self, name):
 | |
|         """
 | |
|           If the given TreeNode directory NODE contains a child named NAME,
 | |
|         return the child; else, return None.
 | |
| 
 | |
|         """
 | |
|         for n in self.children:
 | |
|             if n.name == name:
 | |
|                 return n
 | |
| 
 | |
|     def is_file(self):
 | |
|         return self.children is None
 | |
| 
 | |
|     def pprint(self):
 | |
|         print(" * Node name: %s" % self.name)
 | |
|         print("    Path:     %s" % self.path)
 | |
|         print("    Contents: %s" % self.contents)
 | |
|         if self.is_file():
 | |
|             print("    Children: is a file.")
 | |
|         else:
 | |
|             print("    Children: %d" % len(self.children))
 | |
| 
 | |
| 
 | |
| class TreeDifference:
 | |
|     def __init__(self):
 | |
|         self.added_files = []
 | |
|         self.removed_files = []
 | |
|         self.modified_files = []
 | |
|         self.touched_files = []
 | |
| 
 | |
|     def append(self, other):
 | |
|         self.added_files.extend(other.added_files)
 | |
|         self.removed_files.extend(other.removed_files)
 | |
|         self.modified_files.extend(other.modified_files)
 | |
|         self.touched_files.extend(other.touched_files)
 | |
| 
 | |
|     def ignore_directories(self):
 | |
|         """Removes directories from our lists of found differences."""
 | |
|         not_dir = lambda x : x[-1] != "/"
 | |
|         self.added_files = list(filter(not_dir, self.added_files))
 | |
|         self.removed_files = list(filter(not_dir, self.removed_files))
 | |
|         self.modified_files = list(filter(not_dir, self.modified_files))
 | |
|         self.touched_files = list(filter(not_dir, self.touched_files))
 | |
| 
 | |
|     def pprint(self, file=sys.stdout):
 | |
|         file.write("Added files   : %s\n" % self.added_files)
 | |
|         file.write("Removed files : %s\n" % self.removed_files)
 | |
|         file.write("Modified files: %s\n" % self.modified_files)
 | |
|         file.write("Touched files : %s\n" % self.touched_files)
 | |
| 
 | |
|     def empty(self):
 | |
|         return not (self.added_files or self.removed_files or
 | |
|             self.modified_files or self.touched_files)
 | |
| 
 | |
| 
 | |
| def build_tree(path):
 | |
|     """
 | |
|       Takes PATH as the folder path, walks the file system below that path, and
 | |
|     creates a tree structure based on any files and folders found there.
 | |
|     Returns the prepared tree structure plus the maximum file modification
 | |
|     timestamp under the given folder.
 | |
| 
 | |
|     """
 | |
|     return _handle_dir(os.path.normpath(path))
 | |
| 
 | |
| 
 | |
| def tree_difference(a, b):
 | |
|     """Compare TreeNodes A and B, and create a TreeDifference instance."""
 | |
|     return _do_tree_difference(a, b, "", True)
 | |
| 
 | |
| 
 | |
| def _do_tree_difference(a, b, parent_path, root=False):
 | |
|     """Internal recursive worker function for tree_difference()."""
 | |
| 
 | |
|     # We do not want to list root node names.
 | |
|     if root:
 | |
|         assert not parent_path
 | |
|         assert not a.is_file()
 | |
|         assert not b.is_file()
 | |
|         full_path = ""
 | |
|     else:
 | |
|         assert a.name == b.name
 | |
|         full_path = parent_path + a.name
 | |
|     result = TreeDifference()
 | |
| 
 | |
|     # A and B are both files.
 | |
|     if a.is_file() and b.is_file():
 | |
|         if a.contents != b.contents:
 | |
|             result.modified_files.append(full_path)
 | |
|         elif a.mtime != b.mtime:
 | |
|             result.touched_files.append(full_path)
 | |
|         return result
 | |
| 
 | |
|     # Directory converted to file.
 | |
|     if not a.is_file() and b.is_file():
 | |
|         result.removed_files.extend(_traverse_tree(a, parent_path))
 | |
|         result.added_files.append(full_path)
 | |
| 
 | |
|     # File converted to directory.
 | |
|     elif a.is_file() and not b.is_file():
 | |
|         result.removed_files.append(full_path)
 | |
|         result.added_files.extend(_traverse_tree(b, parent_path))
 | |
| 
 | |
|     # A and B are both directories.
 | |
|     else:
 | |
|         if full_path:
 | |
|             full_path += "/"
 | |
|         accounted_for = []  # Children present in both trees.
 | |
|         for a_child in a.children:
 | |
|             b_child = b.get_child(a_child.name)
 | |
|             if b_child:
 | |
|                 accounted_for.append(b_child)
 | |
|                 result.append(_do_tree_difference(a_child, b_child, full_path))
 | |
|             else:
 | |
|                 result.removed_files.append(full_path + a_child.name)
 | |
|         for b_child in b.children:
 | |
|             if b_child not in accounted_for:
 | |
|                 result.added_files.extend(_traverse_tree(b_child, full_path))
 | |
| 
 | |
|     return result
 | |
| 
 | |
| 
 | |
| def _traverse_tree(t, parent_path):
 | |
|     """Returns a list of all names in a tree."""
 | |
|     assert not parent_path or parent_path[-1] == "/"
 | |
|     full_node_name = parent_path + t.name
 | |
|     if t.is_file():
 | |
|         result = [full_node_name]
 | |
|     else:
 | |
|         name_prefix = full_node_name + "/"
 | |
|         result = [name_prefix]
 | |
|         for i in t.children:
 | |
|             result.extend(_traverse_tree(i, name_prefix))
 | |
|     return result
 | |
| 
 | |
| 
 | |
| def _get_text(path):
 | |
|     """Return a string with the textual contents of a file at PATH."""
 | |
|     fp = open(path, 'rb')
 | |
|     try:
 | |
|         return fp.read()
 | |
|     finally:
 | |
|         fp.close()
 | |
| 
 | |
| 
 | |
| def _handle_dir(path):
 | |
|     """
 | |
|       Main recursive worker function for build_tree(). Returns a newly created
 | |
|     tree node representing the given normalized folder path as well as the
 | |
|     maximum file/folder modification time detected under the same path.
 | |
| 
 | |
|     """
 | |
|     files = []
 | |
|     dirs = []
 | |
|     node = TreeNode(os.path.basename(path), children=[])
 | |
|     max_mtime = node.mtime = os.stat(path).st_mtime
 | |
| 
 | |
|     # List files & folders.
 | |
|     for f in os.listdir(path):
 | |
|         f = os.path.join(path, f)
 | |
|         if os.path.isdir(f):
 | |
|             dirs.append(f)
 | |
|         elif os.path.isfile(f):
 | |
|             files.append(f)
 | |
| 
 | |
|     # Add a child node for each file.
 | |
|     for f in files:
 | |
|         fcontents = _get_text(f)
 | |
|         new_file_node = TreeNode(os.path.basename(f), contents=fcontents)
 | |
|         new_file_node.mtime = os.stat(f).st_mtime
 | |
|         max_mtime = max(max_mtime, new_file_node.mtime)
 | |
|         node.add_child(new_file_node)
 | |
| 
 | |
|     # For each subdir, create a node, walk its tree, add it as a child.
 | |
|     for d in dirs:
 | |
|         new_dir_node, new_max_mtime = _handle_dir(d)
 | |
|         max_mtime = max(max_mtime, new_max_mtime)
 | |
|         node.add_child(new_dir_node)
 | |
| 
 | |
|     return node, max_mtime
 |