# Copyright 2003 Dave Abrahams
# Copyright 2001, 2002 Vladimir Prus
# Copyright 2012 Jurko Gospodnetic
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE.txt or copy at
# https://www.bfgroup.xyz/b2/LICENSE.txt)

###############################################################################
#
# Based in part on an old Subversion tree.py source file (tools for comparing
# directory trees). See http://subversion.tigris.org for more information.
#
# Copyright (c) 2001 Sam Tobin-Hochstadt.  All rights reserved.
#
# This software is licensed as described in the file COPYING, which you should
# have received as part of this distribution. The terms are also available at
# http://subversion.tigris.org/license-1.html. If newer versions of this
# license are posted there, you may use a newer version instead, at your
# option.
#
###############################################################################

from __future__ import print_function

import os
import os.path
import stat
import sys


class TreeNode:
    """
      Fundamental data type used to build file system tree structures.

      If CHILDREN is None, then the node represents a file. Otherwise, CHILDREN
    is a list of the nodes representing that directory's children.

      NAME is simply the name of the file or directory. CONTENTS is a string
    holding the file's contents (if a file).

      PATH starts out equal to NAME and is rewritten by add_child() so that it
    always holds the node's path below (and including) the topmost node it has
    been attached to. MTIME starts out as 0; callers assign the real value.

    """

    def __init__(self, name, children=None, contents=None):
        # A node is either a directory (children) or a file (contents).
        assert children is None or contents is None
        self.name = name
        self.mtime = 0
        self.children = children
        self.contents = contents
        self.path = name

    def _set_path(self, path):
        """
          Stores PATH as this node's path and recomputes the paths of all of
        its descendants so they stay rooted under it.

        """
        self.path = path
        if self.children is not None:
            for child in self.children:
                child._set_path(os.path.join(path, child.name))

    def add_child(self, newchild):
        """
          Attaches NEWCHILD to this directory node.

          If a child with the same name already exists, NEWCHILD is merged into
        it: a file NEWCHILD overwrites the existing child's contents, while a
        directory NEWCHILD has each of its own children added to the existing
        child. Otherwise NEWCHILD is appended to CHILDREN.

        """
        assert not self.is_file()
        for existing in self.children:
            if existing.name == newchild.name:
                if newchild.is_file():
                    existing.contents = newchild.contents
                    existing._set_path(
                        os.path.join(self.path, newchild.name))
                else:
                    for grandchild in newchild.children:
                        existing.add_child(grandchild)
                break
        else:
            self.children.append(newchild)
            # NEWCHILD may already carry a populated subtree (trees are built
            # bottom-up), so its descendants' paths must be re-rooted as well;
            # updating only NEWCHILD would leave them relative to its old path.
            newchild._set_path(os.path.join(self.path, newchild.name))

    def get_child(self, name):
        """
          If this directory node contains a child named NAME, return the
        child; else, return None.

        """
        for child in self.children:
            if child.name == name:
                return child
        return None

    def is_file(self):
        """Returns whether this node represents a file (has no child list)."""
        return self.children is None

    def pprint(self):
        """Prints a short human readable description of this node."""
        print(" * Node name: %s" % self.name)
        print("    Path:     %s" % self.path)
        print("    Contents: %s" % self.contents)
        if self.is_file():
            print("    Children: is a file.")
        else:
            print("    Children: %d" % len(self.children))


class TreeDifference:
    """
      Collects the differences found between two trees as four lists of path
    names: added, removed, modified (contents differ) and touched (same
    contents but a different modification time). Directory entries are
    recognizable by their trailing "/".

    """

    def __init__(self):
        self.added_files = []
        self.removed_files = []
        self.modified_files = []
        self.touched_files = []

    def append(self, other):
        """Merges all differences recorded in OTHER into this instance."""
        self.added_files.extend(other.added_files)
        self.removed_files.extend(other.removed_files)
        self.modified_files.extend(other.modified_files)
        self.touched_files.extend(other.touched_files)

    def ignore_directories(self):
        """Removes directories from our lists of found differences."""
        def not_dir(name):
            # endswith() rather than name[-1] so an empty name cannot raise
            # an IndexError.
            return not name.endswith("/")

        self.added_files = [x for x in self.added_files if not_dir(x)]
        self.removed_files = [x for x in self.removed_files if not_dir(x)]
        self.modified_files = [x for x in self.modified_files if not_dir(x)]
        self.touched_files = [x for x in self.touched_files if not_dir(x)]

    def pprint(self, file=None):
        """
          Writes the four difference lists to FILE, one per line. FILE defaults
        to whatever sys.stdout is at the time of the call.

        """
        if file is None:
            # Looked up at call time so that a redirected sys.stdout is
            # honoured; a default of sys.stdout would be frozen at import.
            file = sys.stdout
        file.write("Added files   : %s\n" % self.added_files)
        file.write("Removed files : %s\n" % self.removed_files)
        file.write("Modified files: %s\n" % self.modified_files)
        file.write("Touched files : %s\n" % self.touched_files)

    def empty(self):
        """Returns whether no differences at all have been recorded."""
        return not (self.added_files or self.removed_files or
            self.modified_files or self.touched_files)


def build_tree(path):
    """
      Builds a tree structure mirroring the files and folders found on disk
    below the folder PATH.

      PATH is normalized before being walked. Returns a two element tuple: the
    root node of the prepared tree and the maximum modification timestamp
    detected under the given folder.

    """
    normalized_path = os.path.normpath(path)
    return _handle_dir(normalized_path)


def tree_difference(a, b):
    """
      Returns a TreeDifference instance describing how the tree rooted at
    TreeNode B differs from the one rooted at TreeNode A. Both root nodes must
    be directories; their own names are not included in the reported paths.

    """
    return _do_tree_difference(a, b, parent_path="", root=True)


def _do_tree_difference(a, b, parent_path, root=False):
    """
      Internal recursive worker function for tree_difference().

      Compares node A (old) against node B (new), both located under
    PARENT_PATH, and returns a TreeDifference holding everything that differs
    at or below them. ROOT marks the top-level call, for which the node names
    are left out of the reported paths.

    """
    if root:
        # Root node names are never listed, so the paths start out empty.
        assert not parent_path
        assert not a.is_file()
        assert not b.is_file()
        full_path = ""
    else:
        assert a.name == b.name
        full_path = parent_path + a.name

    diff = TreeDifference()
    a_is_file = a.is_file()
    b_is_file = b.is_file()

    # File on both sides: changed contents win over a changed timestamp.
    if a_is_file and b_is_file:
        if a.contents != b.contents:
            diff.modified_files.append(full_path)
        elif a.mtime != b.mtime:
            diff.touched_files.append(full_path)
        return diff

    # Directory replaced by a file: the whole old subtree is gone.
    if b_is_file:
        diff.removed_files.extend(_traverse_tree(a, parent_path))
        diff.added_files.append(full_path)
        return diff

    # File replaced by a directory: the whole new subtree is new.
    if a_is_file:
        diff.removed_files.append(full_path)
        diff.added_files.extend(_traverse_tree(b, parent_path))
        return diff

    # Directory on both sides: compare the children pairwise by name.
    child_prefix = full_path + "/" if full_path else full_path
    matched = []  # Children of B that also exist in A.
    for old_child in a.children:
        new_child = b.get_child(old_child.name)
        if not new_child:
            # NOTE(review): a removed directory is recorded by its bare name
            # only (no trailing "/", no descendants), unlike added directories
            # which go through _traverse_tree() - confirm this is intended.
            diff.removed_files.append(child_prefix + old_child.name)
            continue
        matched.append(new_child)
        diff.append(_do_tree_difference(old_child, new_child, child_prefix))
    for new_child in b.children:
        if new_child not in matched:
            diff.added_files.extend(_traverse_tree(new_child, child_prefix))
    return diff


def _traverse_tree(t, parent_path):
    """
      Returns a flat list with the full names of node T and everything below
    it, each prefixed with PARENT_PATH. Directory names get a trailing "/" and
    are listed before their contents.

    """
    # A non-empty parent path must already end with a separator.
    assert not parent_path or parent_path[-1] == "/"
    node_name = parent_path + t.name
    if t.is_file():
        return [node_name]
    dir_prefix = node_name + "/"
    names = [dir_prefix]
    for child in t.children:
        names += _traverse_tree(child, dir_prefix)
    return names


def _get_text(path):
    """Return the complete contents of the file at PATH, read in binary mode."""
    with open(path, 'rb') as source:
        return source.read()


def _handle_dir(path):
    """
      Main recursive worker function for build_tree().

      Creates a directory node for the given normalized folder PATH, adds a
    file node (contents and modification time included) for every regular file
    in it and recurses into every subfolder. Returns a (node, max_mtime) tuple
    where max_mtime is the largest modification time seen on PATH itself or on
    anything found under it.

    """
    node = TreeNode(os.path.basename(path), children=[])
    node.mtime = os.stat(path).st_mtime
    max_mtime = node.mtime

    # Sort the folder entries into files and subfolders. Entries that are
    # neither are left out of the tree.
    file_paths = []
    dir_paths = []
    for entry in os.listdir(path):
        entry_path = os.path.join(path, entry)
        if os.path.isdir(entry_path):
            dir_paths.append(entry_path)
        elif os.path.isfile(entry_path):
            file_paths.append(entry_path)

    # Files first: each one becomes a leaf node holding its contents.
    for file_path in file_paths:
        file_node = TreeNode(os.path.basename(file_path),
            contents=_get_text(file_path))
        file_node.mtime = os.stat(file_path).st_mtime
        max_mtime = max(max_mtime, file_node.mtime)
        node.add_child(file_node)

    # Then the subfolders, each contributing a complete subtree.
    for dir_path in dir_paths:
        dir_node, dir_max_mtime = _handle_dir(dir_path)
        max_mtime = max(max_mtime, dir_max_mtime)
        node.add_child(dir_node)

    return node, max_mtime