#!/usr/bin/env ruby
#
# The file and directory synchronization program.
# For A2 2015 CompSci340 & SoftEng370
#
# = Usage
# If made executable.
#   sync directory1 directory2
#

require 'fileutils'
require 'digest'
require 'time'
require 'json'

# Directory entries that are never treated as subdirectories.
IGNORE = ['.', '..'].freeze
# Marker stored in the digest slot of a pair when a file has been deleted.
DELETED = 'deleted'
# Positions of the time and the digest inside a [time, digest] pair.
TIME = 0
SHA256 = 1

# Synchronizes the directories.
# Both sync files are refreshed first, then changes are merged in both
# directions, and finally every subdirectory is synchronized recursively.
# A subdirectory that exists on one side only is created on the other side.
# - a: path of the first directory
# - b: path of the second directory
# Raises a RuntimeError if a subdirectory name on one side is the name of a
# regular file on the other side.
def sync(a, b)
  update_sync_file(a)
  update_sync_file(b)
  merge_from_into(a, b)
  merge_from_into(b, a)

  # then recursively do the subdirectories
  a_subdirs = subdirectories(a)
  b_subdirs = subdirectories(b)

  a_subdirs.each do |dir|
    a_dir = "#{a}/#{dir}"
    b_dir = "#{b}/#{dir}"
    unless b_subdirs.include?(dir)
      raise "filename matches directory name" if File.file?(b_dir)
      FileUtils.mkdir(b_dir)
    end
    sync(a_dir, b_dir)
  end

  # whatever is left exists only in b, so it always has to be created in a
  (b_subdirs - a_subdirs).each do |dir|
    a_dir = "#{a}/#{dir}"
    b_dir = "#{b}/#{dir}"
    raise "filename matches directory name" if File.file?(a_dir)
    FileUtils.mkdir(a_dir)
    sync(b_dir, a_dir)
  end
end

# Selects all subdirectories in the given directory.
# Excludes the directory itself and its parent.
# - dir: path of the directory to list
# Returns an array of entry names (not full paths).
def subdirectories(dir)
  Dir.entries(dir).select { |entry| File.directory?("#{dir}/#{entry}") } - IGNORE
end

# Updates the sync file in this directory.
# The sync file ('.sync') holds a JSON object; it is loaded, passed together
# with the list of regular files to handle_deleted_files and
# handle_existing_files, and written back.
# The working directory is changed to dir while this runs and is always
# restored afterwards.
# - dir: path of the directory whose sync file is updated
def update_sync_file(dir)
  current_dir = Dir.pwd
  Dir.chdir dir
  sync_filename = '.sync' #+File.basename(dir)

  # A missing or empty sync file both mean "nothing recorded yet". The file is
  # not created up front, so a failure further down cannot leave a zero-byte
  # file behind that would break JSON.parse on the next run.
  raw = File.exist?(sync_filename) ? File.read(sync_filename) : ''
  sync = raw.strip.empty? ? {} : JSON.parse(raw)

  # select all of the true files (exclude the sync file)
  file_list = Dir.entries('.').select { |e| File.file?(e) && e != sync_filename }
  handle_deleted_files(sync, file_list)
  handle_existing_files(sync, file_list)

  # File.write closes the handle even when writing raises
  File.write(sync_filename, JSON.pretty_generate(sync))
ensure
  Dir.chdir current_dir if current_dir
end

# Looks for and handles deleted files.
# - sync: the sync data of the current directory (file name => list of
#   [time, digest] pairs, newest first)
# - fileList: the regular files currently present (not used here)
# A [Time.now, DELETED] pair is put in front of the list of every recorded
# file that no longer exists and is not already marked as deleted.
def handle_deleted_files(sync, fileList)
  sync.each_pair do |file, pairs|
    next if pairs.first[SHA256] == DELETED
    pairs.unshift([Time.now, DELETED]) unless File.exist?(file)
  end
end

# Matches the sync object with the existing files.
# - sync: the sync data of the current directory; it is updated in place
# - fileList: names of the regular files in the current directory
# A file without an entry gets one; a digest that has not been seen before is
# put in front of the file's list; for a known digest the file's modification
# time is set back to the recorded time.
def handle_existing_files(sync, fileList)
  fileList.each do |file|
    time = File.mtime(file)
    digest = Digest::SHA256.file(file).hexdigest
    time_pairs = sync[file]

    unless time_pairs # no existing values for this file
      sync[file] = [[time, digest]]
      next
    end

    digest_pair = time_pairs.rassoc(digest)
    digest_time = digest_pair && digest_pair[TIME]
    if digest_time.nil? # must be a new digest
      time_pairs.unshift([time, digest])
    else
      # Times loaded from the JSON sync file are strings, so the recorded
      # value has to be parsed before it can be compared with a Time.
      recorded = Time.parse(digest_time.to_s)
      # same digest, different time: modify the file time to match the digest
      File.utime(File.atime(file), recorded, file) if recorded.to_i != time.to_i
    end
  end
end

# Merges files from one directory into another using their sync files.
# The changes are made in the second directory.
# This is not a problem, because it is also done the other way around.
# - a: path of the directory to merge from (relative or absolute)
# - b: path of the directory to merge into
# The working directory is changed to b while this runs and is always
# restored afterwards.
def merge_from_into(a, b)
  base_dir = Dir.pwd
  # resolve a before leaving base_dir; this also works when a is absolute
  a_dir = File.absolute_path(a, base_dir)
  async = JSON.parse(File.read(File.join(a_dir, '.sync'))) #+File.basename(a)))
  Dir.chdir(b)
  bsync = JSON.parse(File.read('.sync')) #+File.basename(b)))

  async.each do |file, a_digest_pairs|
    a_file = File.join(a_dir, file) # the pathname of the file in dir 1
    b_digest_pairs = bsync[file]
    deleted_pair = a_digest_pairs.first

    if deleted_pair[SHA256] == DELETED # deleted in a
      merge_deletion(file, deleted_pair, b_digest_pairs)
    elsif File.exist?(file) # exists in both, b_digest_pairs is therefore not nil
      merge_existing(a_file, file, a_digest_pairs, b_digest_pairs)
    elsif !b_digest_pairs # never existed in this one
      FileUtils.cp(a_file, file)
      File.utime(0, File.mtime(a_file), file)
      bsync[file] = [a_digest_pairs[-1]]
    end

    # doesn't exist in b, deleted?
    b_deleted = b_digest_pairs && b_digest_pairs.first
    if b_deleted && b_deleted[SHA256] == DELETED
      merge_undeletion(a_file, file, a_digest_pairs, b_deleted[TIME], bsync)
    end
  end

  # File.write closes the handle even when writing raises
  File.write('.sync', JSON.pretty_generate(bsync)) #+File.basename(b)
ensure
  Dir.chdir base_dir if base_dir
end

# Makes sure that files deleted in the other directory are correctly deleted in this one.
# - deleted_file: the file deleted in the other directory
# - deleted_pair: the pair of (time, digest) when deleted
# - digest_pairs: the pairs for the current directory
def merge_deletion(deleted_file, deleted_pair, digest_pairs)
  return unless File.exist?(deleted_file) # deleted in a but still exists in b

  # it may have been recreated after the deletion: in that case this very
  # deletion is already recorded locally and the file must be kept
  already_recorded = digest_pairs.any? do |pair|
    pair[SHA256] == DELETED && pair[TIME] == deleted_pair[TIME]
  end
  return if already_recorded

  FileUtils.rm(deleted_file)
  digest_pairs.unshift(deleted_pair)
end

# Merges into the current directory any changes made to files occurring in both.
# - a_file: the filename in the other directory
# - file: the local filename
# - a_digest_pairs: the pairs from the other directory
# - b_digest_pairs: the pairs from this directory
def merge_existing(a_file, file, a_digest_pairs, b_digest_pairs)
  a_newest = a_digest_pairs.first
  b_newest = b_digest_pairs.first

  if a_newest[SHA256] == b_newest[SHA256] && a_newest[TIME] < b_newest[TIME]
    # identical file in both, but earlier in a: adopt the earlier time
    # (the time is the TIME slot of the pair, not the digest)
    File.utime(0, Time.parse(a_newest[TIME]), file)
    b_newest[TIME] = a_newest[TIME]
    return
  end

  b_in_a = a_digest_pairs.rassoc(b_newest[SHA256])
  a_in_b = b_digest_pairs.rassoc(a_newest[SHA256])

  # new in both but newer in a
  diverged_newer_in_a = !a_in_b && !b_in_a && a_newest[TIME] > b_newest[TIME]
  # or just plain newer in a: b's current version is an older entry of a's history
  superseded_in_a = b_in_a && a_digest_pairs.index(b_in_a) > 0

  return unless diverged_newer_in_a || superseded_in_a

  FileUtils.cp(a_file, file)
  File.utime(0, Time.parse(a_newest[TIME]), file)
  b_digest_pairs.unshift(a_newest)
end

# Merges a file that was deleted in both but recreated in the other directory.
# - a_file: the filename in the other directory
# - file: the local filename
# - a_digest_pairs: the pairs from the other directory
# - b_deleted_time: the time the file was deleted in this directory
# - b_sync: the sync data for this directory
def merge_undeletion(a_file, file, a_digest_pairs, b_deleted_time, b_sync)
  # one slot per pair of a: the deletion time, or nil for a real version
  a_deleted_times = a_digest_pairs.map do |pair|
    pair[SHA256] == DELETED ? pair[TIME] : nil
  end

  # nothing to do unless a has recorded the same deletion
  return unless a_deleted_times.include?(b_deleted_time)

  latest_in_a = a_deleted_times.first
  if latest_in_a.nil?
    # a's newest entry is a real version, so the file must have been recreated
    FileUtils.cp(a_file, file)
    File.utime(0, File.mtime(a_file), file)
    b_sync[file].unshift(a_digest_pairs.first)
  elsif latest_in_a > b_deleted_time
    # recreated and then deleted again in a: only the record is carried over
    b_sync[file].unshift(a_digest_pairs.first)
  end
end

# Returns a value indicating if the directory names are ok.
# If exactly one of the two does not exist it is created as a directory.
# - a: the first directory name
# - b: the second directory name
def directories_ok?(a, b)
  a_is_dir = File.directory?(a)
  b_is_dir = File.directory?(b)

  return true if a_is_dir && b_is_dir
  return false if File.file?(a) || File.file?(b)

  # the one name that has to be created, if any
  missing =
    if a_is_dir && !File.exist?(b)
      b
    elsif b_is_dir && !File.exist?(a)
      a
    end
  return false unless missing

  FileUtils.mkdir(missing)
  true
end

if __FILE__ == $0 # true when this file is the program being run
  dir_a, dir_b = ARGV
  unless ARGV.size == 2 && directories_ok?(dir_a, dir_b)
    puts "Usage: sync directory1 directory2"
    exit(1)
  end
  sync(dir_a, dir_b)
end