Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 1 | #!/usr/bin/env python |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2 | |
| 3 | from __future__ import print_function |
| 4 | |
# Tool description; it is handed to argparse as the parser description, so it
# is what users read in the --help output.
desc = '''Generate the difference of two YAML files into a new YAML file (works on
a pair of directories too). A new attribute 'Added' is set to True or False
depending on whether the entry is added or removed from the first input to the
next.

The tool requires PyYAML.'''
| 11 | |
| 12 | import yaml |
| 13 | # Try to use the C parser. |
| 14 | try: |
| 15 | from yaml import CLoader as Loader |
| 16 | except ImportError: |
| 17 | from yaml import Loader |
| 18 | |
| 19 | import optrecord |
| 20 | import argparse |
| 21 | from collections import defaultdict |
| 22 | |
def split_into_files(remarks, max_size, output_pattern):
    """Pair each slice of at most `max_size` remarks with its output file name.

    Args:
        remarks: A sequence (supports len() and slicing) of items to write.
        max_size: Positive maximum number of items per output file.
        output_pattern: A str.format() pattern; its placeholder receives the
            zero-based index of the chunk (0, 1, 2, ...).

    Yields:
        (file_name, chunk) tuples, in order. Nothing is yielded for an empty
        sequence.
    """
    # The index is taken from enumerate() rather than computed as
    # `start / max_size`: under Python 3 that true division yields a float and
    # the files would be named 'diff0.0.opt.yaml' instead of 'diff0.opt.yaml'.
    for index, start in enumerate(range(0, len(remarks), max_size)):
        yield output_pattern.format(index), remarks[start:start + max_size]


def main():
    """Parse the command line, diff the two remark sets and write the result.

    Remarks found only in the second input get `Added = True`, remarks found
    only in the first input get `Added = False`. The tagged remarks are then
    dumped as YAML into one or more files named after the --output pattern,
    each holding at most --max-size remarks.
    """
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument(
        'yaml_dir_or_file_1',
        help='An optimization record file or a directory searched for optimization '
             'record files that are used as the old version for the comparison')
    parser.add_argument(
        'yaml_dir_or_file_2',
        help='An optimization record file or a directory searched for optimization '
             'record files that are used as the new version for the comparison')
    parser.add_argument(
        '--jobs',
        '-j',
        default=None,
        type=int,
        help='Max job count (defaults to %(default)s, the current CPU count)')
    parser.add_argument(
        '--max-size',
        '-m',
        default=100000,
        type=int,
        help='Maximum number of remarks stored in an output file')
    parser.add_argument(
        '--no-progress-indicator',
        '-n',
        action='store_true',
        default=False,
        help='Do not display any indicator of how many YAML files were read.')
    parser.add_argument(
        '--output',
        '-o',
        default='diff{}.opt.yaml',
        help='Output file name pattern; "{}" is replaced by the index of the '
             'output file (defaults to %(default)s)')
    args = parser.parse_args()

    # A zero step would make range() raise ValueError and a negative one would
    # silently produce no output at all, so reject both up front.
    if args.max_size <= 0:
        parser.error('--max-size must be a positive integer')

    files1 = optrecord.find_opt_files(args.yaml_dir_or_file_1)
    files2 = optrecord.find_opt_files(args.yaml_dir_or_file_2)

    print_progress = not args.no_progress_indicator
    all_remarks1, _, _ = optrecord.gather_results(files1, args.jobs, print_progress)
    all_remarks2, _, _ = optrecord.gather_results(files2, args.jobs, print_progress)

    # Build each set once; both differences below reuse them.
    old_remarks = set(all_remarks1.values())
    new_remarks = set(all_remarks2.values())

    added = new_remarks - old_remarks
    removed = old_remarks - new_remarks

    for r in added:
        r.Added = True
    for r in removed:
        r.Added = False

    result = list(added | removed)
    for r in result:
        r.recover_yaml_structure()

    for file_name, chunk in split_into_files(result, args.max_size, args.output):
        with open(file_name, 'w') as stream:
            yaml.dump_all(chunk, stream)


if __name__ == '__main__':
    main()