main
1#!/usr/bin/env python3
2"""
3Command line tool to validate Office document XML files against XSD schemas and tracked changes.
4
5Usage:
6 python validate.py <dir> --original <original_file>
7"""
8
9import argparse
10import sys
11from pathlib import Path
12
13from validation import DOCXSchemaValidator, PPTXSchemaValidator, RedliningValidator
14
15
16def main():
17 parser = argparse.ArgumentParser(description="Validate Office document XML files")
18 parser.add_argument(
19 "unpacked_dir",
20 help="Path to unpacked Office document directory",
21 )
22 parser.add_argument(
23 "--original",
24 required=True,
25 help="Path to original file (.docx/.pptx/.xlsx)",
26 )
27 parser.add_argument(
28 "-v",
29 "--verbose",
30 action="store_true",
31 help="Enable verbose output",
32 )
33 args = parser.parse_args()
34
35 # Validate paths
36 unpacked_dir = Path(args.unpacked_dir)
37 original_file = Path(args.original)
38 file_extension = original_file.suffix.lower()
39 assert unpacked_dir.is_dir(), f"Error: {unpacked_dir} is not a directory"
40 assert original_file.is_file(), f"Error: {original_file} is not a file"
41 assert file_extension in [".docx", ".pptx", ".xlsx"], (
42 f"Error: {original_file} must be a .docx, .pptx, or .xlsx file"
43 )
44
45 # Run validations
46 match file_extension:
47 case ".docx":
48 validators = [DOCXSchemaValidator, RedliningValidator]
49 case ".pptx":
50 validators = [PPTXSchemaValidator]
51 case _:
52 print(f"Error: Validation not supported for file type {file_extension}")
53 sys.exit(1)
54
55 # Run validators
56 success = True
57 for V in validators:
58 validator = V(unpacked_dir, original_file, verbose=args.verbose)
59 if not validator.validate():
60 success = False
61
62 if success:
63 print("All validations PASSED!")
64
65 sys.exit(0 if success else 1)
66
67
# Script entry point: invoke the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()