# Recovered from commit 7d3432e ("feat: init commit of tool"), which added
# this module as dmarc_analyzer/extractor.py.
"""File extraction utilities for DMARC reports"""

import gzip
import zipfile
import tempfile
import os
from pathlib import Path


def extract_files(file_path):
    """Return the XML text stored in a DMARC report file.

    The file type is chosen from the final path suffix, compared
    case-insensitively: ``.gz`` is gunzipped, ``.zip`` is searched for an
    XML member, and anything else is read directly as text. All content is
    decoded as UTF-8 regardless of the platform's locale encoding.

    Args:
        file_path: Path (str or path-like) to the report file.

    Returns:
        str: The decoded XML content, or None if the file could not be
        read, decompressed or decoded, or if a zip archive has no file
        members.
    """
    file_path = Path(file_path)
    # Lower-case the suffix so "REPORT.XML.GZ" or "report.ZIP" is
    # decompressed instead of being read as raw (binary) text.
    suffix = file_path.suffix.lower()

    try:
        if suffix == '.gz':
            return _extract_gzip(file_path)
        if suffix == '.zip':
            return _extract_zip(file_path)
        # Assume it's already XML. Use the same explicit encoding as the
        # compressed paths so results do not depend on the locale.
        return file_path.read_text(encoding='utf-8')
    except Exception as e:
        # Deliberately broad: this is the best-effort boundary of the
        # module, and failure is reported to the caller as None.
        print(f"Error extracting {file_path}: {e}")
        return None


def _extract_gzip(file_path):
    """Return the gunzipped content of ``file_path`` decoded as UTF-8.

    Errors (missing file, bad gzip data, invalid UTF-8) propagate to the
    caller.
    """
    with gzip.open(file_path, 'rt', encoding='utf-8') as f:
        return f.read()


def _extract_zip(file_path):
    """Return the UTF-8 decoded content of one member of a zip archive.

    The first member whose name ends in ``.xml`` (case-insensitive) is
    preferred. If there is none, the first file member is used instead.
    Directory entries are never selected.

    Returns:
        str: The decoded member content, or None if the archive contains
        no file members.
    """
    with zipfile.ZipFile(file_path, 'r') as zip_ref:
        # Directory entries (names ending in "/") carry no data; without
        # this filter the fallback could pick one and return "".
        members = [
            name for name in zip_ref.namelist() if not name.endswith('/')
        ]
        if not members:
            return None

        # Prefer the first XML member, otherwise fall back to the first file.
        chosen = next(
            (name for name in members if name.lower().endswith('.xml')),
            members[0],
        )
        return zip_ref.read(chosen).decode('utf-8')