"""File extraction utilities for DMARC reports""" import gzip import zipfile import tempfile import os from pathlib import Path def extract_files(file_path): """Extract XML content from gzip or zip files Args: file_path: Path to the compressed file Returns: str: XML content if successful, None otherwise """ file_path = Path(file_path) try: if file_path.suffix == '.gz': return _extract_gzip(file_path) elif file_path.suffix == '.zip': return _extract_zip(file_path) else: # Assume it's already XML return file_path.read_text() except Exception as e: print(f"Error extracting {file_path}: {e}") return None def _extract_gzip(file_path): """Extract content from gzip file""" with gzip.open(file_path, 'rt') as f: return f.read() def _extract_zip(file_path): """Extract content from zip file For zip files, we look for XML files inside and return the first one """ with zipfile.ZipFile(file_path, 'r') as zip_ref: # List all files in the zip file_list = zip_ref.namelist() # Find the first XML file xml_file = None for filename in file_list: if filename.lower().endswith('.xml'): xml_file = filename break if xml_file: with zip_ref.open(xml_file) as f: return f.read().decode('utf-8') else: # If no XML file found, try the first file if file_list: with zip_ref.open(file_list[0]) as f: return f.read().decode('utf-8') return None