idK9xB5G

· 8 months ago · Mar 13, 2025, 04:50 PM
1# ═════════════════════════════════════════════════════════════════════════════
2# ═══════════════════════════ ENHANCEMENT OVERVIEW ═══════════════════════════
3# ═════════════════════════════════════════════════════════════════════════════
4"""
5SkypeExporter Enhancements:
6
71. Basic Mode:
8   - Added simplified procedural workflow
9   - Streamlined user interaction
10   - Direct prompts with clear instructions
11
122. Enhanced Filename Sanitization:
13   - Reserved Windows name handling
14   - Cross-platform compatibility
15   - Length limits enforcement
16   - Special character handling
17
183. Memory Profiling & Optimization:
19   - Memory usage tracking
20   - Dynamic batch size adjustment
21   - Automated garbage collection
22   - System-aware resource allocation
23
244. PostgreSQL Export:
25   - Normalized database schema
26   - SQLAlchemy ORM integration
27   - Batch insertion optimization
28   - Connection pooling and management
29   - Configurable database settings
30"""
31
32# ═════════════════════════════════════════════════════════════════════════════
33# ═══════════════════════════ IMPORTS AND SETUP ═══════════════════════════════
34# ═════════════════════════════════════════════════════════════════════════════
35
36import argparse
37import asyncio
38import concurrent.futures
39import dataclasses
40import datetime
41import fnmatch
42import gc
43import html
44import importlib.metadata
45import json
46import logging
47import os
48import platform
49import psutil
50import re
51import shutil
52import signal
53import sys
54import tarfile
55import tempfile
56import time
57import traceback
58import uuid
59import zipfile
60from abc import ABC, abstractmethod
61from contextlib import contextmanager
62from dataclasses import dataclass, field
63from enum import Enum, auto
64from pathlib import Path
65from typing import (Any, Dict, Generator, List, Optional, Set, Tuple)
66
67# Import for SQLAlchemy
68try:
69    from sqlalchemy import (
70        Column, ForeignKey, Integer, String, DateTime, Boolean, Text, create_engine,
71        select, func, Index, UniqueConstraint
72    )
73    from sqlalchemy.orm import relationship, Session, sessionmaker, declarative_base
74    from sqlalchemy.ext.declarative import declared_attr
75    SQLALCHEMY_AVAILABLE = True
76except ImportError:
77    SQLALCHEMY_AVAILABLE = False
78
79# Import for Rich and other optional libraries
80try:
81    from rich.console import Console
82    from rich.progress import Progress, TextColumn, BarColumn, TimeElapsedColumn, TimeRemainingColumn
83    from rich.table import Table
84    from rich.panel import Panel
85    from rich.markdown import Markdown
86    RICH_AVAILABLE = True
87except ImportError:
88    RICH_AVAILABLE = False
89
90try:
91    from tqdm import tqdm
92    TQDM_AVAILABLE = True
93except ImportError:
94    TQDM_AVAILABLE = False
95
96# ═════════════════════════════════════════════════════════════════════════════
97# ═══════════════════════════ CUSTOM EXCEPTIONS ══════════════════════════════
98# ═════════════════════════════════════════════════════════════════════════════
99
100class SkypeExporterError(Exception):
101    """Base exception for all Skype Exporter errors."""
102    pass
103
104class ConfigError(SkypeExporterError):
105    """Error in configuration settings."""
106    pass
107
108class FileReadError(SkypeExporterError):
109    """Error reading input files."""
110    pass
111
112class FileWriteError(SkypeExporterError):
113    """Error writing output files."""
114    pass
115
116class ParseError(SkypeExporterError):
117    """Error parsing Skype data."""
118    pass
119
120class TimestampError(ParseError):
121    """Error parsing timestamps."""
122    pass
123
124class ExportError(SkypeExporterError):
125    """Error exporting conversations."""
126    pass
127
128class DatabaseError(SkypeExporterError):
129    """Error with database operations."""
130    pass
131
132class MemoryError(SkypeExporterError):
133    """Error with memory management."""
134    pass
135
136# ═════════════════════════════════════════════════════════════════════════════
137# ═══════════════════════════ DEPENDENCY MANAGEMENT ═══════════════════════════
138# ═════════════════════════════════════════════════════════════════════════════
139
140REQUIRED_PACKAGES = {
141    "beautifulsoup4": "4.9.0",
142    "lxml": "4.5.0",
143    "colorama": "0.4.3",
144    "tqdm": "4.45.0",
145    "rich": "10.0.0",
146    "jinja2": "3.0.0",
147    "markdown": "3.3.0",
148    "pyyaml": "6.0.0",
149    "psutil": "5.8.0",  # Added for memory monitoring
150    "sqlalchemy": "1.4.0",  # Added for PostgreSQL export
151    "psycopg2-binary": "2.9.0",  # Added for PostgreSQL connection
152    "alembic": "1.7.0",  # Added for database migrations
153}
154
155def check_dependencies() -> Dict[str, bool]:
156    """
157    Check if required dependencies are installed and at the correct version.
158
159    Returns:
160        Dict[str, bool]: Dictionary of package names and whether they're properly installed
161    """
162    result = {}
163
164    for package, min_version in REQUIRED_PACKAGES.items():
165        try:
166            installed_version = importlib.metadata.version(package)
167            version_ok = _compare_versions(installed_version, min_version) >= 0
168            result[package] = version_ok
169        except importlib.metadata.PackageNotFoundError:
170            result[package] = False
171
172    return result
173
174def _compare_versions(version1: str, version2: str) -> int:
175    """
176    Compare two version strings.
177
178    Args:
179        version1: First version string
180        version2: Second version string
181
182    Returns:
183        int: 1 if version1 > version2, 0 if equal, -1 if version1 < version2
184    """
185    def normalize(v):
186        return [int(x) for x in re.sub(r'(\.0+)*$', '', v).split(".")]
187
188    v1 = normalize(version1)
189    v2 = normalize(version2)
190
191    for i in range(max(len(v1), len(v2))):
192        n1 = v1[i] if i < len(v1) else 0
193        n2 = v2[i] if i < len(v2) else 0
194        if n1 > n2:
195            return 1
196        elif n1 < n2:
197            return -1
198
199    return 0
200
201def install_dependencies() -> None:
202    """
203    Check for missing dependencies and provide installation instructions.
204
205    Instead of automatically installing packages, this now warns the user
206    and provides instructions for manual installation.
207    """
208    dependencies = check_dependencies()
209    missing = []
210
211    for dep, installed in dependencies.items():
212        if not installed:
213            missing.append(dep)
214
215    if missing:
216        print("\nWARNING: The following dependencies are missing:")
217        for dep in missing:
218            print(f"  - {dep}")
219
220        print("\nPlease install them manually with:")
221        print(f"  pip install {' '.join(missing)}")
222        print("\nContinuing with limited functionality. Some features may not work correctly.")
223    else:
224        print("All dependencies are installed.")
225
226# Import optional dependencies, which may fail
227try:
228    from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
229    import warnings
230    warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
231    BEAUTIFULSOUP_AVAILABLE = True
232except ImportError:
233    BEAUTIFULSOUP_AVAILABLE = False
234
235try:
236    from rich.console import Console
237    from rich.progress import Progress, TextColumn, BarColumn, TimeElapsedColumn, TimeRemainingColumn
238    from rich.panel import Panel
239    from rich.table import Table
240    from rich.syntax import Syntax
241    from rich.logging import RichHandler
242    from rich.traceback import install as install_rich_traceback
243    from rich.prompt import Prompt, Confirm
244    RICH_AVAILABLE = True
245    install_rich_traceback()
246except ImportError:
247    RICH_AVAILABLE = False
248
249try:
250    from colorama import init as colorama_init
251    from colorama import Fore, Back, Style
252    COLORAMA_AVAILABLE = True
253    colorama_init()
254except ImportError:
255    COLORAMA_AVAILABLE = False
256
257try:
258    from tqdm import tqdm
259    TQDM_AVAILABLE = True
260except ImportError:
261    TQDM_AVAILABLE = False
262
263try:
264    import markdown
265    MARKDOWN_AVAILABLE = True
266except ImportError:
267    MARKDOWN_AVAILABLE = False
268
269try:
270    import jinja2
271    JINJA2_AVAILABLE = True
272except ImportError:
273    JINJA2_AVAILABLE = False
274
275try:
276    import yaml
277    YAML_AVAILABLE = True
278except ImportError:
279    YAML_AVAILABLE = False
280
281try:
282    import sqlalchemy
283    from sqlalchemy import create_engine, Column, Integer, String, Text, DateTime, Boolean, ForeignKey
284    from sqlalchemy.orm import sessionmaker, relationship, Session, declarative_base
285    SQLALCHEMY_AVAILABLE = True
286except ImportError:
287    SQLALCHEMY_AVAILABLE = False
288
289try:
290    import psycopg2
291    PSYCOPG2_AVAILABLE = True
292except ImportError:
293    PSYCOPG2_AVAILABLE = False
294
295# ═════════════════════════════════════════════════════════════════════════════
296# ═══════════════════════════ MEMORY MANAGEMENT ═══════════════════════════════
297# ═════════════════════════════════════════════════════════════════════════════
298
299class MemoryMonitor:
300    """
301    Monitor and manage memory usage during processing.
302
303    This class provides utilities to track memory usage, optimize batch sizes,
304    and trigger garbage collection based on system resources.
305    """
306
307    def __init__(self, ctx: 'AppContext'):
308        """Initialize memory monitor with application context."""
309        self.ctx = ctx
310        self.logger = ctx.logger.getChild('memory')
311        self.process = psutil.Process(os.getpid())  # Initialize the process object
312        self.usage_history = []
313        self.memory_samples = []  # Restore memory samples list
314        self.memory_timestamps = []  # Restore timestamps list
315        self.peak_usage = 0
316        self.last_memory_percent = None
317        self.last_gc_time = time.time()
318        self.memory_target = ctx.options.memory_threshold_percent
319        self.check_counter = 0  # Counter for adaptive memory checks
320
321        # Capture initial memory usage
322        self.record_memory_usage()
323
324        self.logger.debug(f"Memory monitor initialized with target: {self.memory_target}%")
325
326    def get_memory_usage_mb(self) -> float:
327        """
328        Get current memory usage in megabytes.
329
330        Returns:
331            Memory usage in MB
332        """
333        return self.process.memory_info().rss / (1024 * 1024)
334
335    def get_memory_percent(self) -> float:
336        """
337        Get memory usage as percentage of system memory.
338
339        Returns:
340            Memory usage percentage
341        """
342        return self.process.memory_percent()
343
344    def get_system_memory_mb(self) -> float:
345        """
346        Get total system memory in megabytes.
347
348        Returns:
349            Total system memory in MB
350        """
351        return psutil.virtual_memory().total / (1024 * 1024)
352
353    def record_memory_usage(self) -> None:
354        """Record current memory usage for tracking."""
355        current_usage_mb = self.get_memory_usage_mb()
356        current_time = time.time()
357
358        # Record in both tracking mechanisms for backward compatibility
359        self.usage_history.append(current_usage_mb)
360        self.memory_samples.append(current_usage_mb)
361        self.memory_timestamps.append(current_time)
362
363        # Keep only the last 100 samples in both arrays
364        if len(self.memory_samples) > 100:
365            self.memory_samples.pop(0)
366            self.memory_timestamps.pop(0)
367
368        if len(self.usage_history) > 100:
369            self.usage_history.pop(0)
370
371    def check_memory(self) -> bool:
372        """
373        Check memory usage and optimize if needed.
374
375        Returns:
376            True if optimization was performed, False otherwise
377        """
378        # Use an adaptive check interval based on previous memory usage
379        self.check_counter += 1
380
381        # Default intervals for memory checks (operations between checks)
382        low_usage_interval = 100    # Less frequent checks when memory usage is low
383        medium_usage_interval = 25  # Medium frequency checks
384        high_usage_interval = 5     # Frequent checks when memory is high
385
386        # Determine the check interval based on last measured memory percentage
387        if self.last_memory_percent is None:
388            check_interval = medium_usage_interval
389        elif self.last_memory_percent < 30:
390            check_interval = low_usage_interval
391        elif self.last_memory_percent < 60:
392            check_interval = medium_usage_interval
393        else:
394            check_interval = high_usage_interval
395
396        # Skip check if we haven't reached the interval, unless it's the first check
397        if self.check_counter % check_interval != 0 and self.last_memory_percent is not None:
398            return False
399
400        # Get current memory usage
401        memory_percent = self.get_memory_percent()
402        memory_usage_mb = self.get_memory_usage_mb()
403        self.last_memory_percent = memory_percent
404
405        # Record usage for historical tracking
406        self.record_memory_usage()
407
408        if memory_usage_mb > self.peak_usage:
409            self.peak_usage = memory_usage_mb
410
411        # Check if memory usage exceeds threshold
412        if memory_percent > self.memory_target:
413            self.logger.warning(
414                f"Memory usage high: {memory_percent:.1f}% ({memory_usage_mb:.1f} MB), "
415                f"optimizing..."
416            )
417            self._optimize_memory()
418            return True
419
420        # Occasionally collect garbage even if memory usage is low
421        # but at a lower frequency (every 5000 operations or 60 seconds)
422        elif (self.check_counter % 5000 == 0 or
423              (time.time() - self.last_gc_time > 60)):
424            self.logger.debug(
425                f"Performing routine garbage collection: {memory_percent:.1f}% "
426                f"({memory_usage_mb:.1f} MB)"
427            )
428            self._collect_garbage()
429
430        self.logger.debug(
431            f"Memory usage: {memory_percent:.1f}% ({memory_usage_mb:.1f} MB) "
432            f"of {self.get_system_memory_mb():.1f} MB"
433        )
434
435        return False
436
437    def _optimize_memory(self) -> None:
438        """Optimize memory usage by adjusting batch sizes and collecting garbage."""
439        self.logger.info("Optimizing memory usage...")
440
441        # Reduce batch size to conserve memory
442        current_batch_size = self.ctx.options.batch_size
443        new_batch_size = max(100, current_batch_size // 2)
444
445        if new_batch_size < current_batch_size:
446            self.logger.info(f"Reducing batch size from {current_batch_size} to {new_batch_size}")
447            self.ctx.options.batch_size = new_batch_size
448
449        # Reduce max workers if memory usage is very high
450        if self.get_memory_percent() > 90 and self.ctx.options.max_workers > 2:
451            self.logger.warning("Critical memory usage - reducing worker threads")
452            self.ctx.options.max_workers = max(1, self.ctx.options.max_workers // 2)
453
454        # Force garbage collection
455        self._collect_garbage()
456
457    def _collect_garbage(self) -> None:
458        """Force garbage collection to free memory."""
459        self.logger.debug("Running garbage collection...")
460
461        before_mb = self.get_memory_usage_mb()
462        gc.collect()
463        after_mb = self.get_memory_usage_mb()
464
465        freed_mb = before_mb - after_mb
466        self.logger.debug(f"Garbage collection freed {freed_mb:.2f} MB")
467
468    def calculate_optimal_batch_size(self, item_count: int) -> int:
469        """
470        Calculate optimal batch size based on available system resources.
471
472        Args:
473            item_count: Total number of items to process
474
475        Returns:
476            Optimal batch size
477        """
478        # Get available memory in MB
479        available_memory = psutil.virtual_memory().available / (1024 * 1024)
480
481        # Estimate memory per item (using exponential moving average if we have samples)
482        current_memory = self.get_memory_usage_mb()
483        memory_per_item = 0.1  # Default assumption: 100KB per item
484
485        # Calculate optimal batch size - aim to use at most 20% of available memory
486        max_memory_to_use = available_memory * 0.2
487        optimal_batch_size = int(max_memory_to_use / memory_per_item)
488
489        # Constrain within reasonable limits
490        optimal_batch_size = min(optimal_batch_size, 5000)  # Never go above 5000
491        optimal_batch_size = max(optimal_batch_size, 100)   # Never go below 100
492
493        # Round to nearest 100 for cleaner numbers
494        optimal_batch_size = round(optimal_batch_size / 100) * 100
495
496        self.logger.debug(f"Calculated optimal batch size: {optimal_batch_size} "
497                        f"(available memory: {available_memory:.2f} MB)")
498
499        return optimal_batch_size
500
501    def get_memory_report(self) -> Dict[str, Any]:
502        """
503        Generate a report on memory usage.
504
505        Returns:
506            Dictionary with memory statistics
507        """
508        return {
509            "current_usage_mb": self.get_memory_usage_mb(),
510            "current_usage_percent": self.get_memory_percent(),
511            "peak_usage_mb": max(self.memory_samples) if self.memory_samples else self.get_memory_usage_mb(),
512            "system_memory_mb": self.get_system_memory_mb(),
513            "batch_size": self.ctx.options.batch_size,
514            "max_workers": self.ctx.options.max_workers
515        }
516
517# ═════════════════════════════════════════════════════════════════════════════
518# ═══════════════════════════ FILEPATH UTILITIES ═══════════════════════════════
519# ═════════════════════════════════════════════════════════════════════════════
520
521def sanitize_filename(name: str, max_length: int = 200) -> str:
522    """
523    Sanitize a string to be used as a filename across all platforms.
524
525    Handles invalid characters, reserved Windows names, and length limitations.
526
527    Args:
528        name: Original name to sanitize
529        max_length: Maximum length for the filename
530
531    Returns:
532        Sanitized filename string safe for all platforms
533    """
534    if not name:
535        return "unnamed"
536
537    # Handle file system restrictions
538    # 1. Replace invalid characters
539    sanitized = re.sub(r'[<>:"/\\|?*\x00-\x1F]', '_', name)
540
541    # 2. Check for reserved Windows names (CON, PRN, AUX, etc.)
542    reserved_names = {
543        'CON', 'PRN', 'AUX', 'NUL',
544        'COM1', 'COM2', 'COM3', 'COM4', 'COM5', 'COM6', 'COM7', 'COM8', 'COM9',
545        'LPT1', 'LPT2', 'LPT3', 'LPT4', 'LPT5', 'LPT6', 'LPT7', 'LPT8', 'LPT9'
546    }
547
548    # Check if name matches a reserved name (either exactly or with an extension)
549    name_parts = sanitized.split('.')
550    if name_parts[0].upper() in reserved_names:
551        sanitized = f"_{sanitized}"
552
553    # 3. Enforce length limit with smart truncation
554    if len(sanitized) > max_length:
555        # Keep the extension if present
556        if '.' in sanitized:
557            extension = '.' + sanitized.split('.')[-1]
558            base_name = '.'.join(sanitized.split('.')[:-1])
559
560            # Truncate the base name, leaving room for ellipsis and extension
561            available_length = max_length - len(extension) - 3  # 3 for "..."
562            sanitized = base_name[:available_length] + "..." + extension
563        else:
564            sanitized = sanitized[:max_length-3] + "..."
565
566    # 4. Ensure name doesn't end with space or period (Windows restriction)
567    sanitized = sanitized.rstrip(' .')
568
569    # 5. If empty after sanitization, provide a fallback
570    if not sanitized:
571        sanitized = "unnamed_file"
572
573    return sanitized
574
575def ensure_directory(path: Path) -> Path:
576    """
577    Ensure a directory exists, creating it if necessary.
578
579    Args:
580        path: Directory path to ensure
581
582    Returns:
583        Path to the directory
584    """
585    path.mkdir(parents=True, exist_ok=True)
586    return path
587
588def get_unique_filename(directory: Path, base_name: str, extension: str) -> Path:
589    """
590    Generate a unique filename by appending a counter if needed.
591
592    Args:
593        directory: Directory path
594        base_name: Base filename
595        extension: File extension
596
597    Returns:
598        Path to a unique filename
599    """
600    # Ensure extension starts with a dot
601    if extension and not extension.startswith('.'):
602        extension = '.' + extension
603
604    # First try the original name
605    file_path = directory / f"{base_name}{extension}"
606    if not file_path.exists():
607        return file_path
608
609    # Add counter until we find an unused name
610    counter = 1
611    while True:
612        file_path = directory / f"{base_name}_{counter}{extension}"
613        if not file_path.exists():
614            return file_path
615        counter += 1
616
617# ═════════════════════════════════════════════════════════════════════════════
618# ═══════════════════════════ CONFIGURATION AND SETUP ═════════════════════════
619# ═════════════════════════════════════════════════════════════════════════════
620
621class LogLevel(Enum):
622    """Log levels with descriptive names."""
623    DEBUG = logging.DEBUG
624    INFO = logging.INFO
625    WARNING = logging.WARNING
626    ERROR = logging.ERROR
627    CRITICAL = logging.CRITICAL
628
629class OutputFormat(Enum):
630    """Supported output formats for exporting conversations."""
631    TEXT = auto()
632    HTML = auto()
633    MARKDOWN = auto()
634    JSON = auto()
635    POSTGRESQL = auto()  # Added support for PostgreSQL export
636    ALL = auto()
637
638@dataclass
639class DatabaseConfig:
640    """Configuration for database connections."""
641    engine: str = "postgresql"
642    host: str = "localhost"
643    port: int = 5432
644    database: str = "skype_export"
645    username: str = "postgres"
646    password: str = ""
647    schema: str = "public"
648    connection_pool_size: int = 5
649    connection_max_overflow: int = 10
650    connection_timeout: int = 30
651    echo_sql: bool = False
652
653    @property
654    def connection_string(self) -> str:
655        """Generate SQLAlchemy connection string."""
656        return (f"{self.engine}://{self.username}:{self.password}@"
657                f"{self.host}:{self.port}/{self.database}")
658
659@dataclass
660class ExportOptions:
661    """Configuration options for the export process."""
662    output_dir: Path = Path.cwd() / "skype_exports"
663    format: OutputFormat = OutputFormat.TEXT
664    anonymize: bool = False
665    include_timestamps: bool = True
666    use_local_time: bool = True
667    include_metadata: bool = True
668    include_message_ids: bool = False
669    parallel: bool = True
670    max_workers: int = max(1, os.cpu_count() or 4)
671    batch_size: int = 1000
672    timezone: Optional[str] = None
673    pretty_print: bool = True
674    compress_output: bool = False
675    filter_pattern: Optional[str] = None
676    date_range: Optional[Tuple[datetime.date, datetime.date]] = None
677    include_conversation_stats: bool = True
678    media_links: bool = False
679    strip_html: bool = True
680    debug_mode: bool = False
681    basic_mode: bool = False  # Added for basic mode
682    enable_memory_optimization: bool = True  # Added for memory optimization
683    memory_profile: bool = False  # Added for memory profiling
684    memory_threshold_percent: int = 75  # Added for memory monitoring
685    database_config: DatabaseConfig = field(default_factory=DatabaseConfig)
686
687@dataclass
688class AppContext:
689    """Application context with shared resources and state."""
690    options: ExportOptions = field(default_factory=ExportOptions)
691    logger: logging.Logger = field(default_factory=lambda: logging.getLogger("original_scripts.testing"))
692    console: Any = field(default=None)
693    temp_dir: Optional[Path] = None
694    start_time: float = field(default_factory=time.time)
695    user_id: Optional[str] = None
696    user_display_name: Optional[str] = None
697    export_date: Optional[str] = None
698    export_time: Optional[str] = None
699    total_conversations: int = 0
700    total_messages: int = 0
701    processed_conversations: int = 0
702    processed_messages: int = 0
703    errors: List[Dict[str, Any]] = field(default_factory=list)
704    cancel_requested: bool = False
705    memory_monitor: Optional['MemoryMonitor'] = None
706
707    def __post_init__(self):
708        """Initialize console based on available libraries."""
709        if RICH_AVAILABLE and not self.console:
710            self.console = Console()
711
712        if self.options.enable_memory_optimization:
713            try:
714                self.memory_monitor = MemoryMonitor(self)
715            except Exception as e:
716                self.logger.warning(f"Failed to initialize memory monitor: {e}")
717
718    @property
719    def progress_tracker(self):
720        """Get a progress tracker based on available libraries."""
721        if RICH_AVAILABLE:
722            return Progress(
723                TextColumn("[bold blue]{task.description}"),
724                BarColumn(),
725                "[progress.percentage]{task.percentage:>3.0f}%",
726                TimeElapsedColumn(),
727                TimeRemainingColumn(),
728                console=self.console
729            )
730        elif TQDM_AVAILABLE:
731            return tqdm
732        else:
733            return None  # Simple text-based progress will be used
734
735    @contextmanager
736    def create_temp_directory(self) -> Generator[Path, None, None]:
737        """Create and manage a temporary directory for processing."""
738        try:
739            temp_dir = Path(tempfile.mkdtemp(prefix="original_scripts.testing_"))
740            self.temp_dir = temp_dir
741            yield temp_dir
742        finally:
743            if self.temp_dir and self.temp_dir.exists():
744                shutil.rmtree(self.temp_dir, ignore_errors=True)
745                self.temp_dir = None
746
747    def check_memory(self) -> bool:
748        """
749        Check memory usage and optimize if needed.
750
751        Returns:
752            True if optimization was performed, False otherwise
753        """
754        if self.memory_monitor and self.options.enable_memory_optimization:
755            return self.memory_monitor.check_memory()
756        return False
757
758    def get_memory_report(self) -> Optional[Dict[str, Any]]:
759        """
760        Get a report on memory usage.
761
762        Returns:
763            Dictionary with memory statistics or None if monitoring disabled
764        """
765        if self.memory_monitor:
766            return self.memory_monitor.get_memory_report()
767        return None
768
769def setup_logging(level: LogLevel = LogLevel.INFO, log_file: Optional[Path] = None) -> logging.Logger:
770    """
771    Configure logging with rich formatting if available.
772
773    Args:
774        level: Logging level to use
775        log_file: Optional file path to write logs to
776
777    Returns:
778        Configured logger instance
779    """
780    log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
781
782    # Create logger
783    logger = logging.getLogger("original_scripts.testing")
784    logger.setLevel(level.value)
785    logger.handlers = []  # Clear any existing handlers
786
787    # Console handler
788    if RICH_AVAILABLE:
789        console_handler = RichHandler(rich_tracebacks=True)
790        console_handler.setFormatter(logging.Formatter("%(message)s"))
791    else:
792        console_handler = logging.StreamHandler()
793        console_handler.setFormatter(logging.Formatter(log_format))
794
795    console_handler.setLevel(level.value)
796    logger.addHandler(console_handler)
797
798    # File handler if specified
799    if log_file:
800        log_file.parent.mkdir(parents=True, exist_ok=True)
801        file_handler = logging.FileHandler(log_file, encoding='utf-8')
802        file_handler.setFormatter(logging.Formatter(log_format))
803        file_handler.setLevel(level.value)
804        logger.addHandler(file_handler)
805
806    return logger
807
808def get_logger(name: str, ctx: AppContext) -> logging.Logger:
809    """
810    Get a consistently configured logger instance.
811
812    Args:
813        name: Logger name (will be prefixed with original_scripts.testing)
814        ctx: Application context with configuration
815
816    Returns:
817        Configured logger instance
818    """
819    logger = ctx.logger.getChild(name)
820    return logger
821
822# ═════════════════════════════════════════════════════════════════════════════
823# ═══════════════════════════ DOMAIN MODELS ════════════════════════════════════
824# ═════════════════════════════════════════════════════════════════════════════
825
826@dataclass
827class SkypeMessage:
828    """Represents a single message in a Skype conversation."""
829    id: str
830    timestamp: datetime.datetime
831    sender_id: str
832    sender_display_name: str
833    content: str
834    message_type: str
835    edited: bool = False
836    original_json: Dict[str, Any] = field(default_factory=dict)
837
838    @property
839    def formatted_timestamp(self) -> str:
840        """Format the timestamp for display."""
841        return self.timestamp.strftime("%Y-%m-%d %H:%M:%S")
842
843    @property
844    def date(self) -> datetime.date:
845        """Get the date of the message."""
846        return self.timestamp.date()
847
848    @property
849    def time(self) -> datetime.time:
850        """Get the time of the message."""
851        return self.timestamp.time()
852
853@dataclass
854class SkypeConversation:
855    """Represents a Skype conversation with metadata and messages."""
856    id: str
857    display_name: str
858    messages: List[SkypeMessage] = field(default_factory=list)
859    first_timestamp: Optional[datetime.datetime] = None
860    last_timestamp: Optional[datetime.datetime] = None
861    participants: Dict[str, str] = field(default_factory=dict)
862    original_json: Dict[str, Any] = field(default_factory=dict)
863
864    def __post_init__(self):
865        """Calculate first and last timestamps after initialization."""
866        if self.messages:
867            message_timestamps = [m.timestamp for m in self.messages]
868            self.first_timestamp = min(message_timestamps)
869            self.last_timestamp = max(message_timestamps)
870
871    @property
872    def message_count(self) -> int:
873        """Get the total number of messages in the conversation."""
874        return len(self.messages)
875
876    @property
877    def duration(self) -> Optional[datetime.timedelta]:
878        """Get the duration of the conversation."""
879        if self.first_timestamp and self.last_timestamp:
880            return self.last_timestamp - self.first_timestamp
881        return None
882
883    @property
884    def days_active(self) -> Optional[int]:
885        """Get the number of days the conversation was active."""
886        if self.duration:
887            return self.duration.days
888        return None
889
890    def get_messages_by_date(self, date: datetime.date) -> List[SkypeMessage]:
891        """Get all messages from a specific date."""
892        return [msg for msg in self.messages if msg.date == date]
893
894    def get_message_dates(self) -> Set[datetime.date]:
895        """Get all unique dates when messages were sent."""
896        return {msg.date for msg in self.messages}
897
898    def add_message(self, message: SkypeMessage) -> None:
899        """Add a message to the conversation and update timestamps."""
900        self.messages.append(message)
901
902        # Update first/last timestamps
903        if not self.first_timestamp or message.timestamp < self.first_timestamp:
904            self.first_timestamp = message.timestamp
905
906        if not self.last_timestamp or message.timestamp > self.last_timestamp:
907            self.last_timestamp = message.timestamp
908
909@dataclass
910class SkypeExport:
911    """Represents a complete Skype export with metadata and conversations."""
912    user_id: str
913    export_date: datetime.datetime
914    conversations: Dict[str, SkypeConversation] = field(default_factory=dict)
915    original_json: Dict[str, Any] = field(default_factory=dict)
916
917    @property
918    def total_messages(self) -> int:
919        """Get the total number of messages across all conversations."""
920        return sum(conv.message_count for conv in self.conversations.values())
921
922    @property
923    def total_conversations(self) -> int:
924        """Get the total number of conversations."""
925        return len(self.conversations)
926
927    def get_conversation_by_id(self, id: str) -> Optional[SkypeConversation]:
928        """Get a conversation by its ID."""
929        return self.conversations.get(id)
930
931    def add_conversation(self, conversation: SkypeConversation) -> None:
932        """Add a conversation to the export."""
933        self.conversations[conversation.id] = conversation
934
935    def filter_conversations(self, pattern: str) -> List[SkypeConversation]:
936        """Filter conversations by display name pattern."""
937        return [conv for conv in self.conversations.values()
938                if fnmatch.fnmatch(conv.display_name.lower(), pattern.lower())]
939
940    def get_conversation_stats(self) -> Dict[str, Any]:
941        """Generate statistics about the conversations."""
942        stats = {
943            "total_conversations": self.total_conversations,
944            "total_messages": self.total_messages,
945            "conversation_details": []
946        }
947
948        for conv_id, conv in self.conversations.items():
949            # Skip empty conversations
950            if not conv.messages:
951                continue
952
953            conv_stats = {
954                "id": conv.id,
955                "display_name": conv.display_name,
956                "message_count": conv.message_count,
957                "days_active": conv.days_active,
958                "first_message": conv.first_timestamp.isoformat() if conv.first_timestamp else None,
959                "last_message": conv.last_timestamp.isoformat() if conv.last_timestamp else None,
960                "participants": len(conv.participants),
961                "participants_names": list(conv.participants.values()),
962                "message_types": {}
963            }
964
965            # Count message types
966            for msg in conv.messages:
967                if msg.message_type not in conv_stats["message_types"]:
968                    conv_stats["message_types"][msg.message_type] = 0
969                conv_stats["message_types"][msg.message_type] += 1
970
971            stats["conversation_details"].append(conv_stats)
972
973        return stats
974
975# ═════════════════════════════════════════════════════════════════════════════
976# ═══════════════════════════ DATABASE MODELS ═════════════════════════════════
977# ═════════════════════════════════════════════════════════════════════════════
978
979if SQLALCHEMY_AVAILABLE:
980    Base = declarative_base()
981
982    class DbConversation(Base):
983        """Database model for Skype conversations."""
984        __tablename__ = 'conversations'
985
986        id = Column(String(255), primary_key=True)
987        display_name = Column(String(255), index=True)
988        first_timestamp = Column(DateTime, nullable=True, index=True)
989        last_timestamp = Column(DateTime, nullable=True, index=True)
990        message_count = Column(Integer, default=0)
991        days_active = Column(Integer, nullable=True)
992        export_date = Column(DateTime, nullable=False)
993        metadata_json = Column(Text, nullable=True)
994
995        # Relationships
996        messages = relationship("DbMessage", back_populates="conversation",
997                                cascade="all, delete-orphan")
998        participants = relationship("DbParticipant", back_populates="conversation",
999                                   cascade="all, delete-orphan")
1000
1001    class DbMessage(Base):
1002        """Database model for Skype messages."""
1003        __tablename__ = 'messages'
1004
1005        id = Column(String(255), primary_key=True)
1006        conversation_id = Column(String(255), ForeignKey('conversations.id'), index=True)
1007        timestamp = Column(DateTime, nullable=False, index=True)
1008        sender_id = Column(String(255), index=True)
1009        sender_display_name = Column(String(255))
1010        content = Column(Text, nullable=True)
1011        message_type = Column(String(50), index=True)
1012        edited = Column(Boolean, default=False)
1013        metadata_json = Column(Text, nullable=True)
1014
1015        # Relationships
1016        conversation = relationship("DbConversation", back_populates="messages")
1017
1018    class DbParticipant(Base):
1019        """Database model for conversation participants."""
1020        __tablename__ = 'participants'
1021
1022        id = Column(Integer, primary_key=True, autoincrement=True)
1023        conversation_id = Column(String(255), ForeignKey('conversations.id'), index=True)
1024        user_id = Column(String(255), index=True)
1025        display_name = Column(String(255))
1026
1027        # Relationships
1028        conversation = relationship("DbConversation", back_populates="participants")
1029
1030        # Composite unique constraint
1031        __table_args__ = (
1032            sqlalchemy.UniqueConstraint('conversation_id', 'user_id', name='uq_participant'),
1033        )
1034
1035    class DbExportMeta(Base):
1036        """Database model for export metadata."""
1037        __tablename__ = 'export_metadata'
1038
1039        id = Column(Integer, primary_key=True, autoincrement=True)
1040        export_date = Column(DateTime, nullable=False, index=True)
1041        user_id = Column(String(255), index=True)
1042        user_display_name = Column(String(255))
1043        format = Column(String(50))
1044        total_conversations = Column(Integer, default=0)
1045        total_messages = Column(Integer, default=0)
1046        duration_seconds = Column(Integer, default=0)
1047        metadata_json = Column(Text, nullable=True)
1048
1049class DatabaseManager:
1050    """Manage database connections and operations."""
1051
1052    def __init__(self, ctx: AppContext):
1053        """
1054        Initialize the database manager.
1055
1056        Args:
1057            ctx: Application context
1058        """
1059        self.ctx = ctx
1060        self.logger = get_logger('database', ctx)
1061        self.engine = None
1062        self.session_factory = None
1063
1064        # Check required dependencies
1065        if not SQLALCHEMY_AVAILABLE:
1066            self.logger.error("SQLAlchemy is required for database operations but not installed")
1067            raise DatabaseError("SQLAlchemy is required but not installed")
1068
1069        if not PSYCOPG2_AVAILABLE and ctx.options.format == OutputFormat.POSTGRESQL:
1070            self.logger.error("psycopg2 is required for PostgreSQL export but not installed")
1071            raise DatabaseError("psycopg2 is required but not installed")
1072
1073    def initialize(self) -> None:
1074        """Initialize database connection and create schema if needed."""
1075        config = self.ctx.options.database_config
1076
1077        try:
1078            # Create engine with connection pooling
1079            self.engine = create_engine(
1080                config.connection_string,
1081                pool_size=config.connection_pool_size,
1082                max_overflow=config.connection_max_overflow,
1083                pool_timeout=config.connection_timeout,
1084                echo=config.echo_sql
1085            )
1086
1087            # Create session factory
1088            self.session_factory = sessionmaker(bind=self.engine)
1089
1090            # Create tables if they don't exist
1091            Base.metadata.create_all(self.engine)
1092
1093            self.logger.info(f"Connected to database: {config.engine}://{config.host}:{config.port}/{config.database}")
1094
1095        except Exception as e:
1096            self.logger.error(f"Database initialization error: {e}")
1097            raise DatabaseError(f"Failed to initialize database: {e}") from e
1098
1099    @contextmanager
1100    def session(self) -> Generator[Session, None, None]:
1101        """
1102        Get a database session with automatic cleanup.
1103
1104        Yields:
1105            SQLAlchemy session
1106        """
1107        if not self.session_factory:
1108            self.initialize()
1109
1110        session = self.session_factory()
1111        try:
1112            yield session
1113            session.commit()
1114        except Exception as e:
1115            session.rollback()
1116            self.logger.error(f"Database session error: {e}")
1117            raise
1118        finally:
1119            session.close()
1120
1121    def count_conversations(self) -> int:
1122        """
1123        Count conversations in the database.
1124
1125        Returns:
1126            Number of conversations
1127        """
1128        with self.session() as session:
1129            return session.query(DbConversation).count()
1130
1131    def count_messages(self) -> int:
1132        """
1133        Count messages in the database.
1134
1135        Returns:
1136            Number of messages
1137        """
1138        with self.session() as session:
1139            return session.query(DbMessage).count()
1140
1141    def create_export_metadata(self, skype_export: SkypeExport, duration_seconds: int) -> None:
1142        """
1143        Create export metadata record.
1144
1145        Args:
1146            skype_export: SkypeExport object
1147            duration_seconds: Export duration in seconds
1148        """
1149        with self.session() as session:
1150            meta = DbExportMeta(
1151                export_date=skype_export.export_date,
1152                user_id=skype_export.user_id,
1153                user_display_name=self.ctx.user_display_name,
1154                format=self.ctx.options.format.name,
1155                total_conversations=skype_export.total_conversations,
1156                total_messages=skype_export.total_messages,
1157                duration_seconds=duration_seconds,
1158                metadata_json=json.dumps({
1159                    "export_date": self.ctx.export_date,
1160                    "export_time": self.ctx.export_time,
1161                    "options": {k: str(v) for k, v in dataclasses.asdict(self.ctx.options).items()
1162                               if k != 'database_config'}
1163                })
1164            )
1165            session.add(meta)
1166
1167# ═════════════════════════════════════════════════════════════════════════════
1168# ═══════════════════════════ CORE PROCESSORS ═════════════════════════════════
1169# ═════════════════════════════════════════════════════════════════════════════
1170
1171class FileReader(ABC):
1172    """Abstract base class for reading different types of input files."""
1173
1174    @abstractmethod
1175    async def read(self, file_path: Path, ctx: AppContext) -> Dict[str, Any]:
1176        """
1177        Read and parse input file.
1178
1179        Args:
1180            file_path: Path to the input file
1181            ctx: Application context
1182
1183        Returns:
1184            Parsed content as dictionary
1185        """
1186        pass
1187
1188    @classmethod
1189    def create_reader(cls, file_path: Path) -> 'FileReader':
1190        """
1191        Factory method to create appropriate reader based on file extension.
1192
1193        Args:
1194            file_path: Path to input file
1195
1196        Returns:
1197            Appropriate FileReader instance
1198        """
1199        suffix = file_path.suffix.lower()
1200        if suffix == '.json':
1201            return JsonFileReader()
1202        elif suffix == '.tar' or suffix == '.gz' or suffix == '.tgz':
1203            return TarFileReader()
1204        elif suffix == '.zip':
1205            return ZipFileReader()
1206        else:
1207            raise ValueError(f"Unsupported file type: {suffix}")
1208
1209class JsonFileReader(FileReader):
1210    """Reader for JSON files."""
1211
1212    async def read(self, file_path: Path, ctx: AppContext) -> Dict[str, Any]:
1213        """Read a regular JSON file."""
1214        ctx.logger.debug(f"Reading JSON file: {file_path}")
1215        loop = asyncio.get_event_loop()
1216
1217        # Check file size - if large, use streaming parser
1218        file_size = file_path.stat().st_size
1219        large_file_threshold = 100 * 1024 * 1024  # 100 MB
1220
1221        if file_size > large_file_threshold:
1222            ctx.logger.info(f"Large JSON file detected ({file_size/1024/1024:.2f} MB). Using streaming parser.")
1223            try:
1224                # Use ijson for streaming if available
1225                import_result = importlib.util.find_spec("ijson")
1226                if import_result is not None:
1227                    import ijson
1228                    return await loop.run_in_executor(None, self._read_with_ijson, file_path)
1229                else:
1230                    ctx.logger.warning("ijson package not available for streaming. Using standard JSON parser.")
1231            except ImportError:
1232                ctx.logger.warning("ijson import failed. Using standard JSON parser.")
1233
1234        # Default JSON loading for normal-sized files or if ijson fails
1235        try:
1236            return await loop.run_in_executor(None, self._read_standard_json, file_path)
1237        except json.JSONDecodeError as e:
1238            raise ParseError(f"Failed to parse JSON file {file_path}: {e}")
1239        except Exception as e:
1240            raise FileReadError(f"Failed to read JSON file {file_path}: {e}")
1241
1242    def _read_standard_json(self, file_path: Path) -> Dict[str, Any]:
1243        """Read a JSON file using the standard json module."""
1244        with open(file_path, 'r', encoding='utf-8') as f:
1245            return json.load(f)
1246
1247    def _read_with_ijson(self, file_path: Path) -> Dict[str, Any]:
1248        """Stream parse a large JSON file using ijson."""
1249        import ijson
1250
1251        result = {}
1252        with open(file_path, 'rb') as f:
1253            # Read top-level elements
1254            for prefix, event, value in ijson.parse(f):
1255                if prefix == '' and event == 'map_key':
1256                    current_key = value
1257                elif prefix == '' and event in ('string', 'number', 'boolean'):
1258                    result[current_key] = value
1259
1260            # Reopen file and stream the conversations array specifically
1261            f.seek(0)
1262            conversations = []
1263            for conversation in ijson.items(f, 'conversations.item'):
1264                conversations.append(conversation)
1265
1266            result['conversations'] = conversations
1267
1268        return result
1269
1270class TarFileReader(FileReader):
1271    """Reader for TAR file archives."""
1272
1273    async def read(self, file_path: Path, ctx: AppContext) -> Dict[str, Any]:
1274        """Read and extract a TAR archive."""
1275        ctx.logger.debug(f"Reading TAR file: {file_path}")
1276        loop = asyncio.get_event_loop()
1277
1278        try:
1279            return await loop.run_in_executor(None, self._process_tar, file_path, ctx)
1280        except Exception as e:
1281            ctx.logger.error(f"Error reading TAR file {file_path}: {e}")
1282            raise FileReadError(f"Failed to read TAR file: {e}")
1283
1284    def _process_tar(self, file_path: Path, ctx: AppContext) -> Dict[str, Any]:
1285        """Process TAR file contents in a separate thread."""
1286        with tarfile.open(file_path, 'r:*') as tar:
1287            # Extract all files to temporary directory
1288            temp_dir = Path(tempfile.mkdtemp(prefix="original_scripts.testing_"))
1289            try:
1290                tar.extractall(path=temp_dir)
1291                ctx.logger.debug(f"Extracted TAR contents to {temp_dir}")
1292
1293                # Find JSON files
1294                json_files = list(temp_dir.glob('**/*.json'))
1295
1296                # Check if we found any JSON files
1297                if not json_files:
1298                    raise FileReadError(f"No JSON files found in TAR archive: {file_path}")
1299
1300                # Handle multiple JSON files
1301                if len(json_files) > 1:
1302                    ctx.logger.warning(f"Multiple JSON files found in archive: {[f.name for f in json_files]}")
1303
1304                    # In interactive/basic mode, prompt the user to select
1305                    if hasattr(ctx, 'ui') and ctx.options.basic_mode:
1306                        print("\nMultiple JSON files found in the archive:")
1307                        for i, f in enumerate(json_files, 1):
1308                            print(f"  {i}: {f.name} ({f.stat().st_size / 1024 / 1024:.2f} MB)")
1309
1310                        try:
1311                            selection = input("\nEnter number to select (1-{}) or press Enter for first file: ".format(len(json_files)))
1312                            if selection.strip():
1313                                index = int(selection.strip()) - 1
1314                                if 0 <= index < len(json_files):
1315                                    json_file = json_files[index]
1316                                    ctx.logger.info(f"Selected file: {json_file.name}")
1317                                else:
1318                                    ctx.logger.warning(f"Invalid selection, using first file: {json_files[0].name}")
1319                                    json_file = json_files[0]
1320                            else:
1321                                ctx.logger.info(f"No selection made, using first file: {json_files[0].name}")
1322                                json_file = json_files[0]
1323                        except (ValueError, IndexError):
1324                            ctx.logger.warning(f"Invalid input, using first file: {json_files[0].name}")
1325                            json_file = json_files[0]
1326                    else:
1327                        # In non-interactive mode, use largest JSON file (likely the main export)
1328                        json_file = max(json_files, key=lambda f: f.stat().st_size)
1329                        ctx.logger.info(f"Selected largest JSON file: {json_file.name} ({json_file.stat().st_size / 1024 / 1024:.2f} MB)")
1330                else:
1331                    json_file = json_files[0]
1332                    ctx.logger.debug(f"Found JSON file: {json_file}")
1333
1334                # Read the selected JSON file
1335                with open(json_file, 'r', encoding='utf-8') as f:
1336                    data = json.load(f)
1337
1338                return data
1339            finally:
1340                # Clean up temporary directory
1341                shutil.rmtree(temp_dir)
1342
1343class ZipFileReader(FileReader):
1344    """Reader for ZIP file archives."""
1345
1346    async def read(self, file_path: Path, ctx: AppContext) -> Dict[str, Any]:
1347        """Read and extract a ZIP archive."""
1348        ctx.logger.debug(f"Reading ZIP file: {file_path}")
1349        loop = asyncio.get_event_loop()
1350
1351        try:
1352            return await loop.run_in_executor(None, self._process_zip, file_path, ctx)
1353        except Exception as e:
1354            ctx.logger.error(f"Error reading ZIP file {file_path}: {e}")
1355            raise FileReadError(f"Failed to read ZIP file: {e}")
1356
1357    def _process_zip(self, file_path: Path, ctx: AppContext) -> Dict[str, Any]:
1358        """Process ZIP file contents in a separate thread."""
1359        with zipfile.ZipFile(file_path, 'r') as zip_file:
1360            # Extract all files to temporary directory
1361            temp_dir = Path(tempfile.mkdtemp(prefix="original_scripts.testing_"))
1362            try:
1363                zip_file.extractall(path=temp_dir)
1364                ctx.logger.debug(f"Extracted ZIP contents to {temp_dir}")
1365
1366                # Find JSON files
1367                json_files = list(temp_dir.glob('**/*.json'))
1368
1369                # Check if we found any JSON files
1370                if not json_files:
1371                    raise FileReadError(f"No JSON files found in ZIP archive: {file_path}")
1372
1373                # Handle multiple JSON files
1374                if len(json_files) > 1:
1375                    ctx.logger.warning(f"Multiple JSON files found in archive: {[f.name for f in json_files]}")
1376
1377                    # In interactive/basic mode, prompt the user to select
1378                    if hasattr(ctx, 'ui') and ctx.options.basic_mode:
1379                        print("\nMultiple JSON files found in the archive:")
1380                        for i, f in enumerate(json_files, 1):
1381                            print(f"  {i}: {f.name} ({f.stat().st_size / 1024 / 1024:.2f} MB)")
1382
1383                        try:
1384                            selection = input("\nEnter number to select (1-{}) or press Enter for first file: ".format(len(json_files)))
1385                            if selection.strip():
1386                                index = int(selection.strip()) - 1
1387                                if 0 <= index < len(json_files):
1388                                    json_file = json_files[index]
1389                                    ctx.logger.info(f"Selected file: {json_file.name}")
1390                                else:
1391                                    ctx.logger.warning(f"Invalid selection, using first file: {json_files[0].name}")
1392                                    json_file = json_files[0]
1393                            else:
1394                                ctx.logger.info(f"No selection made, using first file: {json_files[0].name}")
1395                                json_file = json_files[0]
1396                        except (ValueError, IndexError):
1397                            ctx.logger.warning(f"Invalid input, using first file: {json_files[0].name}")
1398                            json_file = json_files[0]
1399                    else:
1400                        # In non-interactive mode, use largest JSON file (likely the main export)
1401                        json_file = max(json_files, key=lambda f: f.stat().st_size)
1402                        ctx.logger.info(f"Selected largest JSON file: {json_file.name} ({json_file.stat().st_size / 1024 / 1024:.2f} MB)")
1403                else:
1404                    json_file = json_files[0]
1405                    ctx.logger.debug(f"Found JSON file: {json_file}")
1406
1407                # Read the selected JSON file
1408                with open(json_file, 'r', encoding='utf-8') as f:
1409                    data = json.load(f)
1410
1411                return data
1412            finally:
1413                # Clean up temporary directory
1414                shutil.rmtree(temp_dir)
1415
1416class SkypeExportParser:
1417    """Parser for Skype export data."""
1418
1419    def __init__(self, ctx: AppContext):
1420        """
1421        Initialize the parser.
1422
1423        Args:
1424            ctx: Application context
1425        """
1426        self.ctx = ctx
1427        self.logger = get_logger('parser', ctx)
1428
1429    async def parse(self, data: Dict[str, Any]) -> SkypeExport:
1430        """
1431        Parse raw Skype export data into structured domain objects.
1432
1433        Args:
1434            data: Raw JSON data from Skype export
1435
1436        Returns:
1437            Structured SkypeExport object
1438        """
1439        self.logger.info("Parsing Skype export data...")
1440
1441        # Extract basic metadata
1442        user_id, export_date = self._extract_metadata(data)
1443
1444        # Create export object
1445        skype_export = SkypeExport(
1446            user_id=user_id,
1447            export_date=export_date,
1448            original_json=data
1449        )
1450
1451        # Build ID to display name mapping
1452        id_to_display_name = self._build_display_name_map(data)
1453
1454        # Process all conversations
1455        conversations = data.get('conversations', [])
1456        self.ctx.total_conversations = len(conversations)
1457
1458        # Optimize batch size if needed
1459        self._optimize_batch_size(conversations)
1460
1461        # Parse all conversations with progress tracking
1462        await self._parse_conversations_with_progress(conversations, id_to_display_name, skype_export)
1463
1464        self.logger.info(f"Parsed {skype_export.total_conversations} conversations with {skype_export.total_messages} messages")
1465        return skype_export
1466
1467    async def _parse_conversations_with_progress(self, conversations: List[Dict[str, Any]],
1468                                                id_to_display_name: Dict[str, str],
1469                                                skype_export: SkypeExport) -> None:
1470        """
1471        Parse conversations with progress tracking.
1472
1473        Args:
1474            conversations: List of conversation data
1475            id_to_display_name: Mapping of user IDs to display names
1476            skype_export: SkypeExport object
1477        """
1478        progress_tracker = self.ctx.progress_tracker
1479        if RICH_AVAILABLE and progress_tracker and not self.ctx.options.basic_mode:
1480            with progress_tracker as progress:
1481                task = progress.add_task("[cyan]Parsing conversations...", total=len(conversations))
1482                for i, conv_data in enumerate(conversations):
1483                    conversation = await self._parse_conversation(conv_data, id_to_display_name)
1484                    skype_export.add_conversation(conversation)
1485                    progress.update(task, advance=1)
1486
1487                    # Periodically check memory usage
1488                    if i % 5 == 0:
1489                        self.ctx.check_memory()
1490
1491                    # Check for cancellation
1492                    if self.ctx.cancel_requested:
1493                        self.logger.info("Parsing cancelled by user")
1494                        break
1495        else:
1496            # Simple parsing without rich progress bar
1497            for i, conv_data in enumerate(conversations):
1498                if i % 10 == 0:
1499                    self.logger.info(f"Parsing conversation {i+1}/{len(conversations)}")
1500
1501                conversation = await self._parse_conversation(conv_data, id_to_display_name)
1502                skype_export.add_conversation(conversation)
1503
1504                # Periodically check memory usage
1505                if i % 5 == 0:
1506                    self.ctx.check_memory()
1507
1508                # Check for cancellation
1509                if self.ctx.cancel_requested:
1510                    self.logger.info("Parsing cancelled by user")
1511                    break
1512
1513    async def _parse_conversation(self, conv_data: Dict[str, Any],
1514                                id_to_display_name: Dict[str, str]) -> SkypeConversation:
1515        """
1516        Parse a single conversation from raw data.
1517
1518        Args:
1519            conv_data: Raw conversation data
1520            id_to_display_name: Mapping of user IDs to display names
1521
1522        Returns:
1523            Structured SkypeConversation object
1524        """
1525        conv_id = conv_data.get('id', '')
1526        display_name = conv_data.get('displayName', '')
1527
1528        # Handle missing display name
1529        if not display_name:
1530            # Try to extract from ID (typically format is "8:username")
1531            try:
1532                display_name = conv_id.split(':')[1]
1533            except (IndexError, AttributeError):
1534                display_name = f"Conversation {conv_id}"
1535
1536        # Update ID to display name mapping
1537        id_to_display_name[conv_id] = display_name
1538
1539        # Create conversation object
1540        conversation = SkypeConversation(
1541            id=conv_id,
1542            display_name=display_name,
1543            original_json=conv_data
1544        )
1545
1546        # Parse messages in parallel if enabled
1547        message_list = conv_data.get('MessageList', [])
1548
1549        if self.ctx.options.parallel and len(message_list) > 100 and not self.ctx.options.basic_mode:
1550            # Process messages in batches for large conversations
1551            loop = asyncio.get_event_loop()
1552
1553            # Use dynamic batch size based on memory constraints
1554            batch_size = self.ctx.options.batch_size
1555            batches = [message_list[i:i+batch_size] for i in range(0, len(message_list), batch_size)]
1556
1557            self.logger.debug(f"Processing {len(message_list)} messages in {len(batches)} batches "
1558                           f"(batch size: {batch_size})")
1559
1560            with concurrent.futures.ThreadPoolExecutor(
1561                max_workers=self.ctx.options.max_workers
1562            ) as executor:
1563                # Process each batch in parallel
1564                tasks = []
1565                for batch in batches:
1566                    task = loop.run_in_executor(
1567                        executor,
1568                        self._process_message_batch,
1569                        batch,
1570                        id_to_display_name,
1571                        conversation
1572                    )
1573                    tasks.append(task)
1574
1575                # Wait for all batches to complete
1576                completed_count = 0
1577                for completed_task in await asyncio.gather(*tasks):
1578                    completed_count += 1
1579
1580                    # Periodically check memory usage
1581                    if completed_count % 5 == 0:
1582                        self.ctx.check_memory()
1583        else:
1584            # Process messages sequentially for smaller conversations
1585            for msg_data in message_list:
1586                message = self._parse_message(msg_data, id_to_display_name)
1587                conversation.add_message(message)
1588
1589        # Update participant mapping
1590        for message in conversation.messages:
1591            if message.sender_id not in conversation.participants:
1592                conversation.participants[message.sender_id] = message.sender_display_name
1593
1594        # Sort messages by timestamp
1595        conversation.messages.sort(key=lambda msg: msg.timestamp)
1596
1597        return conversation
1598
1599    def _process_message_batch(self, batch: List[Dict[str, Any]],
1600                              id_to_display_name: Dict[str, str],
1601                              conversation: SkypeConversation) -> List[SkypeMessage]:
1602        """
1603        Process a batch of messages in a separate thread.
1604
1605        Args:
1606            batch: List of raw message data
1607            id_to_display_name: Mapping of user IDs to display names
1608            conversation: Conversation to add messages to
1609
1610        Returns:
1611            List of parsed messages
1612        """
1613        messages = []
1614        for msg_data in batch:
1615            message = self._parse_message(msg_data, id_to_display_name)
1616            conversation.add_message(message)
1617            messages.append(message)
1618
1619        # Trigger garbage collection for very large batches to manage memory
1620        if len(batch) > 5000 and self.ctx.options.enable_memory_optimization:
1621            gc.collect()
1622
1623        return messages
1624
1625    def _parse_message(self, msg_data: Dict[str, Any],
1626                       id_to_display_name: Dict[str, str]) -> SkypeMessage:
1627        """
1628        Parse a single message from raw data.
1629
1630        Args:
1631            msg_data: Raw message data
1632            id_to_display_name: Mapping of user IDs to display names
1633
1634        Returns:
1635            Structured SkypeMessage object
1636        """
1637        # Extract basic message data
1638        msg_id = msg_data.get('id', str(uuid.uuid4()))
1639
1640        # Parse timestamp
1641        timestamp_str = msg_data.get('originalarrivaltime', '')
1642        try:
1643            timestamp = datetime.datetime.fromisoformat(timestamp_str.replace('Z', '+00:00'))
1644        except (ValueError, TypeError):
1645            self.logger.warning(f"Invalid timestamp format: {timestamp_str}")
1646            timestamp = datetime.datetime.now(datetime.timezone.utc)
1647
1648        # Convert to local time if requested
1649        if self.ctx.options.use_local_time:
1650            timestamp = timestamp.astimezone()
1651
1652        # Extract sender info
1653        sender_id = msg_data.get('from', '')
1654        sender_display_name = id_to_display_name.get(sender_id, sender_id)
1655
1656        # Extract content and type
1657        content = msg_data.get('content', '')
1658        msg_type = msg_data.get('messagetype', 'unknown')
1659
1660        # Special handling for non-text message types
1661        if msg_type != 'RichText':
1662            content = self._get_message_type_description(msg_type)
1663
1664        # Check for edited messages
1665        edited = bool(re.search(r'<e_m.*>', content))
1666
1667        # Create message object
1668        message = SkypeMessage(
1669            id=msg_id,
1670            timestamp=timestamp,
1671            sender_id=sender_id,
1672            sender_display_name=sender_display_name,
1673            content=content,
1674            message_type=msg_type,
1675            edited=edited,
1676            original_json=msg_data
1677        )
1678
1679        return message
1680
1681    def _get_message_type_description(self, msg_type: str) -> str:
1682        """
1683        Convert Skype message type to human-readable description.
1684
1685        Args:
1686            msg_type: Skype message type
1687
1688        Returns:
1689            Human-readable description
1690        """
1691        type_descriptions = {
1692            'Event/Call': '***A call started/ended***',
1693            'Poll': '***Created a poll***',
1694            'RichText/Media_Album': '***Sent an album of images***',
1695            'RichText/Media_AudioMsg': '***Sent a voice message***',
1696            'RichText/Media_CallRecording': '***Sent a call recording***',
1697            'RichText/Media_Card': '***Sent a media card***',
1698            'RichText/Media_FlikMsg': '***Sent a moji***',
1699            'RichText/Media_GenericFile': '***Sent a file***',
1700            'RichText/Media_Video': '***Sent a video message***',
1701            'RichText/UriObject': '***Sent a photo***',
1702            'RichText/ScheduledCallInvite': '***Scheduled a call***',
1703            'RichText/Location': '***Sent a location***',
1704            'RichText/Contacts': '***Sent a contact***',
1705        }
1706
1707        return type_descriptions.get(msg_type, f'***Sent a {msg_type}***')
1708
1709    def _optimize_batch_size(self, conversations: List[Dict[str, Any]]) -> None:
1710        """
1711        Calculate optimal batch size based on data volume.
1712
1713        Args:
1714            conversations: List of raw conversation data
1715        """
1716        # Only optimize if memory monitoring is enabled
1717        if not (self.ctx.options.enable_memory_optimization and self.ctx.memory_monitor):
1718            return
1719
1720        # Adjust batch size based on number of conversations and available memory
1721        conversation_count = len(conversations)
1722        estimated_total_messages = 0
1723
1724        # Sample a few conversations to estimate total message count
1725        sample_size = min(10, conversation_count)
1726        for i in range(sample_size):
1727            conv_data = conversations[i]
1728            estimated_total_messages += len(conv_data.get('MessageList', []))
1729
1730        if sample_size > 0:
1731            avg_messages = estimated_total_messages / sample_size
1732            estimated_total = avg_messages * conversation_count
1733
1734            # Adjust batch size if total is large
1735            if estimated_total > 100000:
1736                optimal_batch_size = self.ctx.memory_monitor.calculate_optimal_batch_size(
1737                    int(estimated_total)
1738                )
1739                self.logger.info(f"Adjusting batch size to {optimal_batch_size} "
1740                             f"based on estimated {estimated_total:.0f} messages")
1741                self.ctx.options.batch_size = optimal_batch_size
1742
1743    def _extract_metadata(self, data: Dict[str, Any]) -> Tuple[str, datetime.datetime]:
1744        """
1745        Extract user ID and export date from the export data.
1746
1747        Args:
1748            data: Raw JSON data from Skype export
1749
1750        Returns:
1751            Tuple of (user_id, export_date)
1752        """
1753        # Default values
1754        user_id = "unknown"
1755        export_date = datetime.datetime.now()
1756
1757        # Try to extract user ID
1758        if "userId" in data:
1759            user_id = data["userId"]
1760        elif "creator" in data:
1761            user_id = data["creator"]
1762        elif "exportedBy" in data:
1763            user_id = data["exportedBy"]
1764
1765        # Try to extract export date
1766        if "exportDate" in data:
1767            try:
1768                if isinstance(data["exportDate"], str):
1769                    # Try ISO format first
1770                    try:
1771                        export_date = datetime.datetime.fromisoformat(data["exportDate"])
1772                    except ValueError:
1773                        # Try various date formats
1774                        for fmt in ["%Y-%m-%d", "%Y/%m/%d", "%d-%m-%Y", "%d/%m/%Y"]:
1775                            try:
1776                                export_date = datetime.datetime.strptime(data["exportDate"], fmt)
1777                                break
1778                            except ValueError:
1779                                continue
1780                elif isinstance(data["exportDate"], int):
1781                    # Assume Unix timestamp (seconds since epoch)
1782                    export_date = datetime.datetime.fromtimestamp(data["exportDate"])
1783            except Exception as e:
1784                self.logger.warning(f"Failed to parse export date: {e}")
1785
1786        # If we still don't have a user ID, try to extract from file metadata
1787        if user_id == "unknown" and "personaList" in data:
1788            for persona in data["personaList"]:
1789                if "cid" in persona:
1790                    user_id = persona["cid"]
1791                    break
1792
1793        return user_id, export_date
1794
1795    def _build_display_name_map(self, data: Dict[str, Any]) -> Dict[str, str]:
1796        """
1797        Build a mapping from user IDs to display names.
1798
1799        Args:
1800            data: Raw JSON data from Skype export
1801
1802        Returns:
1803            Dictionary mapping user IDs to display names
1804        """
1805        id_to_display_name = {}
1806
1807        # Extract from personas list if available
1808        if "personaList" in data:
1809            for persona in data["personaList"]:
1810                if "cid" in persona and "displayName" in persona:
1811                    id_to_display_name[persona["cid"]] = persona["displayName"]
1812
1813        # Extract from conversations/chats if available
1814        if "conversations" in data:
1815            for conv in data["conversations"]:
1816                if "id" in conv and "displayName" in conv:
1817                    id_to_display_name[conv["id"]] = conv["displayName"]
1818
1819        if "chats" in data:
1820            for chat in data["chats"]:
1821                if "id" in chat and "threadProperties" in chat and "topic" in chat["threadProperties"]:
1822                    id_to_display_name[chat["id"]] = chat["threadProperties"]["topic"]
1823                elif "id" in chat and "displayName" in chat:
1824                    id_to_display_name[chat["id"]] = chat["displayName"]
1825
1826        return id_to_display_name
1827
1828class ContentFormatter:
1829    """Base class for content formatting with rich formatting support."""
1830
1831    def __init__(self, ctx: AppContext):
1832        """
1833        Initialize the formatter.
1834
1835        Args:
1836            ctx: Application context
1837        """
1838        self.ctx = ctx
1839        self.logger = get_logger('formatter', ctx)
1840
1841    def format_timestamp(self, timestamp: datetime.datetime) -> str:
1842        """Format timestamp for display."""
1843        return timestamp.strftime("%Y-%m-%d %H:%M:%S")
1844
1845    def format_message(self, message: SkypeMessage) -> str:
1846        """
1847        Format a message for display.
1848
1849        Args:
1850            message: Message to format
1851
1852        Returns:
1853            Formatted message string
1854        """
1855        timestamp = ""
1856        if self.ctx.options.include_timestamps:
1857            timestamp = f"[{self.format_timestamp(message.timestamp)}] "
1858
1859        formatted = f"{timestamp}{message.sender_display_name}: {message.content}"
1860        return formatted
1861
1862    def create_banner(self, conversation: SkypeConversation) -> str:
1863        """
1864        Create a banner with conversation metadata.
1865
1866        Args:
1867            conversation: Conversation to create banner for
1868
1869        Returns:
1870            Banner string
1871        """
1872        banner = [
1873            f"Conversation with: {conversation.display_name} ({conversation.id})",
1874            f"Exported on: {self.ctx.export_date}, at: {self.ctx.export_time}",
1875        ]
1876
1877        if conversation.first_timestamp and conversation.last_timestamp:
1878            banner.extend([
1879                f"Conversations From: {self.format_timestamp(conversation.first_timestamp)}",
1880                f"                To: {self.format_timestamp(conversation.last_timestamp)}",
1881            ])
1882
1883        banner.append("***** All times are in UTC *****" if not self.ctx.options.use_local_time
1884                     else "***** All times are in local time *****")
1885
1886        return "\n".join(banner)
1887
1888    def parse_content(self, content: str) -> str:
1889        """
1890        Parse and clean message content.
1891
1892        Args:
1893            content: Raw message content
1894
1895        Returns:
1896            Cleaned content
1897        """
1898        if self.ctx.options.strip_html:
1899            if BEAUTIFULSOUP_AVAILABLE:
1900                return self._parse_with_beautifulsoup(content)
1901            else:
1902                return self._parse_with_regex(content)
1903        return content
1904
1905    def _parse_with_beautifulsoup(self, content: str) -> str:
1906        """
1907        Parse content using BeautifulSoup.
1908
1909        Args:
1910            content: Raw HTML content
1911
1912        Returns:
1913            Plain text content
1914        """
1915        soup = BeautifulSoup(content, 'lxml')
1916        text = soup.get_text()
1917        return self._pretty_quotes(text)
1918
1919    def _parse_with_regex(self, content: str) -> str:
1920        """
1921        Parse content using regex fallback.
1922
1923        Args:
1924            content: Raw HTML content
1925
1926        Returns:
1927            Plain text content
1928        """
1929        tag_pattern = re.compile(r'<.*?>')
1930        content = tag_pattern.sub('', content)
1931        content = html.unescape(content)
1932        return self._pretty_quotes(content)
1933
1934    def _pretty_quotes(self, text: str) -> str:
1935        """
1936        Format quotes for better readability.
1937
1938        Args:
1939            text: Text with quote markers
1940
1941        Returns:
1942            Text with formatted quotes
1943        """
1944        # Replace quote markers with more readable format
1945        quote_pattern = re.compile(r'\[[+-]?\d+(?:\.\d+)?\]')
1946        text = quote_pattern.sub(r'\n\t*** Quoting the following message: ***\n\t', text)
1947
1948        response_pattern = re.compile(r'\<\<\<')
1949        text = response_pattern.sub('\t*** And responding with: ***\n\t', text)
1950
1951        return text
1952
1953class TextExporter:
1954    """Exports conversations to plain text format."""
1955
1956    def __init__(self, ctx: AppContext):
1957        """Initialize text exporter with application context."""
1958        self.ctx = ctx
1959        self.logger = ctx.logger.getChild('exporter.text')
1960        self.formatter = ContentFormatter(ctx)
1961        # Check if aiofiles is available
1962        self.aiofiles_available = importlib.util.find_spec("aiofiles") is not None
1963        if self.aiofiles_available:
1964            self.logger.debug("aiofiles is available, will use for async file operations")
1965            import aiofiles
1966            self.aiofiles = aiofiles
1967        else:
1968            self.logger.debug("aiofiles not available, using custom async file wrapper")
1969
1970    async def export_conversation(self, conversation: SkypeConversation, output_dir: Path) -> Path:
1971        """
1972        Export a conversation to a text file.
1973
1974        Args:
1975            conversation: Conversation to export
1976            output_dir: Output directory
1977
1978        Returns:
1979            Path to the exported file
1980        """
1981        self.logger.info(f"Exporting conversation '{conversation.display_name}' to text")
1982
1983        # Create file name from conversation display name
1984        file_name = sanitize_filename(conversation.display_name)
1985        output_path = get_unique_filename(output_dir, file_name, "txt")
1986
1987        # Prepare content
1988        content = []
1989
1990        # Add banner with conversation info
1991        content.append(self.formatter.create_banner(conversation))
1992        content.append("")  # Empty line after banner
1993
1994        # Group messages by date
1995        message_dates = sorted(conversation.get_message_dates())
1996
1997        # Process each date
1998        for date in message_dates:
1999            # Add date header
2000            date_str = date.strftime("%A, %B %d, %Y")
2001            content.append(f"\n=== {date_str} ===\n")
2002
2003            # Add messages for this date
2004            messages = conversation.get_messages_by_date(date)
2005            for message in messages:
2006                content.append(self.formatter.format_message(message))
2007
2008        # Write to file using async I/O
2009        try:
2010            if self.aiofiles_available:
2011                # Use aiofiles for truly async I/O
2012                async with self.aiofiles.open(output_path, 'w', encoding='utf-8') as f:
2013                    await f.write('\n'.join(content))
2014            else:
2015                # Fall back to custom async wrapper
2016                with self._async_open(output_path, 'w', encoding='utf-8') as f:
2017                    await f.write('\n'.join(content))
2018
2019            self.logger.info(f"Exported to {output_path}")
2020            return output_path
2021
2022        except Exception as e:
2023            self.logger.error(f"Failed to write text file: {e}")
2024            raise FileWriteError(f"Failed to write text file: {e}")
2025
2026    @contextmanager
2027    def _async_open(self, file_path: Path, mode: str, **kwargs):
2028        """
2029        Context manager for async file operations.
2030
2031        Args:
2032            file_path: Path to file
2033            mode: File mode
2034            **kwargs: Additional open arguments
2035
2036        Yields:
2037            AsyncFile object
2038        """
2039        class AsyncFile:
2040            def __init__(self, file_obj):
2041                self.file_obj = file_obj
2042
2043            async def write(self, content):
2044                loop = asyncio.get_event_loop()
2045                await loop.run_in_executor(None, self.file_obj.write, content)
2046
2047            async def read(self):
2048                loop = asyncio.get_event_loop()
2049                return await loop.run_in_executor(None, self.file_obj.read)
2050
2051        file_obj = open(file_path, mode, **kwargs)
2052        try:
2053            yield AsyncFile(file_obj)
2054        finally:
2055            file_obj.close()
2056
2057class HtmlExporter:
2058    """Exporter for HTML format with styling."""
2059
2060    def __init__(self, ctx: AppContext):
2061        """
2062        Initialize the exporter.
2063
2064        Args:
2065            ctx: Application context
2066        """
2067        self.ctx = ctx
2068        self.formatter = ContentFormatter(ctx)
2069        self.logger = get_logger('exporter.html', ctx)
2070
2071        # Check for required dependencies
2072        if not JINJA2_AVAILABLE:
2073            ctx.logger.warning("Jinja2 not installed. HTML export will use basic formatting.")
2074
2075    async def export_conversation(self, conversation: SkypeConversation, output_dir: Path) -> Path:
2076        """
2077        Export a conversation to HTML format.
2078
2079        Args:
2080            conversation: Conversation to export
2081            output_dir: Directory to write output to
2082
2083        Returns:
2084            Path to the exported file
2085        """
2086        self.logger.debug(f"Exporting conversation {conversation.display_name} to HTML")
2087
2088        # Create filename with enhanced sanitization
2089        safe_name = sanitize_filename(conversation.display_name)
2090        filename = f"[{self.ctx.export_date}]-{safe_name}.html"
2091        output_path = output_dir / filename
2092
2093        # Group messages by date
2094        message_groups = {}
2095        for date in sorted(conversation.get_message_dates()):
2096            message_groups[date.isoformat()] = conversation.get_messages_by_date(date)
2097
2098        # Generate HTML
2099        if JINJA2_AVAILABLE:
2100            html_content = self._generate_html_with_jinja(conversation, message_groups)
2101        else:
2102            html_content = self._generate_basic_html(conversation, message_groups)
2103
2104        # Write to file
2105        try:
2106            loop = asyncio.get_event_loop()
2107            await loop.run_in_executor(
2108                None,
2109                lambda: output_path.write_text(html_content, encoding='utf-8')
2110            )
2111        except Exception as e:
2112            self.logger.error(f"Error writing to {output_path}: {e}")
2113            raise FileWriteError(f"Failed to write HTML to {output_path}: {e}")
2114
2115        self.logger.info(f"Exported {conversation.message_count} messages to {output_path}")
2116        return output_path
2117
2118    def _generate_html_with_jinja(self, conversation: SkypeConversation,
2119                                 message_groups: Dict[str, List[SkypeMessage]]) -> str:
2120        """
2121        Generate HTML using Jinja2 templates.
2122
2123        Args:
2124            conversation: Conversation to export
2125            message_groups: Messages grouped by date
2126
2127        Returns:
2128            Generated HTML string
2129        """
2130        # Create template
2131        template_str = """
2132        <!DOCTYPE html>
2133        <html lang="en">
2134        <head>
2135            <meta charset="UTF-8">
2136            <meta name="viewport" content="width=device-width, initial-scale=1.0">
2137            <title>{{ conversation.display_name }} - Skype Chat</title>
2138            <style>
2139                body {
2140                    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
2141                    line-height: 1.6;
2142                    color: #333;
2143                    max-width: 800px;
2144                    margin: 0 auto;
2145                    padding: 20px;
2146                }
2147                .header {
2148                    background-color: #00aff0;
2149                    color: white;
2150                    padding: 15px;
2151                    border-radius: 5px;
2152                    margin-bottom: 20px;
2153                }
2154                .date-header {
2155                    background-color: #e6e6e6;
2156                    padding: 8px 15px;
2157                    border-radius: 5px;
2158                    margin: 25px 0 15px 0;
2159                    font-weight: bold;
2160                }
2161                .message {
2162                    margin-bottom: 10px;
2163                    padding: 10px;
2164                    border-radius: 5px;
2165                }
2166                .message:nth-child(odd) {
2167                    background-color: #f5f5f5;
2168                }
2169                .timestamp {
2170                    color: #777;
2171                    font-size: 0.85em;
2172                    margin-right: 10px;
2173                }
2174                .sender {
2175                    font-weight: bold;
2176                    margin-right: 10px;
2177                }
2178                .content {
2179                    white-space: pre-wrap;
2180                }
2181                .quote {
2182                    border-left: 3px solid #00aff0;
2183                    padding-left: 10px;
2184                    color: #555;
2185                    font-style: italic;
2186                }
2187                .metadata {
2188                    font-size: 0.9em;
2189                    color: #777;
2190                }
2191                .edited {
2192                    color: #999;
2193                    font-style: italic;
2194                    font-size: 0.85em;
2195                }
2196                .special {
2197                    color: #777;
2198                    font-style: italic;
2199                }
2200            </style>
2201        </head>
2202        <body>
2203            <div class="header">
2204                <h1>{{ conversation.display_name }}</h1>
2205                <div class="metadata">
2206                    <p>Exported on: {{ export_date }}, at: {{ export_time }}</p>
2207                    {% if conversation.first_timestamp %}
2208                    <p>Conversations from: {{ formatter.format_timestamp(conversation.first_timestamp) }}</p>
2209                    <p>To: {{ formatter.format_timestamp(conversation.last_timestamp) }}</p>
2210                    {% endif %}
2211                    <p>{{ time_zone_note }}</p>
2212                </div>
2213            </div>
2214
2215            {% for date, messages in message_groups.items() %}
2216                <div class="date-header">Conversations on {{ date }}</div>
2217
2218                {% for message in messages %}
2219                    <div class="message">
2220                        {% if include_timestamps %}
2221                        <span class="timestamp">[{{ formatter.format_timestamp(message.timestamp) }}]</span>
2222                        {% endif %}
2223                        <span class="sender">{{ message.sender_display_name }}:</span>
2224
2225                        {% if message.message_type != 'RichText' %}
2226                            <span class="special">{{ message.content }}</span>
2227                        {% else %}
2228                            <span class="content">{{ formatter.parse_content(message.content) }}</span>
2229                            {% if message.edited %}
2230                            <div class="edited">This message was edited</div>
2231                            {% endif %}
2232                        {% endif %}
2233                    </div>
2234                {% endfor %}
2235            {% endfor %}
2236        </body>
2237        </html>
2238        """
2239
2240        # Create template and render
2241        template = jinja2.Template(template_str)
2242        return template.render(
2243            conversation=conversation,
2244            message_groups=message_groups,
2245            formatter=self.formatter,
2246            export_date=self.ctx.export_date,
2247            export_time=self.ctx.export_time,
2248            include_timestamps=self.ctx.options.include_timestamps,
2249            time_zone_note="All times are in UTC" if not self.ctx.options.use_local_time else "All times are in local time"
2250        )
2251
2252    def _generate_basic_html(self, conversation: SkypeConversation,
2253                            message_groups: Dict[str, List[SkypeMessage]]) -> str:
2254        """
2255        Generate basic HTML without Jinja2.
2256
2257        Args:
2258            conversation: Conversation to export
2259            message_groups: Messages grouped by date
2260
2261        Returns:
2262            Generated HTML string
2263        """
2264        # Create HTML pieces
2265        html_parts = [
2266            '<!DOCTYPE html>',
2267            '<html lang="en">',
2268            '<head>',
2269            '    <meta charset="UTF-8">',
2270            f'    <title>{html.escape(conversation.display_name)} - Skype Chat</title>',
2271            '    <style>',
2272            '        body { font-family: sans-serif; max-width: 800px; margin: 0 auto; padding: 20px; }',
2273            '        .header { background-color: #00aff0; color: white; padding: 15px; }',
2274            '        .date-header { background-color: #e6e6e6; padding: 8px; margin: 20px 0 10px 0; }',
2275            '        .message { margin-bottom: 10px; padding: 8px; }',
2276            '        .message:nth-child(odd) { background-color: #f5f5f5; }',
2277            '    </style>',
2278            '</head>',
2279            '<body>',
2280            f'    <div class="header"><h1>{html.escape(conversation.display_name)}</h1>',
2281            f'        <p>Exported on: {self.ctx.export_date}, at: {self.ctx.export_time}</p>'
2282        ]
2283
2284        if conversation.first_timestamp and conversation.last_timestamp:
2285            html_parts.extend([
2286                f'        <p>Conversations from: {self.formatter.format_timestamp(conversation.first_timestamp)}</p>',
2287                f'        <p>To: {self.formatter.format_timestamp(conversation.last_timestamp)}</p>'
2288            ])
2289
2290        time_note = "All times are in UTC" if not self.ctx.options.use_local_time else "All times are in local time"
2291        html_parts.append(f'        <p>{time_note}</p>')
2292        html_parts.append('    </div>')
2293
2294        # Add messages grouped by date
2295        for date, messages in message_groups.items():
2296            html_parts.append(f'    <div class="date-header">Conversations on {date}</div>')
2297
2298            for message in messages:
2299                html_parts.append('    <div class="message">')
2300
2301                if self.ctx.options.include_timestamps:
2302                    html_parts.append(f'        <span>[{self.formatter.format_timestamp(message.timestamp)}]</span>')
2303
2304                html_parts.append(f'        <strong>{html.escape(message.sender_display_name)}:</strong> ')
2305
2306                # Handle different message types
2307                if message.message_type != 'RichText':
2308                    html_parts.append(f'        <em>{html.escape(message.content)}</em>')
2309                else:
2310                    content = self.formatter.parse_content(message.content)
2311                    html_parts.append(f'        <span>{html.escape(content)}</span>')
2312
2313                    if message.edited:
2314                        html_parts.append('        <div><em>This message was edited</em></div>')
2315
2316                html_parts.append('    </div>')
2317
2318        html_parts.extend(['</body>', '</html>'])
2319        return '\n'.join(html_parts)
2320
2321class MarkdownExporter:
2322    """Exporter for Markdown format."""
2323
2324    def __init__(self, ctx: AppContext):
2325        """
2326        Initialize the exporter.
2327
2328        Args:
2329            ctx: Application context
2330        """
2331        self.ctx = ctx
2332        self.formatter = ContentFormatter(ctx)
2333        self.logger = get_logger('exporter.markdown', ctx)
2334
2335    async def export_conversation(self, conversation: SkypeConversation, output_dir: Path) -> Path:
2336        """
2337        Export a conversation to Markdown format.
2338
2339        Args:
2340            conversation: Conversation to export
2341            output_dir: Directory to write output to
2342
2343        Returns:
2344            Path to the exported file
2345        """
2346        self.logger.debug(f"Exporting conversation {conversation.display_name} to Markdown")
2347
2348        # Create filename with enhanced sanitization
2349        safe_name = sanitize_filename(conversation.display_name)
2350        filename = f"[{self.ctx.export_date}]-{safe_name}.md"
2351        output_path = output_dir / filename
2352
2353        # Create banner
2354        content = [
2355            f"# Conversation with {conversation.display_name}",
2356            "",
2357            "## Metadata",
2358            f"- **Exported on:** {self.ctx.export_date}, at: {self.ctx.export_time}"
2359        ]
2360
2361        if conversation.first_timestamp and conversation.last_timestamp:
2362            content.extend([
2363                f"- **First message:** {self.formatter.format_timestamp(conversation.first_timestamp)}",
2364                f"- **Last message:** {self.formatter.format_timestamp(conversation.last_timestamp)}"
2365            ])
2366
2367        time_note = "All times are in UTC" if not self.ctx.options.use_local_time else "All times are in local time"
2368        content.append(f"- **Note:** {time_note}")
2369        content.append("")
2370
2371        # Group messages by date
2372        for date in sorted(conversation.get_message_dates()):
2373            date_messages = conversation.get_messages_by_date(date)
2374            if date_messages:
2375                content.append(f"## Conversations on {date.isoformat()}")
2376                content.append("")
2377
2378                for message in date_messages:
2379                    # Format timestamp
2380                    timestamp = ""
2381                    if self.ctx.options.include_timestamps:
2382                        timestamp = f"**[{self.formatter.format_timestamp(message.timestamp)}]** "
2383
2384                    # Format sender
2385                    sender = f"**{message.sender_display_name}:** "
2386
2387                    # Format content
2388                    if message.message_type != 'RichText':
2389                        msg_content = f"*{message.content}*"
2390                    else:
2391                        msg_content = self.formatter.parse_content(message.content)
2392                        # Escape markdown characters in content
2393                        msg_content = re.sub(r'([_*~`#])', r'\\\1', msg_content)
2394
2395                    # Add edited indicator
2396                    if message.edited:
2397                        msg_content += " *(edited)*"
2398
2399                    # Add complete message
2400                    content.append(f"{timestamp}{sender}{msg_content}")
2401                    content.append("")
2402
2403        # Write to file
2404        try:
2405            loop = asyncio.get_event_loop()
2406            await loop.run_in_executor(
2407                None,
2408                lambda: output_path.write_text('\n'.join(content), encoding='utf-8')
2409            )
2410        except Exception as e:
2411            self.logger.error(f"Error writing to {output_path}: {e}")
2412            raise FileWriteError(f"Failed to write Markdown to {output_path}: {e}")
2413
2414        self.logger.info(f"Exported {conversation.message_count} messages to {output_path}")
2415        return output_path
2416
2417class JsonExporter:
2418    """Exporter for JSON format with full message data."""
2419
2420    def __init__(self, ctx: AppContext):
2421        """
2422        Initialize the exporter.
2423
2424        Args:
2425            ctx: Application context
2426        """
2427        self.ctx = ctx
2428        self.logger = get_logger('exporter.json', ctx)
2429
2430    async def export_conversation(self, conversation: SkypeConversation, output_dir: Path) -> Path:
2431        """
2432        Export a conversation to JSON format.
2433
2434        Args:
2435            conversation: Conversation to export
2436            output_dir: Directory to write output to
2437
2438        Returns:
2439            Path to the exported file
2440        """
2441        self.logger.debug(f"Exporting conversation {conversation.display_name} to JSON")
2442
2443        # Create filename with enhanced sanitization
2444        safe_name = sanitize_filename(conversation.display_name)
2445        filename = f"[{self.ctx.export_date}]-{safe_name}.json"
2446        output_path = output_dir / filename
2447
2448        # Create serializable data structure
2449        data = {
2450            "metadata": {
2451                "id": conversation.id,
2452                "display_name": conversation.display_name,
2453                "export_date": self.ctx.export_date,
2454                "export_time": self.ctx.export_time,
2455                "message_count": conversation.message_count,
2456                "first_message": conversation.first_timestamp.isoformat() if conversation.first_timestamp else None,
2457                "last_message": conversation.last_timestamp.isoformat() if conversation.last_timestamp else None,
2458                "participants": conversation.participants,
2459                "timezone": "UTC" if not self.ctx.options.use_local_time else "local"
2460            },
2461            "messages": []
2462        }
2463
2464        # Add messages
2465        for message in conversation.messages:
2466            msg_data = {
2467                "id": message.id,
2468                "timestamp": message.timestamp.isoformat(),
2469                "sender_id": message.sender_id,
2470                "sender_display_name": message.sender_display_name,
2471                "content": message.content,
2472                "message_type": message.message_type,
2473                "edited": message.edited
2474            }
2475
2476            # Include original JSON if requested
2477            if self.ctx.options.include_message_ids:
2478                msg_data["original_json"] = message.original_json
2479
2480            data["messages"].append(msg_data)
2481
2482        # Write to file with indentation if pretty print is enabled
2483        indent = 2 if self.ctx.options.pretty_print else None
2484
2485        try:
2486            loop = asyncio.get_event_loop()
2487            await loop.run_in_executor(
2488                None,
2489                lambda: output_path.write_text(
2490                    json.dumps(data, indent=indent, ensure_ascii=False),
2491                    encoding='utf-8'
2492                )
2493            )
2494        except Exception as e:
2495            self.logger.error(f"Error writing to {output_path}: {e}")
2496            raise FileWriteError(f"Failed to write JSON to {output_path}: {e}")
2497
2498        self.logger.info(f"Exported {conversation.message_count} messages to {output_path}")
2499        return output_path
2500
2501class PostgreSQLExporter:
2502    """Exporter for PostgreSQL database with normalized schema."""
2503
2504    def __init__(self, ctx: AppContext):
2505        """
2506        Initialize the exporter.
2507
2508        Args:
2509            ctx: Application context
2510        """
2511        self.ctx = ctx
2512        self.logger = get_logger('exporter.postgresql', ctx)
2513
2514        # Check for required dependencies
2515        if not SQLALCHEMY_AVAILABLE:
2516            raise ExportError("SQLAlchemy is required for PostgreSQL export but not installed")
2517
2518        if not PSYCOPG2_AVAILABLE:
2519            raise ExportError("psycopg2 is required for PostgreSQL export but not installed")
2520
2521        # Initialize database manager
2522        self.db_manager = DatabaseManager(ctx)
2523
2524        # Assign DB model classes to instance attributes for use in queries
2525        self.DbConversation = DbConversation
2526        self.DbMessage = DbMessage
2527        self.DbParticipant = DbParticipant
2528
2529    async def export_conversation(self, conversation: SkypeConversation, output_dir: Path) -> Path:
2530        """
2531        Export a conversation to PostgreSQL database.
2532
2533        Args:
2534            conversation: Conversation to export
2535            output_dir: Directory to write output to
2536
2537        Returns:
2538            Path to a metadata file with export info
2539        """
2540        self.logger.debug(f"Exporting conversation {conversation.display_name} to PostgreSQL")
2541
2542        # Create metadata file
2543        safe_name = sanitize_filename(conversation.display_name)
2544        filename = f"[{self.ctx.export_date}]-{safe_name}-pg_export_info.json"
2545        output_path = output_dir / filename
2546
2547        try:
2548            # Initialize database connection
2549            if not hasattr(self, '_db_initialized'):
2550                self.db_manager.initialize()
2551                self._db_initialized = True
2552
2553            # Export conversation to database
2554            await self._export_to_database(conversation)
2555
2556            # Create a metadata file with export information
2557            meta_data = {
2558                "export_type": "PostgreSQL",
2559                "conversation": {
2560                    "id": conversation.id,
2561                    "display_name": conversation.display_name,
2562                    "message_count": conversation.message_count,
2563                    "first_message": conversation.first_timestamp.isoformat() if conversation.first_timestamp else None,
2564                    "last_message": conversation.last_timestamp.isoformat() if conversation.last_timestamp else None,
2565                },
2566                "database": {
2567                    "engine": self.ctx.options.database_config.engine,
2568                    "host": self.ctx.options.database_config.host,
2569                    "port": self.ctx.options.database_config.port,
2570                    "database": self.ctx.options.database_config.database,
2571                    "schema": self.ctx.options.database_config.schema,
2572                },
2573                "export_date": self.ctx.export_date,
2574                "export_time": self.ctx.export_time,
2575                "sql_connection_string": self.get_sanitized_connection_string()
2576            }
2577
2578            loop = asyncio.get_event_loop()
2579            await loop.run_in_executor(
2580                None,
2581                lambda: output_path.write_text(
2582                    json.dumps(meta_data, indent=2, ensure_ascii=False),
2583                    encoding='utf-8'
2584                )
2585            )
2586
2587            self.logger.info(f"Exported {conversation.message_count} messages to PostgreSQL "
2588                          f"and saved metadata to {output_path}")
2589            return output_path
2590
2591        except Exception as e:
2592            self.logger.error(f"Error exporting to PostgreSQL: {e}")
2593            raise ExportError(f"Failed to export to PostgreSQL: {e}")
2594
2595    async def _export_to_database(self, conversation: SkypeConversation) -> None:
2596        """
2597        Export conversation data to PostgreSQL database.
2598
2599        Args:
2600            conversation: Conversation to export
2601        """
2602        # Use asyncio to run database operations in a thread pool
2603        loop = asyncio.get_event_loop()
2604        await loop.run_in_executor(
2605            None,
2606            self._export_conversation_sync,
2607            conversation
2608        )
2609
2610    def _export_conversation_sync(self, conversation: SkypeConversation) -> None:
2611        """Export a conversation to PostgreSQL database (synchronous)."""
2612        try:
2613            # First handle the conversation record in its own transaction
2614            with self.db_manager.session() as session:
2615                try:
2616                    # Check if conversation already exists
2617                    db_conversation = session.query(self.DbConversation).filter_by(
2618                        id=conversation.id
2619                    ).first()
2620
2621                    # Create or update conversation record
2622                    if not db_conversation:
2623                        db_conversation = self.DbConversation(
2624                            id=conversation.id,
2625                            display_name=conversation.display_name,
2626                            first_timestamp=conversation.first_timestamp,
2627                            last_timestamp=conversation.last_timestamp,
2628                            message_count=conversation.message_count,
2629                            days_active=conversation.days_active,
2630                            export_date=datetime.datetime.now(),
2631                            metadata_json=json.dumps(conversation.original_json)
2632                                            if self.ctx.options.include_metadata else None
2633                        )
2634                        session.add(db_conversation)
2635                    else:
2636                        # Update existing conversation
2637                        db_conversation.display_name = conversation.display_name
2638                        db_conversation.first_timestamp = conversation.first_timestamp
2639                        db_conversation.last_timestamp = conversation.last_timestamp
2640                        db_conversation.message_count = conversation.message_count
2641                        db_conversation.days_active = conversation.days_active
2642                        db_conversation.export_date = datetime.datetime.now()
2643                        if self.ctx.options.include_metadata:
2644                            db_conversation.metadata_json = json.dumps(conversation.original_json)
2645
2646                    # Process participants in the same transaction as the conversation
2647                    for user_id, display_name in conversation.participants.items():
2648                        # Check if participant already exists for this conversation
2649                        participant = session.query(self.DbParticipant).filter_by(
2650                            conversation_id=conversation.id,
2651                            user_id=user_id
2652                        ).first()
2653
2654                        if not participant:
2655                            participant = self.DbParticipant(
2656                                conversation_id=conversation.id,
2657                                user_id=user_id,
2658                                display_name=display_name
2659                            )
2660                            session.add(participant)
2661                        else:
2662                            participant.display_name = display_name
2663
2664                    # Commit conversation and participants
2665                    session.commit()
2666                    self.logger.debug(f"Saved conversation record for {conversation.id}")
2667
2668                except Exception as e:
2669                    session.rollback()
2670                    self.logger.error(f"Failed to save conversation record: {e}")
2671                    # Re-raise to abort the whole export for this conversation
2672                    raise
2673
2674            # Process messages in batches with separate transactions
2675            batch_size = self.ctx.options.batch_size
2676            total_messages = len(conversation.messages)
2677            successful_messages = 0
2678            failed_batches = 0
2679
2680            # Process messages in batches
2681            for i in range(0, len(conversation.messages), batch_size):
2682                batch = conversation.messages[i:i+batch_size]
2683
2684                # Create a new session for each batch
2685                with self.db_manager.session() as session:
2686                    try:
2687                        for message in batch:
2688                            # Check if message already exists
2689                            existing_message = session.query(self.DbMessage).filter_by(
2690                                id=message.id
2691                            ).first()
2692
2693                            if not existing_message:
2694                                # Create new message record
2695                                db_message = self.DbMessage(
2696                                    id=message.id,
2697                                    conversation_id=conversation.id,
2698                                    timestamp=message.timestamp,
2699                                    sender_id=message.sender_id,
2700                                    sender_display_name=message.sender_display_name,
2701                                    content=message.content,
2702                                    message_type=message.message_type,
2703                                    edited=message.edited,
2704                                    metadata_json=json.dumps(message.original_json)
2705                                                if self.ctx.options.include_metadata else None
2706                                )
2707                                session.add(db_message)
2708
2709                        # Commit this batch
2710                        session.commit()
2711                        successful_messages += len(batch)
2712                        self.logger.debug(f"Processed message batch {i//batch_size + 1}/{(total_messages-1)//batch_size + 1} "
2713                                        f"({len(batch)} messages)")
2714
2715                    except Exception as e:
2716                        session.rollback()
2717                        failed_batches += 1
2718                        self.logger.error(f"Failed to process message batch {i//batch_size + 1}: {e}")
2719                        # Continue with next batch instead of aborting all
2720
2721                # Check memory after each batch
2722                if self.ctx.check_memory():
2723                    self.logger.debug("Memory optimization performed between batches")
2724
2725            # Log summary
2726            if failed_batches > 0:
2727                self.logger.warning(f"Conversation {conversation.id} export completed with {failed_batches} failed batches. "
2728                                 f"Successfully saved {successful_messages}/{total_messages} messages.")
2729            else:
2730                self.logger.info(f"Successfully exported conversation {conversation.id} "
2731                               f"with {successful_messages} messages.")
2732
2733        except Exception as e:
2734            self.logger.error(f"Failed to export conversation {conversation.id}: {e}")
2735            raise
2736
2737    def get_sanitized_connection_string(self) -> str:
2738        """Generate SQLAlchemy connection string with password masked for secure logging."""
2739        config = self.ctx.options.database_config
2740        # Always mask password regardless of its length
2741        return (f"{config.engine}://{config.username}:****@"
2742                f"{config.host}:{config.port}/{config.database}")
2743
2744class ExportManager:
2745    """Manages the export process for all conversation formats."""
2746
2747    def __init__(self, ctx: AppContext):
2748        """
2749        Initialize the export manager.
2750
2751        Args:
2752            ctx: Application context
2753        """
2754        self.ctx = ctx
2755        self.logger = get_logger('export_manager', ctx)
2756
2757        # Create exporters
2758        self.exporters = {
2759            OutputFormat.TEXT: TextExporter(ctx),
2760            OutputFormat.HTML: HtmlExporter(ctx),
2761            OutputFormat.MARKDOWN: MarkdownExporter(ctx),
2762            OutputFormat.JSON: JsonExporter(ctx),
2763            OutputFormat.POSTGRESQL: PostgreSQLExporter(ctx)
2764        }
2765
2766    async def export_conversations(self, skype_export: SkypeExport,
2767                                  conversations: List[SkypeConversation] = None) -> Dict[str, List[Path]]:
2768        """
2769        Export selected conversations in specified formats.
2770
2771        Args:
2772            skype_export: Complete Skype export data
2773            conversations: Optional list of conversations to export (all if None)
2774
2775        Returns:
2776            Dictionary mapping format names to lists of exported file paths
2777        """
2778        self.logger.info("Starting export process...")
2779
2780        # Use all conversations if none specified
2781        if conversations is None:
2782            conversations = list(skype_export.conversations.values())
2783
2784        # Filter conversations if pattern specified
2785        if self.ctx.options.filter_pattern:
2786            pattern = self.ctx.options.filter_pattern
2787            filtered = [
2788                c for c in conversations
2789                if fnmatch.fnmatch(c.display_name.lower(), pattern.lower())
2790            ]
2791
2792            if not filtered:
2793                self.logger.warning(f"No conversations matched pattern '{pattern}'")
2794                if not self.ctx.options.basic_mode:
2795                    self.logger.info("Available conversations:")
2796                    for conv in conversations[:10]:
2797                        self.logger.info(f"- {conv.display_name}")
2798                    if len(conversations) > 10:
2799                        self.logger.info(f"... and {len(conversations) - 10} more")
2800
2801            conversations = filtered
2802
2803        # Create output directory
2804        output_dir = self.ctx.options.output_dir
2805        ensure_directory(output_dir)
2806
2807        # Determine which formats to export
2808        formats = [self.ctx.options.format]
2809        if self.ctx.options.format == OutputFormat.ALL:
2810            formats = [f for f in OutputFormat if f != OutputFormat.ALL]
2811
2812        # Create format-specific directories
2813        format_dirs = {}
2814        for format in formats:
2815            format_name = format.name.lower()
2816            format_dir = output_dir / format_name
2817            ensure_directory(format_dir)
2818            format_dirs[format] = format_dir
2819
2820        # Track exported files
2821        exported_files = {format.name: [] for format in formats}
2822
2823        # Create progress bar if available
2824        progress_tracker = self.ctx.progress_tracker
2825        total_exports = len(conversations) * len(formats)
2826
2827        if RICH_AVAILABLE and progress_tracker and not self.ctx.options.basic_mode:
2828            with progress_tracker as progress:
2829                task = progress.add_task("[green]Exporting conversations...", total=total_exports)
2830
2831                # Export each conversation in each format
2832                for conversation in conversations:
2833                    for format in formats:
2834                        if self.ctx.cancel_requested:
2835                            self.logger.info("Export cancelled by user")
2836                            return exported_files
2837
2838                        exported_file = await self._export_conversation(
2839                            conversation, format, format_dirs[format]
2840                        )
2841                        exported_files[format.name].append(exported_file)
2842                        progress.update(task, advance=1)
2843
2844                        # Periodically check memory usage
2845                        self.ctx.check_memory()
2846        else:
2847            # Simple progress tracking
2848            processed = 0
2849
2850            # Export each conversation in each format
2851            for conversation in conversations:
2852                for format in formats:
2853                    if self.ctx.cancel_requested:
2854                        self.logger.info("Export cancelled by user")
2855                        return exported_files
2856
2857                    processed += 1
2858                    if processed % 5 == 0 or processed == total_exports:
2859                        self.logger.info(f"Export progress: {processed}/{total_exports}")
2860
2861                    exported_file = await self._export_conversation(
2862                        conversation, format, format_dirs[format]
2863                    )
2864                    exported_files[format.name].append(exported_file)
2865
2866                    # Periodically check memory usage
2867                    self.ctx.check_memory()
2868
2869        # Create stats file if requested
2870        if self.ctx.options.include_conversation_stats:
2871            await self._export_stats(skype_export, output_dir)
2872
2873        # Compress output if requested
2874        if self.ctx.options.compress_output:
2875            await self._compress_output(output_dir)
2876
2877        return exported_files
2878
2879    async def _export_conversation(self, conversation: SkypeConversation,
2880                                 format: OutputFormat, output_dir: Path) -> Path:
2881        """
2882        Export a single conversation in specified format.
2883
2884        Args:
2885            conversation: Conversation to export
2886            format: Format to export in
2887            output_dir: Directory to write output to
2888
2889        Returns:
2890            Path to exported file
2891        """
2892        try:
2893            exporter = self.exporters[format]
2894            return await exporter.export_conversation(conversation, output_dir)
2895        except Exception as e:
2896            self.logger.error(f"Error exporting conversation {conversation.display_name} "
2897                                 f"in {format.name} format: {e}")
2898            self.ctx.errors.append({
2899                "type": "export_error",
2900                "conversation_id": conversation.id,
2901                "format": format.name,
2902                "error": str(e),
2903                "traceback": traceback.format_exc()
2904            })
2905            # Create a dummy path as fallback
2906            return output_dir / f"ERROR-{sanitize_filename(conversation.id)}.failed"
2907
2908    async def _export_stats(self, skype_export: SkypeExport, output_dir: Path) -> Path:
2909        """
2910        Export conversation statistics.
2911
2912        Args:
2913            skype_export: Complete Skype export data
2914            output_dir: Directory to write output to
2915
2916        Returns:
2917            Path to stats file
2918        """
2919        stats_file = output_dir / "conversation_stats.json"
2920        stats = skype_export.get_conversation_stats()
2921
2922        # Add export metadata
2923        stats["export_metadata"] = {
2924            "export_date": self.ctx.export_date,
2925            "export_time": self.ctx.export_time,
2926            "user_id": self.ctx.user_id,
2927            "user_display_name": self.ctx.user_display_name,
2928            "exported_formats": [f.name for f in OutputFormat if f != OutputFormat.ALL],
2929            "processed_at": datetime.datetime.now().isoformat()
2930        }
2931
2932        # Add memory usage if available
2933        memory_report = self.ctx.get_memory_report()
2934        if memory_report:
2935            stats["system_resources"] = memory_report
2936
2937        # Write stats file
2938        try:
2939            loop = asyncio.get_event_loop()
2940            await loop.run_in_executor(
2941                None,
2942                lambda: stats_file.write_text(
2943                    json.dumps(stats, indent=2, ensure_ascii=False),
2944                    encoding='utf-8'
2945                )
2946            )
2947
2948            self.logger.info(f"Exported conversation statistics to {stats_file}")
2949            return stats_file
2950        except Exception as e:
2951            self.logger.error(f"Error writing statistics to {stats_file}: {e}")
2952            raise FileWriteError(f"Failed to write statistics to {stats_file}: {e}")
2953
2954    async def _compress_output(self, output_dir: Path) -> Path:
2955        """
2956        Compress output directory.
2957
2958        Args:
2959            output_dir: Directory to compress
2960
2961        Returns:
2962            Path to compressed file
2963        """
2964        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
2965        archive_path = output_dir.with_name(f"{output_dir.name}_{timestamp}.zip")
2966
2967        self.logger.info(f"Compressing output to {archive_path}")
2968
2969        # Create zip archive in executor to avoid blocking
2970        try:
2971            loop = asyncio.get_event_loop()
2972            await loop.run_in_executor(
2973                None,
2974                self._create_zip_archive,
2975                output_dir,
2976                archive_path
2977            )
2978
2979            self.logger.info(f"Export compressed to {archive_path}")
2980            return archive_path
2981        except Exception as e:
2982            self.logger.error(f"Error compressing output: {e}")
2983            raise FileWriteError(f"Failed to compress output: {e}")
2984
2985    def _create_zip_archive(self, source_dir: Path, output_path: Path) -> None:
2986        """
2987        Create a ZIP archive of a directory.
2988
2989        Args:
2990            source_dir: Directory to compress
2991            output_path: Path for output ZIP file
2992        """
2993        with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
2994            for root, _, files in os.walk(source_dir):
2995                for file in files:
2996                    file_path = Path(root) / file
2997                    # Make path relative to source_dir
2998                    rel_path = file_path.relative_to(source_dir)
2999                    zipf.write(file_path, arcname=rel_path)
3000
3001# ═════════════════════════════════════════════════════════════════════════════
3002# ═══════════════════════════ BASIC MODE HANDLER ═══════════════════════════════
3003# ═════════════════════════════════════════════════════════════════════════════
3004
3005class BasicModeHandler:
3006    """
3007    Handles simplified workflow for basic mode operation.
3008
3009    This class provides a straightforward procedural flow for users
3010    who prefer simplicity over advanced features.
3011    """
3012
3013    def __init__(self, ctx: AppContext):
3014        """
3015        Initialize basic mode handler.
3016
3017        Args:
3018            ctx: Application context
3019        """
3020        self.ctx = ctx
3021        self.logger = get_logger('basic_mode', ctx)
3022
3023    async def run(self, file_path: Path) -> int:
3024        """
3025        Run the basic mode workflow.
3026
3027        Args:
3028            file_path: Path to Skype export file
3029
3030        Returns:
3031            Exit code
3032        """
3033        # Simple welcome message
3034        print("\n" + "=" * 60)
3035        print("     SkypeExporter - Basic Mode")
3036        print("     Simple Skype Chat Exporter")
3037        print("=" * 60 + "\n")
3038
3039        try:
3040            # Get user display name
3041            user_display_name = input("\nPlease enter your name as you want it to appear in the logs: ")
3042            while not user_display_name.strip():
3043                user_display_name = input("Name cannot be empty. Please enter your name: ")
3044
3045            self.ctx.user_display_name = user_display_name
3046            print(f"\nWelcome, {user_display_name}!")
3047
3048            # Process input file
3049            print(f"\nReading Skype export file: {file_path}")
3050            file_reader = FileReader.create_reader(file_path)
3051            raw_data = await file_reader.read(file_path, self.ctx)
3052
3053            # Parse data
3054            print("\nParsing Skype conversations...")
3055            parser = SkypeExportParser(self.ctx)
3056            skype_export = await parser.parse(raw_data)
3057
3058            # Show available conversations
3059            conversations = list(skype_export.conversations.values())
3060            valid_conversations = [c for c in conversations if c.messages]
3061
3062            if not valid_conversations:
3063                print("\nNo conversations with messages found in the export.")
3064                return 0
3065
3066            print(f"\nFound {len(valid_conversations)} conversations in the export file.")
3067
3068            # Choose export format
3069            print("\nAvailable export formats:")
3070            print("1. Text (.txt)")
3071            print("2. HTML (.html)")
3072            print("3. Markdown (.md)")
3073            print("4. JSON (.json)")
3074            print("5. All formats")
3075
3076            format_choice = input("\nChoose format (1-5): ").strip()
3077            while format_choice not in ["1", "2", "3", "4", "5"]:
3078                format_choice = input("Please enter a number between 1 and 5: ").strip()
3079
3080            format_map = {
3081                "1": OutputFormat.TEXT,
3082                "2": OutputFormat.HTML,
3083                "3": OutputFormat.MARKDOWN,
3084                "4": OutputFormat.JSON,
3085                "5": OutputFormat.ALL
3086            }
3087
3088            self.ctx.options.format = format_map[format_choice]
3089
3090            # Choose conversations
3091            print("\nDo you want to:")
3092            print("1. Export all conversations")
3093            print("2. Select specific conversations")
3094
3095            selection_choice = input("\nChoose option (1-2): ").strip()
3096            while selection_choice not in ["1", "2"]:
3097                selection_choice = input("Please enter either 1 or 2: ").strip()
3098
3099            selected_conversations = None
3100            if selection_choice == "2":
3101                selected_conversations = await self._select_conversations(valid_conversations)
3102
3103                if not selected_conversations:
3104                    print("\nNo conversations selected, nothing to export.")
3105                    return 0
3106
3107            # Choose output directory
3108            default_output_dir = self.ctx.options.output_dir
3109            output_dir = input(f"\nOutput directory [default: {default_output_dir}]: ").strip()
3110            if not output_dir:
3111                output_dir = default_output_dir
3112
3113            self.ctx.options.output_dir = Path(output_dir)
3114
3115            # Advanced options
3116            include_timestamps = input("\nInclude timestamps? (y/n) [default: y]: ").strip().lower()
3117            self.ctx.options.include_timestamps = include_timestamps != "n"
3118
3119            local_time = input("Use local time instead of UTC? (y/n) [default: y]: ").strip().lower()
3120            self.ctx.options.use_local_time = local_time != "n"
3121
3122            compress_output = input("Compress output to zip? (y/n) [default: n]: ").strip().lower()
3123            self.ctx.options.compress_output = compress_output == "y"
3124
3125            # Export conversations
3126            print("\nStarting export process...")
3127
3128            export_manager = ExportManager(self.ctx)
3129            exported_files = await export_manager.export_conversations(
3130                skype_export, selected_conversations
3131            )
3132
3133            # Display summary
3134            total_conversations = sum(len(files) for files in exported_files.values())
3135            output_path = self.ctx.options.output_dir
3136
3137            print("\n" + "=" * 60)
3138            print("            Export Summary")
3139            print("-" * 60)
3140            print(f"Total conversations: {len(valid_conversations)}")
3141            print(f"Exported conversations: {total_conversations}")
3142
3143            for format_name, files in exported_files.items():
3144                if files:
3145                    print(f"{format_name} files: {len(files)}")
3146
3147            print(f"Output directory: {output_path}")
3148
3149            if self.ctx.errors:
3150                print(f"\nErrors: {len(self.ctx.errors)}")
3151                for i, error in enumerate(self.ctx.errors, 1):
3152                    print(f"  {i}. {error['type']} - {error['error']}")
3153
3154            print("\nExport completed successfully!")
3155            print(f"Files saved to: {output_path}")
3156
3157            return 0
3158
3159        except Exception as e:
3160            print(f"\nError: {e}")
3161            return 1
3162
3163    async def _select_conversations(self, conversations: List[SkypeConversation]) -> List[SkypeConversation]:
3164        """
3165        Allow user to select conversations in basic mode.
3166
3167        Args:
3168            conversations: List of valid conversations
3169
3170        Returns:
3171            List of selected conversations
3172        """
3173        print("\nAvailable conversations:")
3174        for i, conv in enumerate(conversations, 1):
3175            message_count = conv.message_count
3176            first_date = conv.first_timestamp.strftime("%Y-%m-%d") if conv.first_timestamp else "N/A"
3177            print(f"{i:3}. {conv.display_name} ({message_count} messages, since {first_date})")
3178
3179        print("\nEnter conversation numbers to export, separated by spaces.")
3180        print("For example: '1 3 5' will export the first, third, and fifth conversations.")
3181        print("Enter 'all' to export all conversations.")
3182
3183        selection = input("\nSelection: ").strip()
3184
3185        if selection.lower() == 'all':
3186            return conversations
3187
3188        try:
3189            indices = [int(idx.strip()) for idx in selection.split() if idx.strip()]
3190
3191            # Validate indices
3192            valid_indices = [idx for idx in indices if 1 <= idx <= len(conversations)]
3193
3194            if not valid_indices:
3195                print("No valid selection made. Please try again.")
3196                return await self._select_conversations(conversations)
3197
3198            # Get selected conversations
3199            selected = [conversations[idx-1] for idx in valid_indices]
3200
3201            # Confirm selection
3202            print(f"\nYou selected {len(selected)} conversations:")
3203            for conv in selected:
3204                print(f"- {conv.display_name}")
3205
3206            confirm = input("\nConfirm selection? (y/n) [default: y]: ").strip().lower()
3207            if confirm == "n":
3208                return await self._select_conversations(conversations)
3209
3210            return selected
3211
3212        except ValueError:
3213            print("Invalid selection format. Please enter numbers separated by spaces.")
3214            return await self._select_conversations(conversations)
3215
3216# ═════════════════════════════════════════════════════════════════════════════
3217# ═══════════════════════════ USER INTERFACE ═════════════════════════════════
3218# ═════════════════════════════════════════════════════════════════════════════
3219
3220class ConversationSelector:
3221    """Interactive conversation selector with rich UI if available."""
3222
3223    def __init__(self, ctx: AppContext):
3224        """
3225        Initialize the selector.
3226
3227        Args:
3228            ctx: Application context
3229        """
3230        self.ctx = ctx
3231        self.logger = get_logger('conversation_selector', ctx)
3232
3233    async def select_conversations(self, skype_export: SkypeExport) -> List[SkypeConversation]:
3234        """
3235        Allow user to select conversations to export.
3236
3237        Args:
3238            skype_export: Complete Skype export data
3239
3240        Returns:
3241            List of selected conversations
3242        """
3243        conversations = list(skype_export.conversations.values())
3244
3245        # Filter out empty conversations
3246        valid_conversations = [c for c in conversations if c.messages]
3247
3248        if not valid_conversations:
3249            self.logger.warning("No conversations with messages found")
3250            return []
3251
3252        # Sort by display name
3253        valid_conversations.sort(key=lambda c: c.display_name.lower())
3254
3255        # Use rich UI if available
3256        if RICH_AVAILABLE:
3257            return await self._rich_select_conversations(valid_conversations)
3258        else:
3259            return await self._text_select_conversations(valid_conversations)
3260
3261    async def _rich_select_conversations(self, conversations: List[SkypeConversation]) -> List[SkypeConversation]:
3262        """
3263        Select conversations using rich UI.
3264
3265        Args:
3266            conversations: Available conversations
3267
3268        Returns:
3269            List of selected conversations
3270        """
3271        # Create table of conversations
3272        table = Table(title="Available Conversations")
3273        table.add_column("#", justify="right")
3274        table.add_column("Name", style="cyan")
3275        table.add_column("Messages", justify="right")
3276        table.add_column("First Message", justify="right")
3277        table.add_column("Last Message", justify="right")
3278
3279        # Add rows
3280        for i, conv in enumerate(conversations, 1):
3281            table.add_row(
3282                str(i),
3283                conv.display_name,
3284                str(conv.message_count),
3285                conv.first_timestamp.strftime("%Y-%m-%d") if conv.first_timestamp else "N/A",
3286                conv.last_timestamp.strftime("%Y-%m-%d") if conv.last_timestamp else "N/A"
3287            )
3288
3289        # Display table
3290        self.ctx.console.print(table)
3291        self.ctx.console.print("\nEnter the numbers of conversations to export, separated by spaces.")
3292        self.ctx.console.print("Enter 'all' to export all conversations.")
3293
3294        # Get user selection
3295        selection = await self._get_user_input("\nSelection: ")
3296
3297        if selection.lower() == 'all':
3298            return conversations
3299
3300        # Parse selection
3301        try:
3302            indices = [int(idx.strip()) for idx in selection.split() if idx.strip()]
3303
3304            # Validate indices
3305            valid_indices = [idx for idx in indices if 1 <= idx <= len(conversations)]
3306            if not valid_indices:
3307                self.ctx.console.print("[bold red]No valid selection made[/bold red]")
3308                return []
3309
3310            # Get selected conversations
3311            selected = [conversations[idx-1] for idx in valid_indices]
3312
3313            # Confirm selection
3314            self.ctx.console.print(f"\nYou selected [cyan]{len(selected)}[/cyan] conversations:")
3315            for conv in selected[:5]:
3316                self.ctx.console.print(f"- {conv.display_name}")
3317
3318            if len(selected) > 5:
3319                self.ctx.console.print(f"- ... and {len(selected) - 5} more")
3320
3321            confirm = Confirm.ask("Confirm this selection?", default=True)
3322            if not confirm:
3323                return await self._rich_select_conversations(conversations)
3324
3325            return selected
3326
3327        except ValueError:
3328            self.ctx.console.print("[bold red]Invalid selection format[/bold red]")
3329            return []
3330
3331    async def _text_select_conversations(self, conversations: List[SkypeConversation]) -> List[SkypeConversation]:
3332        """
3333        Select conversations using text UI.
3334
3335        Args:
3336            conversations: Available conversations
3337
3338        Returns:
3339            List of selected conversations
3340        """
3341        print("\nYou have conversations with the following:")
3342        print("--------------------------------------------")
3343
3344        for i, conv in enumerate(conversations, 1):
3345            first_date = "N/A"
3346            if conv.first_timestamp:
3347                first_date = conv.first_timestamp.strftime("%Y-%m-%d")
3348
3349            print(f"{i:3} -> {conv.display_name} ({conv.message_count} messages, since {first_date})")
3350
3351        print("\nEnter the numbers of conversations to export, separated by spaces.")
3352        print("Enter 'all' to export all conversations.")
3353
3354        # Get user selection
3355        selection = await self._get_user_input("\nSelection: ")
3356
3357        if selection.lower() == 'all':
3358            return conversations
3359
3360        # Parse selection
3361        try:
3362            indices = [int(idx.strip()) for idx in selection.split() if idx.strip()]
3363
3364            # Validate indices
3365            valid_indices = [idx for idx in indices if 1 <= idx <= len(conversations)]
3366            if not valid_indices:
3367                print("No valid selection made")
3368                return []
3369
3370            # Get selected conversations
3371            selected = [conversations[idx-1] for idx in valid_indices]
3372
3373            # Confirm selection
3374            print(f"\nYou selected {len(selected)} conversations:")
3375            for conv in selected[:5]:
3376                print(f"- {conv.display_name}")
3377
3378            if len(selected) > 5:
3379                print(f"- ... and {len(selected) - 5} more")
3380
3381            confirm = input("\nConfirm this selection? (y/n) [default: y]: ").strip().lower()
3382            if confirm == "n":
3383                return await self._text_select_conversations(conversations)
3384
3385            return selected
3386
3387        except ValueError:
3388            print("Invalid selection format")
3389            return []
3390
3391    async def _get_user_input(self, prompt: str) -> str:
3392        """
3393        Get user input asynchronously.
3394
3395        Args:
3396            prompt: Prompt text
3397
3398        Returns:
3399            User input string
3400        """
3401        loop = asyncio.get_event_loop()
3402        return await loop.run_in_executor(None, input, prompt)
3403
3404class UserInterface:
3405    """Main user interface handling interaction and display."""
3406
3407    def __init__(self, ctx: AppContext):
3408        """
3409        Initialize the UI.
3410
3411        Args:
3412            ctx: Application context
3413        """
3414        self.ctx = ctx
3415        self.logger = get_logger('ui', ctx)
3416
3417    async def get_user_display_name(self) -> str:
3418        """
3419        Get display name from user with enhanced validation.
3420
3421        Returns:
3422            User display name
3423        """
3424        # Use rich UI if available
3425        if RICH_AVAILABLE:
3426            self.ctx.console.print("\n[bold cyan]Please enter your display name for the logs:[/bold cyan]")
3427            display_name = await self._get_user_input("")
3428        else:
3429            display_name = await self._get_user_input("\nIn the logs, your name should be displayed as: ")
3430
3431        # Validate input
3432        while not display_name.strip():
3433            if RICH_AVAILABLE:
3434                self.ctx.console.print("[bold red]Display name cannot be empty![/bold red]")
3435                display_name = await self._get_user_input("Please enter how you want your name to be displayed: ")
3436            else:
3437                display_name = await self._get_user_input("\nPlease enter how you want your name to be displayed: ")
3438
3439        # Additional validation for unusually long names
3440        if len(display_name) > 50:
3441            warning = "Your display name is unusually long. Are you sure you want to use this name?"
3442
3443            if RICH_AVAILABLE:
3444                self.ctx.console.print(f"[bold yellow]{warning}[/bold yellow]")
3445                confirm = Confirm.ask("Continue with this name?", default=True)
3446                if not confirm:
3447                    return await self.get_user_display_name()
3448            else:
3449                print(f"\nWarning: {warning}")
3450                confirm = input("Continue with this name? (y/n) [default: y]: ").strip().lower()
3451                if confirm == "n":
3452                    return await self.get_user_display_name()
3453
3454        return display_name
3455
3456    def display_welcome(self) -> None:
3457        """Display welcome message with app info."""
3458        if RICH_AVAILABLE:
3459            # Create fancy header
3460            self.ctx.console.print("\n[bold blue]╔═══════════════════════════════════════════════════════════╗[/bold blue]")
3461            self.ctx.console.print("[bold blue]║[/bold blue]                [bold cyan]SkypeExporter v2.0.0[/bold cyan]                [bold blue]║[/bold blue]")
3462            self.ctx.console.print("[bold blue]║[/bold blue]           [italic]Enterprise-Grade Skype Chat Parser[/italic]           [bold blue]║[/bold blue]")
3463            self.ctx.console.print("[bold blue]╚═══════════════════════════════════════════════════════════╝[/bold blue]\n")
3464
3465            # Show system info
3466            self.ctx.console.print("[bold]System Information:[/bold]")
3467            self.ctx.console.print(f"  Python: {platform.python_version()}")
3468            self.ctx.console.print(f"  Platform: {platform.system()} {platform.release()}")
3469
3470            # Show memory info if available
3471            if self.ctx.memory_monitor:
3472                mem_usage = self.ctx.memory_monitor.get_memory_usage_mb()
3473                mem_percent = self.ctx.memory_monitor.get_memory_percent()
3474                sys_memory = self.ctx.memory_monitor.get_system_memory_mb()
3475
3476                self.ctx.console.print(f"  Memory: {mem_usage:.1f} MB / {sys_memory:.1f} MB ({mem_percent:.1f}%)")
3477
3478            # Show dependency status
3479            self.ctx.console.print("\n[bold]Dependency Status:[/bold]")
3480            dep_status = check_dependencies()
3481            for pkg, status in dep_status.items():
3482                color = "green" if status else "red"
3483                symbol = "✓" if status else "✗"
3484                self.ctx.console.print(f"  [{color}]{symbol}[/{color}] {pkg}")
3485
3486            # Show mode info
3487            mode = "[bold cyan]Basic Mode[/bold cyan]" if self.ctx.options.basic_mode else "[bold green]Advanced Mode[/bold green]"
3488            self.ctx.console.print(f"\nRunning in {mode}")
3489
3490            self.ctx.console.print("\n[italic]Starting export process...[/italic]\n")
3491
3492        else:
3493            # Simple text header
3494            print("\n" + "=" * 60)
3495            print("              SkypeExporter v2.0.0")
3496            print("      Enterprise-Grade Skype Chat Parser")
3497            print("=" * 60 + "\n")
3498
3499            # Show system info
3500            print(f"Python: {platform.python_version()}")
3501            print(f"Platform: {platform.system()} {platform.release()}")
3502
3503            # Show memory info if available
3504            if self.ctx.memory_monitor:
3505                mem_usage = self.ctx.memory_monitor.get_memory_usage_mb()
3506                mem_percent = self.ctx.memory_monitor.get_memory_percent()
3507                sys_memory = self.ctx.memory_monitor.get_system_memory_mb()
3508
3509                print(f"Memory: {mem_usage:.1f} MB / {sys_memory:.1f} MB ({mem_percent:.1f}%)")
3510
3511            # Show dependency status
3512            print("\nDependency Status:")
3513            dep_status = check_dependencies()
3514            for pkg, status in dep_status.items():
3515                symbol = "✓" if status else "✗"
3516                print(f"  {symbol} {pkg}")
3517
3518            # Show mode info
3519            mode = "Basic Mode" if self.ctx.options.basic_mode else "Advanced Mode"
3520            print(f"\nRunning in {mode}")
3521
3522            print("\nStarting export process...\n")
3523
3524    def display_summary(self, skype_export: SkypeExport, exported_files: Dict[str, List[Path]]) -> None:
3525        """
3526        Display export summary.
3527
3528        Args:
3529            skype_export: Complete Skype export data
3530            exported_files: Dictionary of exported files by format
3531        """
3532        total_conversations = sum(len(files) for files in exported_files.values())
3533        elapsed_time = time.time() - self.ctx.start_time
3534        output_dir = self.ctx.options.output_dir
3535
3536        if RICH_AVAILABLE and not self.ctx.options.basic_mode:
3537            # Create summary panel
3538            summary = Table(title="Export Summary", show_header=False, box=None)
3539            summary.add_column("", style="bold cyan")
3540            summary.add_column("")
3541
3542            summary.add_row("Total conversations:", str(skype_export.total_conversations))
3543            summary.add_row("Total messages:", str(skype_export.total_messages))
3544            summary.add_row("Exported conversations:", str(total_conversations))
3545
3546            # Add export formats
3547            for format_name, files in exported_files.items():
3548                if files:
3549                    summary.add_row(f"{format_name} files:", str(len(files)))
3550
3551            summary.add_row("Output directory:", str(output_dir))
3552            summary.add_row("Processing time:", f"{elapsed_time:.2f} seconds")
3553
3554            # Add memory usage if available
3555            memory_report = self.ctx.get_memory_report()
3556            if memory_report:
3557                peak_mb = memory_report.get("peak_usage_mb", 0)
3558                summary.add_row("Peak memory usage:", f"{peak_mb:.2f} MB")
3559
3560            if self.ctx.errors:
3561                summary.add_row("Errors:", f"[bold red]{len(self.ctx.errors)}[/bold red]")
3562
3563            # Display summary in panel
3564            panel = Panel(summary, title="SkypeExporter Completed", border_style="green")
3565            self.ctx.console.print(panel)
3566
3567            # Show errors if any
3568            if self.ctx.errors:
3569                self.ctx.console.print("\n[bold red]Errors encountered:[/bold red]")
3570                for i, error in enumerate(self.ctx.errors, 1):
3571                    self.ctx.console.print(f"  {i}. {error['type']} - {error['error']}")
3572
3573            self.ctx.console.print("\n[bold green]Export completed successfully![/bold green]")
3574            self.ctx.console.print(f"Files saved to: [cyan]{output_dir}[/cyan]")
3575
3576        else:
3577            # Simple text summary
3578            print("\n" + "=" * 60)
3579            print("                Export Summary")
3580            print("-" * 60)
3581            print(f"Total conversations: {skype_export.total_conversations}")
3582            print(f"Total messages: {skype_export.total_messages}")
3583            print(f"Exported conversations: {total_conversations}")
3584
3585            # Add export formats
3586            for format_name, files in exported_files.items():
3587                if files:
3588                    print(f"{format_name} files: {len(files)}")
3589
3590            print(f"Output directory: {output_dir}")
3591            print(f"Processing time: {elapsed_time:.2f} seconds")
3592
3593            # Add memory usage if available
3594            memory_report = self.ctx.get_memory_report()
3595            if memory_report:
3596                peak_mb = memory_report.get("peak_usage_mb", 0)
3597                print(f"Peak memory usage: {peak_mb:.2f} MB")
3598
3599            if self.ctx.errors:
3600                print(f"Errors: {len(self.ctx.errors)}")
3601
3602            print("=" * 60)
3603
3604            # Show errors if any
3605            if self.ctx.errors:
3606                print("\nErrors encountered:")
3607                for i, error in enumerate(self.ctx.errors, 1):
3608                    print(f"  {i}. {error['type']} - {error['error']}")
3609
3610            print("\nExport completed successfully!")
3611            print(f"Files saved to: {output_dir}")
3612
3613    async def _get_user_input(self, prompt: str) -> str:
3614        """
3615        Get user input asynchronously.
3616
3617        Args:
3618            prompt: Prompt text
3619
3620        Returns:
3621            User input string
3622        """
3623        loop = asyncio.get_event_loop()
3624        return await loop.run_in_executor(None, input, prompt)
3625
3626# ═════════════════════════════════════════════════════════════════════════════
3627# ═══════════════════════════ APPLICATION CORE ═══════════════════════════════
3628# ═════════════════════════════════════════════════════════════════════════════
3629
3630class SkypeExporterApp:
3631    """Main application class orchestrating the export process."""
3632
3633    def __init__(self):
3634        """Initialize the application."""
3635        # Parse command line arguments
3636        self.args = self._parse_args()
3637
3638        # Create app context
3639        self.ctx = AppContext(
3640            options=self._create_options(),
3641            logger=setup_logging(
3642                LogLevel.DEBUG if self.args.debug else LogLevel.INFO,
3643                log_file=Path(self.args.log_file) if self.args.log_file else None
3644            )
3645        )
3646
3647        # Create UI components
3648        self.ui = UserInterface(self.ctx)
3649        self.selector = ConversationSelector(self.ctx)
3650        self.basic_mode_handler = BasicModeHandler(self.ctx)
3651
3652        # Set up signal handlers
3653        self._setup_signal_handlers()
3654
3655    def _parse_args(self) -> argparse.Namespace:
3656        """
3657        Parse command line arguments.
3658
3659        Returns:
3660            Parsed arguments
3661        """
3662        parser = argparse.ArgumentParser(
3663            description="SkypeExporter: Enterprise-Grade Skype Chat Log Exporter",
3664            formatter_class=argparse.ArgumentDefaultsHelpFormatter
3665        )
3666
3667        parser.add_argument('filename',
3668                         help='Path to the Skype export file (JSON, TAR, or ZIP)')
3669
3670        parser.add_argument('-o', '--output-dir',
3671                         help='Directory to save exported files',
3672                         default=os.path.join(os.getcwd(), "skype_exports"))
3673
3674        parser.add_argument('-f', '--format',
3675                         choices=['text', 'html', 'markdown', 'json', 'postgresql', 'all'],
3676                         default='text',
3677                         help='Output format for exported conversations')
3678
3679        parser.add_argument('-c', '--choose',
3680                         action='store_true',
3681                         help='Choose which conversations to export')
3682
3683        parser.add_argument('-p', '--pattern',
3684                         help='Filter conversations by name pattern (supports wildcards)')
3685
3686        parser.add_argument('--filter',
3687                         help='Alternative name for pattern filter')
3688
3689        parser.add_argument('-a', '--anonymize',
3690                         action='store_true',
3691                         help='Anonymize user names in exports')
3692
3693        parser.add_argument('-s', '--stats',
3694                         action='store_true',
3695                         help='Include conversation statistics')
3696
3697        parser.add_argument('--no-stats',
3698                         action='store_true',
3699                         help='Exclude conversation statistics')
3700
3701        parser.add_argument('-t', '--timestamps',
3702                         action='store_true',
3703                         default=True,
3704                         help='Include timestamps in exports')
3705
3706        parser.add_argument('--no-timestamps',
3707                         action='store_true',
3708                         help='Exclude timestamps from exports')
3709
3710        parser.add_argument('-l', '--local-time',
3711                         action='store_true',
3712                         help='Use local time instead of UTC')
3713
3714        parser.add_argument('--utc',
3715                         action='store_true',
3716                         help='Use UTC time (default)')
3717
3718        parser.add_argument('--no-parallel',
3719                         action='store_true',
3720                         help='Disable parallel processing')
3721
3722        parser.add_argument('--batch-size',
3723                         type=int,
3724                         help='Batch size for processing messages')
3725
3726        parser.add_argument('--max-workers',
3727                         type=int,
3728                         help='Maximum number of worker threads for parallel processing')
3729
3730        parser.add_argument('--compress',
3731                         action='store_true',
3732                         help='Compress output files into ZIP archive')
3733
3734        parser.add_argument('--timezone',
3735                         help='Timezone for timestamps (e.g. "America/New_York")')
3736
3737        parser.add_argument('--no-pretty',
3738                         action='store_true',
3739                         help='Disable pretty printing for JSON output')
3740
3741        parser.add_argument('--include-metadata',
3742                         action='store_true',
3743                         help='Include metadata in exports')
3744
3745        parser.add_argument('--include-ids',
3746                         action='store_true',
3747                         help='Include message IDs in exports')
3748
3749        parser.add_argument('--include-html',
3750                         action='store_true',
3751                         help='Include HTML in exports')
3752
3753        parser.add_argument('--media-links',
3754                         action='store_true',
3755                         help='Include media links in exports')
3756
3757        parser.add_argument('--date-from',
3758                         help='Start date for filtering messages (YYYY-MM-DD format)')
3759
3760        parser.add_argument('--date-to',
3761                         help='End date for filtering messages (YYYY-MM-DD format)')
3762
3763        parser.add_argument('--debug',
3764                         action='store_true',
3765                         help='Enable debug logging')
3766
3767        parser.add_argument('--log-file',
3768                         help='Path to log file')
3769
3770        parser.add_argument('--basic',
3771                         action='store_true',
3772                         help='Use basic mode with simplified interaction')
3773
3774        parser.add_argument('--memory-profile',
3775                         action='store_true',
3776                         help='Enable memory profiling')
3777
3778        parser.add_argument('--no-memory-optimization',
3779                         action='store_true',
3780                         help='Disable automatic memory optimization')
3781
3782        parser.add_argument('--no-memory-opt',
3783                         action='store_true',
3784                         help='Alternative name for disabling memory optimization')
3785
3786        parser.add_argument('--memory-threshold',
3787                         type=int,
3788                         help='Memory usage threshold percentage for optimization (1-99)')
3789
3790        # PostgreSQL options
3791        db_group = parser.add_argument_group('PostgreSQL Database Options')
3792        db_group.add_argument('--db-host',
3793                            help='Database host (for PostgreSQL export)',
3794                            default='localhost')
3795        db_group.add_argument('--db-port',
3796                            type=int,
3797                            help='Database port (for PostgreSQL export)',
3798                            default=5432)
3799        db_group.add_argument('--db-name',
3800                            help='Database name (for PostgreSQL export)',
3801                            default='skype_export')
3802        db_group.add_argument('--db-user',
3803                            help='Database username (for PostgreSQL export)',
3804                            default='postgres')
3805        db_group.add_argument('--db-password',
3806                            help='Database password (for PostgreSQL export)',
3807                            default='')
3808        db_group.add_argument('--db-engine',
3809                            help='Database engine (for PostgreSQL export)',
3810                            default='postgresql')
3811        db_group.add_argument('--db-schema',
3812                            help='Database schema (for PostgreSQL export)',
3813                            default='public')
3814        db_group.add_argument('--db-echo',
3815                            action='store_true',
3816                            help='Echo SQL queries (for debugging)')
3817
3818        parser.add_argument('--version',
3819                         action='version',
3820                         version='SkypeExporter 2.0.0')
3821
3822        return parser.parse_args()
3823
3824    def _create_options(self) -> ExportOptions:
3825        """
3826        Create export options from command line arguments.
3827
3828        Returns:
3829            Configured ExportOptions object
3830        """
3831        args = self.args
3832
3833        # Validate numeric inputs
3834        try:
3835            if args.batch_size is not None:
3836                args.batch_size = int(args.batch_size)
3837                if args.batch_size <= 0:
3838                    raise ConfigError("Batch size must be a positive integer")
3839
3840            if args.max_workers is not None:
3841                args.max_workers = int(args.max_workers)
3842                if args.max_workers < 1:
3843                    raise ConfigError("Max workers must be at least 1")
3844
3845            if args.memory_threshold is not None:
3846                args.memory_threshold = int(args.memory_threshold)
3847                if not (1 <= args.memory_threshold <= 99):
3848                    raise ConfigError("Memory threshold must be between 1 and 99 percent")
3849        except ValueError:
3850            raise ConfigError("Numeric parameters must be valid integers")
3851
3852        # Create output directory
3853        output_dir = Path(args.output_dir if args.output_dir else DEFAULT_OUTPUT_DIR)
3854
3855        # Validate output directory
3856        if not output_dir.parent.exists():
3857            raise ConfigError(f"Parent directory does not exist: {output_dir.parent}")
3858
3859        # Create database configuration if needed
3860        if args.format == 'postgresql' or args.format == 'all':
3861            db_config = DatabaseConfig(
3862                engine=args.db_engine,
3863                host=args.db_host,
3864                port=int(args.db_port),
3865                database=args.db_name,
3866                username=args.db_user,
3867                password=args.db_password,
3868                schema=args.db_schema,
3869                echo_sql=args.db_echo
3870            )
3871        else:
3872            db_config = DatabaseConfig()
3873
3874        # Handle date range if specified
3875        date_range = None
3876        if args.date_from and args.date_to:
3877            try:
3878                date_from = datetime.datetime.strptime(args.date_from, '%Y-%m-%d').date()
3879                date_to = datetime.datetime.strptime(args.date_to, '%Y-%m-%d').date()
3880                date_range = (date_from, date_to)
3881            except ValueError:
3882                raise ConfigError("Date range must be in YYYY-MM-DD format")
3883
3884        # Determine output format
3885        format_str = args.format.lower() if args.format else 'text'
3886        try:
3887            output_format = {
3888                'text': OutputFormat.TEXT,
3889                'html': OutputFormat.HTML,
3890                'markdown': OutputFormat.MARKDOWN,
3891                'json': OutputFormat.JSON,
3892                'postgresql': OutputFormat.POSTGRESQL,
3893                'all': OutputFormat.ALL
3894            }[format_str]
3895        except KeyError:
3896            raise ConfigError(f"Invalid output format: {format_str}")
3897
3898        # Build options object
3899        options = ExportOptions(
3900            output_dir=output_dir,
3901            format=output_format,
3902            anonymize=args.anonymize,
3903            include_timestamps=not args.no_timestamps,
3904            use_local_time=not args.utc,
3905            include_metadata=args.include_metadata,
3906            include_message_ids=args.include_ids,
3907            parallel=not args.no_parallel,
3908            max_workers=args.max_workers or max(1, os.cpu_count() or 4),
3909            batch_size=args.batch_size or 1000,
3910            timezone=args.timezone,
3911            pretty_print=not args.no_pretty,
3912            compress_output=args.compress,
3913            filter_pattern=args.filter,
3914            date_range=date_range,
3915            include_conversation_stats=not args.no_stats,
3916            media_links=args.media_links,
3917            strip_html=not args.include_html,
3918            debug_mode=args.debug,
3919            basic_mode=args.basic,
3920            enable_memory_optimization=not args.no_memory_opt,
3921            memory_profile=args.memory_profile,
3922            memory_threshold_percent=args.memory_threshold or 75,
3923            database_config=db_config
3924        )
3925
3926        return options
3927
3928    def _setup_signal_handlers(self) -> None:
3929        """Set up handlers for system signals."""
3930        # Handle SIGINT (Ctrl+C)
3931        if hasattr(signal, 'SIGINT'):
3932            signal.signal(signal.SIGINT, self._signal_handler)
3933
3934        # Handle SIGTERM
3935        if hasattr(signal, 'SIGTERM'):
3936            signal.signal(signal.SIGTERM, self._signal_handler)
3937
3938    def _signal_handler(self, sig, frame) -> None:
3939        """
3940        Handle system signals to allow graceful shutdown.
3941
3942        Args:
3943            sig: Signal number
3944            frame: Current stack frame
3945        """
3946        self.ctx.logger.info(f"Received signal {sig}, shutting down gracefully...")
3947        self.ctx.cancel_requested = True
3948
3949    async def run(self) -> int:
3950        """
3951        Run the application.
3952
3953        Returns:
3954            Exit code (0 for success, non-zero for error)
3955        """
3956        try:
3957            # Run in basic mode if requested
3958            if self.ctx.options.basic_mode:
3959                input_path = Path(self.args.filename)
3960                if not input_path.exists():
3961                    print(f"Error: Input file not found: {input_path}")
3962                    return 1
3963
3964                return await self.basic_mode_handler.run(input_path)
3965
3966            # Standard advanced mode
3967            # Display welcome message
3968            self.ui.display_welcome()
3969
3970            # Check dependencies
3971            dependency_status = check_dependencies()
3972            missing_deps = [pkg for pkg, status in dependency_status.items() if not status]
3973
3974            if missing_deps:
3975                self.ctx.logger.warning(f"Missing dependencies: {', '.join(missing_deps)}")
3976
3977                # Try to install missing dependencies
3978                if self.ctx.options.format != OutputFormat.TEXT:
3979                    # Check if required deps for the selected format are missing
3980                    format_deps = {
3981                        OutputFormat.HTML: ['jinja2'],
3982                        OutputFormat.MARKDOWN: ['markdown'],
3983                        OutputFormat.POSTGRESQL: ['sqlalchemy', 'psycopg2-binary']
3984                    }
3985
3986                    required_for_format = format_deps.get(self.ctx.options.format, [])
3987                    missing_required = [d for d in required_for_format if d in missing_deps]
3988
3989                    if missing_required:
3990                        self.ctx.logger.info("Attempting to install missing dependencies required for "
3991                                         f"{self.ctx.options.format.name} format...")
3992                        install_dependencies()
3993
3994            # Get user display name
3995            self.ctx.user_display_name = await self.ui.get_user_display_name()
3996
3997            # Process input file
3998            input_path = Path(self.args.filename)
3999            if not input_path.exists():
4000                self.ctx.logger.error(f"Input file not found: {input_path}")
4001                return 1
4002
4003            # Create appropriate reader and read file
4004            reader = FileReader.create_reader(input_path)
4005            raw_data = await reader.read(input_path, self.ctx)
4006
4007            # Parse the export data
4008            parser = SkypeExportParser(self.ctx)
4009            skype_export = await parser.parse(raw_data)
4010
4011            # Select conversations to export
4012            selected_conversations = None
4013            if self.args.choose:
4014                selected_conversations = await self.selector.select_conversations(skype_export)
4015
4016                if not selected_conversations:
4017                    self.ctx.logger.warning("No conversations selected, nothing to export")
4018                    return 0
4019
4020            # Export selected conversations
4021            export_manager = ExportManager(self.ctx)
4022            exported_files = await export_manager.export_conversations(
4023                skype_export, selected_conversations
4024            )
4025
4026            # Display summary
4027            self.ui.display_summary(skype_export, exported_files)
4028
4029            return 0
4030
4031        except Exception as e:
4032            if self.ctx.options.basic_mode:
4033                print(f"Error: {e}")
4034            else:
4035                self.ctx.logger.error(f"Error: {e}")
4036
4037            if self.ctx.options.debug_mode:
4038                if RICH_AVAILABLE:
4039                    self.ctx.console.print_exception()
4040                else:
4041                    self.ctx.logger.error(traceback.format_exc())
4042            return 1
4043
4044def main() -> int:
4045    """
4046    Main entry point for the application.
4047
4048    Returns:
4049        Exit code
4050    """
4051    app = SkypeExporterApp()
4052
4053    # Get the event loop
4054    try:
4055        loop = asyncio.get_event_loop()
4056    except RuntimeError:
4057        # Create new event loop if none exists
4058        loop = asyncio.new_event_loop()
4059        asyncio.set_event_loop(loop)
4060
4061    # Run the application
4062    try:
4063        return loop.run_until_complete(app.run())
4064    except KeyboardInterrupt:
4065        print("\nOperation cancelled by user")
4066        return 130  # Standard exit code for SIGINT
4067    finally:
4068        # Clean up
4069        loop.close()
4070
4071if __name__ == "__main__":
4072    sys.exit(main())