#!/usr/bin/env python3
"""
AOC Sync - Polls git repositories containing Advent of Code implementations
and generates performance comparison reports.
"""
import os
import sys
import yaml
import json
import sqlite3
import subprocess
import shutil
import re
import time
import logging
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass, asdict
from collections import defaultdict

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


@dataclass
class PerformanceResult:
    """Stores performance data for a single part of a day"""
    user: str
    year: int
    day: int
    part: int
    time_ns: int  # Time in nanoseconds
    timestamp: str


class Config:
    """Configuration manager"""

    def __init__(self, config_path: str = "config.yaml"):
        self.config_path = config_path
        self.config = self._load_config()

    def _load_config(self) -> dict:
        """Load configuration from YAML file"""
        if not os.path.exists(self.config_path):
            logger.error(f"Config file not found: {self.config_path}")
            sys.exit(1)
        with open(self.config_path, 'r') as f:
            return yaml.safe_load(f)

    @property
    def poll_interval(self) -> int:
        return self.config.get('poll_interval', 300)

    @property
    def output_dir(self) -> str:
        return self.config.get('output_dir', 'output')

    @property
    def data_dir(self) -> str:
        return self.config.get('data_dir', 'data')

    @property
    def repositories(self) -> List[dict]:
        return self.config.get('repositories', [])

    @property
    def compare_years(self) -> Optional[List[int]]:
        return self.config.get('compare_years')

    @property
    def compare_days(self) -> Optional[List[int]]:
        return self.config.get('compare_days')
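
# Example config.yaml (illustrative sketch). The top-level keys mirror the
# Config properties above; the per-repository fields `url` and `user` are
# assumptions about how the rest of the tool reads each entry, not a
# confirmed schema.
#
#   poll_interval: 300            # seconds between polls
#   data_dir: data                # working data directory
#   output_dir: output            # where the HTML report is written
#   compare_years: [2023, 2024]   # optional; defaults to every year in the DB
#   compare_days: [1, 2, 3]       # optional; defaults to every day
#   repositories:
#     - url: https://github.com/alice/advent-of-code   # hypothetical repo
#       user: alice                                     # assumed field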
class Database:
    """SQLite database for storing performance results"""

    def __init__(self, db_path: str):
        self.db_path = db_path
        os.makedirs(os.path.dirname(db_path) if os.path.dirname(db_path) else '.', exist_ok=True)
        self._init_db()

    def _init_db(self):
        """Initialize database schema"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS results (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                user TEXT NOT NULL,
                year INTEGER NOT NULL,
                day INTEGER NOT NULL,
                part INTEGER NOT NULL,
                time_ns INTEGER NOT NULL,
                timestamp TEXT NOT NULL,
                UNIQUE(user, year, day, part, timestamp)
            )
        ''')
        cursor.execute('''
            CREATE INDEX IF NOT EXISTS idx_user_year_day_part
            ON results(user, year, day, part)
        ''')
        conn.commit()
        conn.close()

    def insert_result(self, result: PerformanceResult):
        """Insert a performance result"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        try:
            cursor.execute('''
                INSERT OR REPLACE INTO results (user, year, day, part, time_ns, timestamp)
                VALUES (?, ?, ?, ?, ?, ?)
            ''', (result.user, result.year, result.day, result.part,
                  result.time_ns, result.timestamp))
            conn.commit()
        except sqlite3.IntegrityError:
            # Already exists, skip
            pass
        finally:
            conn.close()

    def get_latest_results(self, years: Optional[List[int]] = None,
                           days: Optional[List[int]] = None) -> List[Dict]:
        """Get latest performance results for each user/day/part"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        query = '''
            SELECT user, year, day, part, time_ns, timestamp
            FROM results r1
            WHERE timestamp = (
                SELECT MAX(timestamp) FROM results r2
                WHERE r2.user = r1.user AND r2.year = r1.year
                  AND r2.day = r1.day AND r2.part = r1.part
            )
        '''
        conditions = []
        params = []
        if years:
            placeholders = ','.join('?' * len(years))
            conditions.append(f'year IN ({placeholders})')
            params.extend(years)
        if days:
            placeholders = ','.join('?' * len(days))
            conditions.append(f'day IN ({placeholders})')
            params.extend(days)
        if conditions:
            query += ' AND ' + ' AND '.join(conditions)
        query += ' ORDER BY year, day, part, user'
        cursor.execute(query, params)
        rows = cursor.fetchall()
        conn.close()
        return [
            {
                'user': row[0],
                'year': row[1],
                'day': row[2],
                'part': row[3],
                'time_ns': row[4],
                'timestamp': row[5]
            }
            for row in rows
        ]

    def get_all_users(self) -> List[str]:
        """Get list of all users"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        cursor.execute('SELECT DISTINCT user FROM results')
        users = [row[0] for row in cursor.fetchall()]
        conn.close()
        return users

    def get_all_years(self) -> List[int]:
        """Get list of all years"""
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
        cursor.execute('SELECT DISTINCT year FROM results ORDER BY year')
        years = [row[0] for row in cursor.fetchall()]
        conn.close()
        return years


class GitManager:
    """Manages git repository operations"""

    @staticmethod
    def clone_or_update_repo(url: str, local_path: str) -> bool:
        """Clone repository if it doesn't exist, or update if it does"""
        local_path = Path(local_path)
        if local_path.exists() and (local_path / '.git').exists():
            # Update existing repository
            logger.info(f"Updating repository: {local_path}")
            try:
                subprocess.run(
                    ['git', 'fetch', 'origin'],
                    cwd=local_path, check=True, capture_output=True
                )
                subprocess.run(
                    ['git', 'reset', '--hard', 'origin/master'],
                    cwd=local_path, check=True, capture_output=True
                )
            except subprocess.CalledProcessError:
                # Try main branch if master fails
                try:
                    subprocess.run(
                        ['git', 'reset', '--hard', 'origin/main'],
                        cwd=local_path, check=True, capture_output=True
                    )
                except subprocess.CalledProcessError as e:
                    logger.error(f"Failed to update {local_path}: {e}")
                    return False
            return True
        else:
            # Clone new repository
            logger.info(f"Cloning repository: {url} to {local_path}")
            local_path.parent.mkdir(parents=True, exist_ok=True)
            try:
                subprocess.run(
                    ['git', 'clone', url, str(local_path)],
                    check=True, capture_output=True
                )
                return True
            except subprocess.CalledProcessError as e:
                logger.error(f"Failed to clone {url}: {e}")
                return False

    @staticmethod
    def has_changes(url: str, local_path: str) -> bool:
        """Check if remote repository has changes"""
        local_path = Path(local_path)
        if not local_path.exists() or not (local_path / '.git').exists():
            return True  # Needs to be cloned
        try:
            # Fetch latest changes
            subprocess.run(
                ['git', 'fetch', 'origin'],
                cwd=local_path, check=True, capture_output=True
            )
            # Check if local is behind remote
            result = subprocess.run(
                ['git', 'rev-list', '--count', 'HEAD..origin/master'],
                cwd=local_path, capture_output=True, text=True
            )
            if result.returncode != 0:
                # Try main branch
                result = subprocess.run(
                    ['git', 'rev-list', '--count', 'HEAD..origin/main'],
                    cwd=local_path, capture_output=True, text=True
                )
            if result.returncode == 0:
                behind_count = int(result.stdout.strip())
                return behind_count > 0
            return False
        except Exception as e:
            logger.error(f"Error checking for changes: {e}")
            return True  # Assume changes to be safe
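
# Typical polling use of GitManager (illustrative sketch only; the surrounding
# sync loop is not shown here and the URL/path below are hypothetical):
#
#   url = "https://github.com/alice/advent-of-code"
#   local = os.path.join("data", "repos", "alice")
#   if GitManager.has_changes(url, local):
#       GitManager.clone_or_update_repo(url, local)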
class CargoAOCRunner:
    """Runs cargo-aoc benchmarks and parses results"""

    @staticmethod
    def find_implemented_days(work_dir: Path) -> List[int]:
        """Find which days are implemented in the directory

        Args:
            work_dir: Directory to search (should be a year directory for single repos)
        """
        days = []
        work_dir = Path(work_dir)

        # Look for common patterns: src/bin/day01.rs, src/day01.rs, etc.
        patterns = [
            'src/bin/day*.rs',
            'src/day*.rs',
            '**/src/bin/day*.rs',
            '**/src/day*.rs',
        ]
        for pattern in patterns:
            for day_file in work_dir.glob(pattern):
                match = re.search(r'day(\d+)', day_file.name)
                if match:
                    day_num = int(match.group(1))
                    if day_num not in days:
                        days.append(day_num)

        # Also check for Cargo.toml with day references
        cargo_toml = work_dir / 'Cargo.toml'
        if cargo_toml.exists():
            with open(cargo_toml, 'r') as f:
                content = f.read()
            for match in re.finditer(r'day(\d+)', content):
                day_num = int(match.group(1))
                if day_num not in days:
                    days.append(day_num)

        return sorted(days)

    @staticmethod
    def extract_years_from_repo(repo_path: Path) -> List[int]:
        """Try to extract year(s) from repository structure

        For single repos, looks for year directories in the root (e.g., 2023/, 2024/)
        """
        years = []
        repo_path = Path(repo_path)

        # Check for year directories in root (e.g., 2023/, 2024/)
        # These should be directories with 4-digit year names
        for item in repo_path.iterdir():
            if item.is_dir() and not item.name.startswith('.'):
                # Check if directory name is exactly a 4-digit year
                if re.match(r'^\d{4}$', item.name):
                    year = int(item.name)
                    if 2015 <= year <= 2030:  # Reasonable range
                        years.append(year)

        # Also check path name as fallback
        if not years:
            path_str = str(repo_path)
            for year_match in re.finditer(r'(\d{4})', path_str):
                year = int(year_match.group(1))
                if 2015 <= year <= 2030 and year not in years:
                    years.append(year)

        return sorted(years) if years else []

    @staticmethod
    def run_benchmarks(repo_path: Path, year: int, user: str = "unknown",
                       is_multi_year: bool = False) -> List[PerformanceResult]:
        """Run cargo aoc benchmarks and parse results

        Args:
            repo_path: Path to the repository root (for single repos) or year
                directory (for multi-year repos)
            year: The year to benchmark
            user: User name for the results
            is_multi_year: True if this is a multi-year repo (repo_path is
                already the year directory)
        """
        results = []
        repo_path = Path(repo_path)

        # Determine the working directory
        if is_multi_year:
            # For multi-year repos, repo_path is already the year directory
            work_dir = repo_path
        else:
            # For single repos, check if we need to navigate to a year subdirectory
            work_dir = repo_path
            year_dir = repo_path / str(year)
            if year_dir.exists() and year_dir.is_dir():
                work_dir = year_dir
                logger.info(f"Using year directory: {work_dir}")

        if not (work_dir / 'Cargo.toml').exists():
            logger.warning(f"No Cargo.toml found in {work_dir}")
            return results

        days = CargoAOCRunner.find_implemented_days(work_dir)
        logger.info(f"Found {len(days)} implemented days in {work_dir}")

        for day in days:
            try:
                logger.info(f"Running cargo aoc for {user} year {year} day {day} in {work_dir}")
                # Run cargo aoc for this day (no year flag, must be in correct directory)
                cmd = ['cargo', 'aoc', '--day', str(day)]
                result = subprocess.run(
                    cmd,
                    cwd=work_dir,
                    capture_output=True,
                    text=True,
                    timeout=300  # 5 minute timeout per day
                )
                if result.returncode != 0:
                    logger.warning(f"cargo aoc failed for day {day} in {work_dir}: {result.stderr}")
                    continue

                # Parse output for runtime information
                day_results = CargoAOCRunner._parse_runtime_output(
                    result.stdout, result.stderr, day, year, user
                )
                if day_results:
                    logger.info(f"Parsed {len(day_results)} runtime result(s) for {user} year {year} day {day}")
                else:
                    logger.warning(f"No runtime data parsed for {user} year {year} day {day}")
                results.extend(day_results)
            except subprocess.TimeoutExpired:
                logger.error(f"Timeout running cargo aoc for day {day}")
            except Exception as e:
                logger.error(f"Error running cargo aoc for day {day}: {e}")
        return results

    @staticmethod
    def _parse_runtime_output(stdout: str, stderr: str, day: int, year: int,
                              user: str) -> List[PerformanceResult]:
        """Parse cargo-aoc runtime output

        cargo aoc typically outputs timing information like:
        - "Day X - Part Y: XXX.XXX ms"
        - "Day X - Part Y: XXX.XXX μs"
        - "Day X - Part Y: XXX.XXX ns"
        - Or similar formats
        """
        results = []
        timestamp = datetime.now().isoformat()

        # Combine stdout and stderr (timing info might be in either)
        output = stdout + "\n" + stderr

        # Patterns to match various cargo-aoc output formats
        # Common formats:
        #   "Day 1 - Part 1: 123.456 ms"
        #   "Day 1 Part 1: 123.456 ms"
        #   "day 1 - part 1: 123.456 ms"
        #   "Part 1: 123.456 ms" (when day is already known)
        patterns = [
            # Full format with day and part
            r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
            r'day\s+(\d+)\s*-\s*part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
            r'Day\s+(\d+)\s+Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
            r'day\s+(\d+)\s+part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
            r'day(\d+)\s*-\s*part(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
            # Part only (use provided day)
            r'Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
            r'part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
        ]

        for pattern in patterns:
            for match in re.finditer(pattern, output, re.IGNORECASE):
                groups = match.groups()
                # Determine day and part based on pattern
                if len(groups) == 4:
                    # Pattern with day and part
                    part_day = int(groups[0])
                    part_num = int(groups[1])
                    time_str = groups[2]
                    unit = groups[3].lower()
                    actual_day = part_day if part_day > 0 and part_day <= 25 else day
                elif len(groups) == 3:
                    # Pattern with only part (use provided day)
                    part_num = int(groups[0])
                    time_str = groups[1]
                    unit = groups[2].lower()
                    actual_day = day
                else:
                    continue

                try:
                    time_val = float(time_str)
                    # Convert to nanoseconds
                    if unit == 's' or unit == 'sec' or unit == 'second':
                        time_ns = int(time_val * 1_000_000_000)
                    elif unit == 'ms' or unit == 'millisecond':
                        time_ns = int(time_val * 1_000_000)
                    elif unit == 'μs' or unit == 'us' or unit == 'microsecond':
                        time_ns = int(time_val * 1_000)
                    elif unit == 'ns' or unit == 'nanosecond':
                        time_ns = int(time_val)
                    else:
                        # Default to nanoseconds if unit unclear
                        logger.warning(f"Unknown time unit '{unit}', assuming nanoseconds")
                        time_ns = int(time_val)

                    results.append(PerformanceResult(
                        user=user,
                        year=year,
                        day=actual_day,
                        part=part_num,
                        time_ns=time_ns,
                        timestamp=timestamp
                    ))
                except ValueError:
                    logger.warning(f"Could not parse time: {time_str}")

        return results


class HTMLGenerator:
    """Generates HTML comparison pages"""

    def __init__(self, output_dir: str):
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

    def generate(self, db: Database, config: Config):
        """Generate HTML comparison page"""
        years = config.compare_years or db.get_all_years()
        days = config.compare_days
        results = db.get_latest_results(years=years, days=days)
        users = db.get_all_users()

        # Organize data by year -> day -> part -> user
        data = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
        for result in results:
            year = result['year']
            day = result['day']
            part = result['part']
            user = result['user']
            time_ns = result['time_ns']
            # Only store if time_ns > 0 (valid result)
            if time_ns > 0:
                data[year][day][part][user] = time_ns

        html = self._generate_html(data, years, users)
        output_file = self.output_dir / 'index.html'
        with open(output_file, 'w') as f:
            f.write(html)
        logger.info(f"Generated HTML report: {output_file}")

    def _generate_html(self, data: dict, years: List[int], users: List[str]) -> str:
        """Generate HTML content"""
        html = f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>AOC Sync - Performance Comparison</title>
</head>
<body>
<h1>AOC Sync - Performance Comparison</h1>
<p>Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
<p>Years: {', '.join(map(str, sorted(years)))}</p>
<p>Users: {', '.join(sorted(users))}</p>
"""

        def format_time(time_ns: int) -> str:
            """Render a nanosecond timing in a human-readable unit"""
            if time_ns >= 1_000_000_000:
                return f"{time_ns / 1_000_000_000:.3f} s"
            if time_ns >= 1_000_000:
                return f"{time_ns / 1_000_000:.3f} ms"
            if time_ns >= 1_000:
                return f"{time_ns / 1_000:.3f} μs"
            return f"{time_ns} ns"

        # One User / Time / Relative Speed table per year, day, and part
        for year in sorted(data.keys()):
            html += f"<h2>{year}</h2>\n"
            for day in sorted(data[year].keys()):
                for part in sorted(data[year][day].keys()):
                    times = data[year][day][part]
                    fastest = min(times.values())
                    html += f"<h3>Day {day} - Part {part}</h3>\n"
                    html += "<table>\n"
                    html += "<tr><th>User</th><th>Time</th><th>Relative Speed</th></tr>\n"
                    for user in sorted(users):
                        if user not in times:
                            html += f"<tr><td>{user}</td><td>No data</td><td>-</td></tr>\n"
                            continue
                        time_ns = times[user]
                        time_str = format_time(time_ns)
                        # Relative speed: this user's time as a multiple of the fastest time for the part
                        relative_str = f"{time_ns / fastest:.2f}x"
                        html += f"<tr><td>{user}</td><td>{time_str}</td><td>{relative_str}</td></tr>\n"
                    html += "</table>\n"

        html += "</body>\n</html>\n"
        return html
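
# Illustrative sketch of how the classes above compose into a single sync pass.
# This helper is not called anywhere in this module, and the per-repository
# config keys it reads ('url', 'user') are assumptions, not a confirmed schema.
def _example_sync_pass(config: Config, db: Database) -> None:
    """Clone/update each configured repo, benchmark it, store results, render the report"""
    for repo in config.repositories:
        url = repo['url']                    # assumed config key
        user = repo.get('user', 'unknown')   # assumed config key
        local_path = os.path.join(config.data_dir, 'repos', user)

        # Skip repos with no new commits; otherwise bring the clone up to date
        if not GitManager.has_changes(url, local_path):
            continue
        if not GitManager.clone_or_update_repo(url, local_path):
            continue

        # Benchmark every year directory detected in the repository
        for year in CargoAOCRunner.extract_years_from_repo(Path(local_path)):
            for result in CargoAOCRunner.run_benchmarks(Path(local_path), year, user=user):
                db.insert_result(result)

    HTMLGenerator(config.output_dir).generate(db, config)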