# aocsync/aocsync.py — executable Python source.
# (Viewer metadata converted to a comment: this chunk belongs to a
# 2989-line, 119 KiB file.)
#!/usr/bin/env python3
"""
AOC Sync - Polls git repositories containing Advent of Code implementations
and generates performance comparison reports.
"""
import os
import sys
import yaml
import json
import sqlite3
import subprocess
import shutil
import re
import time
import logging
import threading
from pathlib import Path
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from dataclasses import dataclass, asdict
from collections import defaultdict
try:
from flask import Flask, Response, jsonify
FLASK_AVAILABLE = True
except ImportError:
FLASK_AVAILABLE = False
# Configure logging
# Root logger config: timestamped INFO-level messages. All loggers created
# via getLogger(__name__) in this module inherit this handler/format.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
@dataclass
class PerformanceResult:
    """Stores performance data for a single part of a day.

    One instance is a single (user, year, day, part) benchmark measurement;
    the `results` table in Database mirrors these fields one-to-one.
    """
    user: str        # Participant / repository owner name
    year: int        # AoC event year (e.g. 2023)
    day: int         # Puzzle day (parser accepts 1-25)
    part: int        # Puzzle part (1 or 2)
    time_ns: int  # Runner time in nanoseconds
    generator_time_ns: int = 0  # Generator time in nanoseconds (optional)
    output_bytes: int = 0  # Number of bytes in the output/answer
    git_rev: str = ""  # Git revision (short hash)
    repo_url: str = ""  # Repository URL
    timestamp: str = ""  # ISO-8601 time the benchmark was recorded
class Config:
    """Configuration manager backed by a YAML file.

    The parsed YAML mapping is kept in ``self.config``; each property reads
    a key from it and supplies a default when the key is absent.
    """

    def __init__(self, config_path: str = "config.yaml"):
        self.config_path = config_path
        self.config = self._load_config()

    def _load_config(self) -> dict:
        """Load configuration from YAML file"""
        if not os.path.exists(self.config_path):
            logger.error(f"Config file not found: {self.config_path}")
            sys.exit(1)
        with open(self.config_path, 'r') as fh:
            return yaml.safe_load(fh)

    @property
    def poll_interval(self) -> int:
        """Seconds between repository polls (default 300)."""
        return self.config.get('poll_interval', 300)

    @property
    def output_dir(self) -> str:
        """Directory where generated reports are written (default 'output')."""
        return self.config.get('output_dir', 'output')

    @property
    def data_dir(self) -> str:
        """Directory for working data such as the database (default 'data')."""
        return self.config.get('data_dir', 'data')

    @property
    def repositories(self) -> List[dict]:
        """List of repository descriptors to poll (default empty)."""
        return self.config.get('repositories', [])

    @property
    def compare_years(self) -> Optional[List[int]]:
        """Optional whitelist of years to include in comparisons."""
        return self.config.get('compare_years')

    @property
    def compare_days(self) -> Optional[List[int]]:
        """Optional whitelist of days to include in comparisons."""
        return self.config.get('compare_days')

    @property
    def rsync_config(self) -> Optional[dict]:
        """Optional rsync settings block, or None when not configured."""
        return self.config.get('rsync')

    @property
    def docker_config(self) -> dict:
        """Get Podman configuration with defaults"""
        defaults = {
            'build_cache_dir': '',
            'registry_cache_dir': '',
            'memory': '2g',
            'cpus': '2',
            'image': 'aocsync:latest',
        }
        supplied = self.config.get('docker', {})
        # Only the known keys are returned; unknown keys in the YAML block
        # are ignored, exactly as the per-key .get() lookups did before.
        return {key: supplied.get(key, fallback) for key, fallback in defaults.items()}
class Database:
    """SQLite database for storing performance results.

    Fix over the original: every method now closes its connection in a
    ``finally`` block. Previously the read helpers closed the connection
    only on the happy path, so an exception raised by ``execute`` leaked
    the connection/file handle.
    """

    # Column order of the latest-results SELECT; used to build result dicts.
    _RESULT_COLUMNS = ('user', 'year', 'day', 'part', 'time_ns', 'generator_time_ns',
                       'output_bytes', 'git_rev', 'repo_url', 'timestamp')

    def __init__(self, db_path: str):
        """Open (creating if necessary) the database at db_path.

        Parent directories are created; '.' covers bare filenames.
        """
        self.db_path = db_path
        os.makedirs(os.path.dirname(db_path) if os.path.dirname(db_path) else '.', exist_ok=True)
        self._init_db()

    def _init_db(self):
        """Initialize database schema (idempotent)."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute('''
                CREATE TABLE IF NOT EXISTS results (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    user TEXT NOT NULL,
                    year INTEGER NOT NULL,
                    day INTEGER NOT NULL,
                    part INTEGER NOT NULL,
                    time_ns INTEGER NOT NULL,
                    generator_time_ns INTEGER NOT NULL DEFAULT 0,
                    output_bytes INTEGER NOT NULL DEFAULT 0,
                    git_rev TEXT NOT NULL DEFAULT '',
                    repo_url TEXT NOT NULL DEFAULT '',
                    timestamp TEXT NOT NULL,
                    UNIQUE(user, year, day, part, timestamp, git_rev)
                )
            ''')
            # Migration: add columns introduced after the table was first
            # shipped. ALTER fails with OperationalError when the column
            # already exists, which we deliberately ignore.
            for column, col_type in [
                ('generator_time_ns', 'INTEGER NOT NULL DEFAULT 0'),
                ('output_bytes', 'INTEGER NOT NULL DEFAULT 0'),
                ('git_rev', "TEXT NOT NULL DEFAULT ''"),
                ('repo_url', "TEXT NOT NULL DEFAULT ''")
            ]:
                try:
                    cursor.execute(f'ALTER TABLE results ADD COLUMN {column} {col_type}')
                except sqlite3.OperationalError:
                    # Column already exists
                    pass
            cursor.execute('''
                CREATE INDEX IF NOT EXISTS idx_user_year_day_part
                ON results(user, year, day, part)
            ''')
            conn.commit()
        finally:
            conn.close()

    def insert_result(self, result: "PerformanceResult"):
        """Insert a performance result.

        Uses INSERT OR REPLACE, so a row with the same unique key
        (user, year, day, part, timestamp, git_rev) is overwritten.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            conn.execute('''
                INSERT OR REPLACE INTO results
                (user, year, day, part, time_ns, generator_time_ns, output_bytes, git_rev, repo_url, timestamp)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            ''', (result.user, result.year, result.day, result.part,
                  result.time_ns, result.generator_time_ns, result.output_bytes, result.git_rev,
                  result.repo_url, result.timestamp))
            conn.commit()
        except sqlite3.IntegrityError:
            # Already exists, skip
            pass
        finally:
            conn.close()

    def get_latest_results(self, years: Optional[List[int]] = None,
                           days: Optional[List[int]] = None) -> List[Dict]:
        """Get the most recent result for each (user, year, day, part).

        If years is None, returns all years. If days is None, returns all days.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            # Correlated subquery selects the newest timestamp per key.
            query = '''
                SELECT user, year, day, part, time_ns, generator_time_ns, output_bytes, git_rev, repo_url, timestamp
                FROM results r1
                WHERE timestamp = (
                    SELECT MAX(timestamp)
                    FROM results r2
                    WHERE r2.user = r1.user
                      AND r2.year = r1.year
                      AND r2.day = r1.day
                      AND r2.part = r1.part
                )
            '''
            conditions = []
            params: List[int] = []
            if years is not None:
                placeholders = ','.join('?' * len(years))
                conditions.append(f'year IN ({placeholders})')
                params.extend(years)
            if days is not None:
                placeholders = ','.join('?' * len(days))
                conditions.append(f'day IN ({placeholders})')
                params.extend(days)
            if conditions:
                # The base query already has a WHERE clause, so append with AND.
                query += ' AND ' + ' AND '.join(conditions)
            query += ' ORDER BY year, day, part, user'
            cursor.execute(query, params)
            rows = cursor.fetchall()
        finally:
            conn.close()
        # The SELECT always yields exactly these 10 columns, so the
        # defensive per-index length checks of the original are unnecessary.
        return [dict(zip(self._RESULT_COLUMNS, row)) for row in rows]

    def get_historical_results(self, user: str, year: int, day: int, part: int) -> List[Dict]:
        """Get all results for a specific user/day/part, newest first."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute('''
                SELECT time_ns, generator_time_ns, git_rev, repo_url, timestamp
                FROM results
                WHERE user = ? AND year = ? AND day = ? AND part = ?
                ORDER BY timestamp DESC
            ''', (user, year, day, part))
            rows = cursor.fetchall()
        finally:
            conn.close()
        columns = ('time_ns', 'generator_time_ns', 'git_rev', 'repo_url', 'timestamp')
        return [dict(zip(columns, row)) for row in rows]

    def get_all_users(self) -> List[str]:
        """Get list of all users that have at least one result."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute('SELECT DISTINCT user FROM results')
            return [row[0] for row in cursor.fetchall()]
        finally:
            conn.close()

    def get_all_years(self) -> List[int]:
        """Get sorted list of all years present in the database."""
        conn = sqlite3.connect(self.db_path)
        try:
            cursor = conn.cursor()
            cursor.execute('SELECT DISTINCT year FROM results ORDER BY year')
            return [row[0] for row in cursor.fetchall()]
        finally:
            conn.close()
class GitManager:
    """Manages git repository operations: cloning, updating, and detecting
    remote/per-year changes via subprocess calls to the `git` binary."""

    @staticmethod
    def clone_or_update_repo(url: str, local_path: str) -> bool:
        """Clone repository if it doesn't exist, or update if it does.

        Returns True on success; False when the clone failed, or when the
        update could reset to neither origin/master nor origin/main.
        """
        local_path = Path(local_path)
        if local_path.exists() and (local_path / '.git').exists():
            # Update existing repository
            logger.info(f"Updating repository: {local_path}")
            try:
                subprocess.run(
                    ['git', 'fetch', 'origin'],
                    cwd=local_path,
                    check=True,
                    capture_output=True
                )
                subprocess.run(
                    ['git', 'reset', '--hard', 'origin/master'],
                    cwd=local_path,
                    check=True,
                    capture_output=True
                )
            # Try main branch if master fails
            # NOTE(review): a failing `git fetch` also lands here and goes
            # straight to the origin/main reset without retrying the fetch —
            # confirm that is intended.
            except subprocess.CalledProcessError:
                try:
                    subprocess.run(
                        ['git', 'reset', '--hard', 'origin/main'],
                        cwd=local_path,
                        check=True,
                        capture_output=True
                    )
                except subprocess.CalledProcessError as e:
                    logger.error(f"Failed to update {local_path}: {e}")
                    return False
            return True
        else:
            # Clone new repository
            logger.info(f"Cloning repository: {url} to {local_path}")
            local_path.parent.mkdir(parents=True, exist_ok=True)
            try:
                subprocess.run(
                    ['git', 'clone', url, str(local_path)],
                    check=True,
                    capture_output=True
                )
                return True
            except subprocess.CalledProcessError as e:
                logger.error(f"Failed to clone {url}: {e}")
                return False

    @staticmethod
    def has_changes(url: str, local_path: str) -> bool:
        """Check if remote repository has changes.

        Returns True when the local clone is missing or is behind its remote
        (origin/master, falling back to origin/main). Any unexpected error
        also returns True so the caller re-processes rather than missing work.
        The `url` parameter is unused here; the remote comes from the clone.
        """
        local_path = Path(local_path)
        if not local_path.exists() or not (local_path / '.git').exists():
            return True  # Needs to be cloned
        try:
            # Fetch latest changes
            subprocess.run(
                ['git', 'fetch', 'origin'],
                cwd=local_path,
                check=True,
                capture_output=True
            )
            # Check if local is behind remote (count of commits in
            # HEAD..origin/master; > 0 means we are behind)
            result = subprocess.run(
                ['git', 'rev-list', '--count', 'HEAD..origin/master'],
                cwd=local_path,
                capture_output=True,
                text=True
            )
            if result.returncode != 0:
                # Try main branch
                result = subprocess.run(
                    ['git', 'rev-list', '--count', 'HEAD..origin/main'],
                    cwd=local_path,
                    capture_output=True,
                    text=True
                )
            if result.returncode == 0:
                behind_count = int(result.stdout.strip())
                return behind_count > 0
            # NOTE(review): if both rev-list calls fail (neither branch
            # exists) this reports "no changes" — confirm that is intended.
            return False
        except Exception as e:
            logger.error(f"Error checking for changes: {e}")
            return True  # Assume changes to be safe

    def has_year_changes(self, repo_path: Path, year: int, last_git_rev: str = "") -> bool:
        """Check if a specific year directory has changes since last_git_rev.

        Args:
            repo_path: Path to the repository root
            year: Year to check
            last_git_rev: Last git revision we processed (empty string means check all changes)

        Returns:
            True if year directory has changes, False otherwise

        NOTE(review): does not use `self`; could be a @staticmethod for
        consistency with the other methods on this class.
        """
        repo_path = Path(repo_path)
        if not repo_path.exists() or not (repo_path / '.git').exists():
            return True  # Needs to be cloned
        try:
            # Check if year directory exists
            year_dir = repo_path / str(year)
            if not year_dir.exists() or not year_dir.is_dir():
                return False  # Year directory doesn't exist, no changes
            # Get current HEAD revision
            result = subprocess.run(
                ['git', 'rev-parse', '--short', 'HEAD'],
                cwd=repo_path,
                capture_output=True,
                text=True
            )
            if result.returncode != 0:
                return True  # Can't determine, assume changes
            current_rev = result.stdout.strip()
            # If no last_git_rev, check if there are any commits affecting this year
            if not last_git_rev:
                # Check if year directory has any commits
                result = subprocess.run(
                    ['git', 'log', '--oneline', '--', str(year)],
                    cwd=repo_path,
                    capture_output=True,
                    text=True
                )
                return bool(result.stdout.strip())
            # Check if current revision is different from last processed
            if current_rev != last_git_rev:
                # Check if year directory was modified between last_git_rev and current
                result = subprocess.run(
                    ['git', 'diff', '--name-only', last_git_rev, 'HEAD', '--', str(year)],
                    cwd=repo_path,
                    capture_output=True,
                    text=True
                )
                if result.returncode == 0:
                    return bool(result.stdout.strip())
            # HEAD unchanged since last run (or diff failed): nothing new
            return False
        except Exception as e:
            logger.error(f"Error checking year changes for {year}: {e}")
            return True  # Assume changes to be safe
class CargoAOCRunner:
    """Runs cargo-aoc benchmarks and parses results"""

    @staticmethod
    def _strip_ansi_codes(text: str) -> str:
        """Remove ANSI escape codes from text.

        cargo/criterion colorize their output; stripping escape sequences
        keeps log files and the regex-based parsers free of control bytes.
        (Fix: dropped the redundant function-local `import re` — `re` is
        already imported at module level.)
        """
        if not text:
            return text
        # CSI sequences ESC[...X — covers SGR color codes (ESC[...m, incl.
        # 256-color ESC[38;5;...m), erase-line (ESC[...K), cursor moves
        # (ESC[...H), resets (ESC[0m, ESC[1m), etc.
        ansi_escape = re.compile(r'\x1b\[[0-9;]*[a-zA-Z]')
        # Any remaining standalone/general ESC sequences
        ansi_escape2 = re.compile(r'\x1b(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])')
        text = ansi_escape.sub('', text)
        text = ansi_escape2.sub('', text)
        return text
@staticmethod
def find_implemented_days(work_dir: Path) -> List[int]:
"""Find which days are implemented in the directory
Args:
work_dir: Directory to search (should be a year directory for single repos)
"""
days = []
work_dir = Path(work_dir)
# Look for common patterns: src/bin/day01.rs, src/day01.rs, etc.
patterns = [
'src/bin/day*.rs',
'src/day*.rs',
'**/src/bin/day*.rs',
'**/src/day*.rs',
]
for pattern in patterns:
for day_file in work_dir.glob(pattern):
match = re.search(r'day(\d+)', day_file.name)
if match:
day_num = int(match.group(1))
if day_num not in days:
days.append(day_num)
# Also check for Cargo.toml with day references
cargo_toml = work_dir / 'Cargo.toml'
if cargo_toml.exists():
with open(cargo_toml, 'r') as f:
content = f.read()
for match in re.finditer(r'day(\d+)', content):
day_num = int(match.group(1))
if day_num not in days:
days.append(day_num)
return sorted(days)
@staticmethod
def extract_years_from_repo(repo_path: Path) -> List[int]:
"""Try to extract year(s) from repository structure
For single repos, looks for year directories in the root (e.g., 2023/, 2024/)
"""
years = []
repo_path = Path(repo_path)
# Check for year directories in root (e.g., 2023/, 2024/)
# These should be directories with 4-digit year names
for item in repo_path.iterdir():
if item.is_dir() and not item.name.startswith('.'):
# Check if directory name is exactly a 4-digit year
if re.match(r'^\d{4}$', item.name):
year = int(item.name)
if 2015 <= year <= 2030: # Reasonable range
years.append(year)
# Also check path name as fallback
if not years:
path_str = str(repo_path)
for year_match in re.finditer(r'(\d{4})', path_str):
year = int(year_match.group(1))
if 2015 <= year <= 2030 and year not in years:
years.append(year)
return sorted(years) if years else []
@staticmethod
def get_git_rev(repo_path: Path) -> str:
"""Get short git revision hash"""
try:
result = subprocess.run(
['git', 'rev-parse', '--short', 'HEAD'],
cwd=repo_path,
capture_output=True,
text=True,
timeout=5
)
if result.returncode == 0:
return result.stdout.strip()
except Exception as e:
logger.warning(f"Could not get git rev for {repo_path}: {e}")
return ""
@staticmethod
def get_recent_commits(repo_path: Path, count: int = 2) -> List[dict]:
"""Get recent git commit messages and timestamps"""
commits = []
try:
result = subprocess.run(
['git', 'log', f'-{count}', '--pretty=format:%H|%s|%ai', '--date=iso'],
cwd=repo_path,
capture_output=True,
text=True,
timeout=5
)
if result.returncode == 0:
for line in result.stdout.strip().split('\n'):
if line:
parts = line.split('|', 2)
if len(parts) >= 3:
commits.append({
'hash': parts[0][:7], # Short hash
'message': parts[1],
'timestamp': parts[2]
})
elif len(parts) == 2:
commits.append({
'hash': parts[0][:7],
'message': parts[1],
'timestamp': ''
})
except Exception as e:
logger.warning(f"Could not get recent commits for {repo_path}: {e}")
return commits
    @staticmethod
    def _run_cargo_aoc_in_container(work_dir: Path, day: int, repo_root: Path, docker_config: dict, user: str = "unknown", year: int = 0) -> subprocess.CompletedProcess:
        """Run cargo aoc in a Podman container for security

        Args:
            work_dir: Working directory (year directory) - can be absolute or relative
            day: Day number to run
            repo_root: Absolute path to repository root
            docker_config: Podman configuration dictionary
            user: User name for build cache directory organization
            year: Year for build cache directory organization

        Returns:
            CompletedProcess with stdout, stderr, returncode

        Raises:
            ValueError: when work_dir does not live under repo_root.
            subprocess.TimeoutExpired: when the container run exceeds 5 minutes.
        """
        repo_root = Path(repo_root).resolve()
        work_dir = Path(work_dir).resolve()
        # Ensure work_dir is under repo_root (also yields the in-container
        # relative working directory)
        try:
            work_dir_rel = str(work_dir.relative_to(repo_root))
        except ValueError:
            # If work_dir is not under repo_root, this is an error
            raise ValueError(f"work_dir {work_dir} is not under repo_root {repo_root}")
        # Determine build cache directory: persistent per user/year if
        # configured, otherwise a throwaway temp dir removed in `finally`
        build_cache_dir = docker_config.get('build_cache_dir', '')
        use_temp_build = False
        temp_build_dir = None
        if build_cache_dir:
            # Use persistent build cache directory with user/year subdirectories
            base_cache_path = Path(build_cache_dir).resolve()
            # Create user/year specific directory: build_cache_dir/user/year/
            build_cache_path = base_cache_path / user / str(year)
            build_cache_path.mkdir(parents=True, exist_ok=True)
            logger.info(f"Using persistent build cache: {build_cache_path} (user: {user}, year: {year})")
        else:
            # Create a temporary directory for cargo build artifacts (outside repo)
            import tempfile
            temp_build_dir = tempfile.mkdtemp(prefix=f'cargo-aoc-build-{user}-{year}-')
            build_cache_path = Path(temp_build_dir)
            use_temp_build = True
            logger.info(f"Using temporary build cache: {build_cache_path}")
        # Determine registry cache directory
        registry_cache_dir = docker_config.get('registry_cache_dir', '')
        try:
            # Build Podman command
            podman_image = docker_config.get('image', 'aocsync:latest')
            # Check if image has cargo-aoc pre-installed (aocsync:latest) or needs installation
            needs_cargo_aoc_install = podman_image == 'rust:latest' or not podman_image.startswith('aocsync')
            podman_cmd = [
                'podman', 'run',
                '--rm',  # Remove container after execution
                #'--network=none',  # No network access
                '--memory', docker_config.get('memory', '2g'),  # Limit memory
                '--cpus', str(docker_config.get('cpus', '2')),  # Limit CPU
                '--read-only',  # Read-only root filesystem
                '--tmpfs', '/tmp:rw,noexec,nosuid,size=1g',  # Writable /tmp for cargo
                '-v', f'{repo_root}:/repo:rw',  # Mount repo read-write (cargo-aoc needs to write metadata files)
                '-v', f'{build_cache_path}:/build:rw',  # Writable build directory
                '-w', f'/repo/{work_dir_rel}',  # Working directory in container
            ]
            # Handle cargo registry cache and cargo home
            if registry_cache_dir:
                # Use persistent registry cache - mount entire CARGO_HOME directory
                registry_cache_path = Path(registry_cache_dir).resolve()
                registry_cache_path.mkdir(parents=True, exist_ok=True)
                # Mount the parent directory as CARGO_HOME so both registry and bin are persisted
                cargo_home_path = registry_cache_path.parent / 'cargo-home'
                cargo_home_path.mkdir(parents=True, exist_ok=True)
                podman_cmd.extend(['-v', f'{cargo_home_path}:/root/.cargo:rw'])
                logger.info(f"Using persistent cargo home (registry + bin): {cargo_home_path}")
            else:
                # Use tmpfs for cargo home (cleared after each run)
                podman_cmd.extend(['--tmpfs', '/root/.cargo:rw,noexec,nosuid,size=200m'])
            # Note: cargo-aoc installation location is handled above via CARGO_HOME mount
            # If using persistent cache, cargo bin is already mounted via cargo-home
            # If using tmpfs, cargo bin is already included in /root/.cargo tmpfs
            # Set environment variables in the container so cargo uses the mounted directories
            # Set build directory (target directory for compiled artifacts)
            podman_cmd.extend(['-e', 'CARGO_TARGET_DIR=/build/target'])
            # Set cargo home to use the mounted registry cache
            podman_cmd.extend(['-e', 'CARGO_HOME=/root/.cargo'])
            # Add Podman image and command
            if needs_cargo_aoc_install:
                # Install cargo-aoc first if not pre-installed (slower, but works with rust:latest)
                # Check if already installed to avoid reinstalling every time
                podman_cmd.extend([
                    podman_image,
                    'sh', '-c', f'if ! command -v cargo-aoc >/dev/null 2>&1; then cargo install --quiet --git https://github.com/ggriffiniii/cargo-aoc cargo-aoc 2>/dev/null || true; fi; cargo aoc bench -d {day} -- --quick'
                ])
            else:
                # Use pre-installed cargo-aoc (faster, requires aocsync:latest image)
                podman_cmd.extend([
                    podman_image,
                    'cargo', 'aoc', 'bench', '-d', str(day), '--', '--quick'
                ])
            result = subprocess.run(
                podman_cmd,
                capture_output=True,
                text=True,
                timeout=300  # 5 minute timeout
            )
            return result
        finally:
            # Clean up temporary build directory if we created one
            if use_temp_build and temp_build_dir:
                try:
                    shutil.rmtree(temp_build_dir)
                except Exception as e:
                    logger.warning(f"Failed to clean up temp build directory {temp_build_dir}: {e}")
    @staticmethod
    def run_benchmarks(repo_path: Path, year: int, user: str = "unknown",
                       repo_url: str = "", is_multi_year: bool = False,
                       log_file: Optional[Path] = None, docker_config: Optional[dict] = None) -> List[PerformanceResult]:
        """Run cargo aoc benchmarks and parse results

        Args:
            repo_path: Path to the repository root (for single repos) or year directory (for multi-year repos)
            year: The year to benchmark
            user: User name for the results
            repo_url: Repository URL for linking
            is_multi_year: True if this is a multi-year repo (repo_path is already the year directory)
            log_file: Optional path to log file to append cargo aoc output to
            docker_config: Optional Podman settings; defaults are filled in when None

        Returns:
            List of parsed PerformanceResult objects, possibly empty.
        """
        results = []
        repo_path = Path(repo_path).resolve()
        # Get git revision
        git_rev = CargoAOCRunner.get_git_rev(repo_path)
        # Determine the working directory and repo root
        if is_multi_year:
            # For multi-year repos, repo_path is already the year directory
            work_dir = repo_path
            repo_root = repo_path  # For multi-year, repo_path is the repo root
        else:
            # For single repos, check if we need to navigate to a year subdirectory
            repo_root = repo_path  # Repo root is the repo_path
            work_dir = repo_path
            year_dir = repo_path / str(year)
            if year_dir.exists() and year_dir.is_dir():
                work_dir = year_dir
                logger.info(f"Using year directory: {work_dir}")
                # Get git rev from repo root, not year directory
                git_rev = CargoAOCRunner.get_git_rev(repo_path)
        if not (work_dir / 'Cargo.toml').exists():
            logger.warning(f"No Cargo.toml found in {work_dir}")
            return results
        days = CargoAOCRunner.find_implemented_days(work_dir)
        logger.info(f"Found {len(days)} implemented days in {work_dir}")
        for day in days:
            try:
                logger.info(f"Running cargo aoc bench for {user} year {year} day {day} in {work_dir} (in Podman container)")
                # Run cargo aoc in a Podman container for security
                # Use default docker_config if not provided
                if docker_config is None:
                    docker_config = {
                        'build_cache_dir': '',
                        'registry_cache_dir': '',
                        'memory': '2g',
                        'cpus': '2',
                        'image': 'aocsync:latest'
                    }
                result = CargoAOCRunner._run_cargo_aoc_in_container(work_dir, day, repo_root, docker_config, user, year)
                # Write to log file if provided
                if log_file:
                    timestamp = datetime.now().isoformat()
                    # Strip ANSI codes from output before writing
                    stdout_clean = CargoAOCRunner._strip_ansi_codes(result.stdout or "")
                    stderr_clean = CargoAOCRunner._strip_ansi_codes(result.stderr or "")
                    with open(log_file, 'a', encoding='utf-8') as f:
                        f.write(f"\n{'='*80}\n")
                        f.write(f"[{timestamp}] {user} - Year {year} - Day {day}\n")
                        f.write(f"Command: cargo aoc bench -d {day} -- --quick (in Podman container)\n")
                        f.write(f"Working Directory: {work_dir}\n")
                        f.write(f"Return Code: {result.returncode}\n")
                        f.write(f"{'='*80}\n")
                        if result.stdout:  # Check original, write cleaned version
                            f.write("STDOUT:\n")
                            f.write(stdout_clean)
                            f.write("\n")
                        if result.stderr:  # Check original, write cleaned version
                            f.write("STDERR:\n")
                            f.write(stderr_clean)
                            f.write("\n")
                        f.write(f"{'='*80}\n\n")
                if result.returncode != 0:
                    logger.warning(f"cargo aoc bench failed for day {day} in {work_dir} (return code: {result.returncode}). Will still attempt to parse any available timing data.")
                # Log output for debugging if no results found
                if not result.stdout.strip() and not result.stderr.strip():
                    logger.warning(f"No output from cargo aoc bench for {user} year {year} day {day}")
                    # Skip parsing if there's no output at all
                    if result.returncode != 0:
                        continue
                # Strip ANSI codes before parsing (for cleaner parsing)
                stdout_clean = CargoAOCRunner._strip_ansi_codes(result.stdout or "")
                stderr_clean = CargoAOCRunner._strip_ansi_codes(result.stderr or "")
                # Count bytes in stdout output (original, before ANSI stripping)
                output_bytes = len(result.stdout.encode('utf-8')) if result.stdout else 0
                # Parse output for runtime information
                # Even if return code is non-zero (e.g. Part 2 panics), Part 1 timing might still be in output
                day_results = CargoAOCRunner._parse_runtime_output(
                    stdout_clean, stderr_clean, day, year, user, git_rev, repo_url, output_bytes
                )
                if day_results:
                    logger.info(f"Parsed {len(day_results)} runtime result(s) for {user} year {year} day {day}")
                    if result.returncode != 0:
                        # Log which parts were successfully parsed despite the error
                        parts_parsed = [f"Part {r.part}" for r in day_results]
                        logger.info(f"Successfully parsed timing for {', '.join(parts_parsed)} despite non-zero return code")
                else:
                    # Log a sample of the output to help debug parsing issues
                    output_sample = (result.stdout + "\n" + result.stderr).strip()[:500]
                    logger.warning(f"No runtime data parsed for {user} year {year} day {day}. Output sample: {output_sample}")
                    # Only skip if we got no results AND there was an error
                    if result.returncode != 0:
                        continue
                results.extend(day_results)
            except subprocess.TimeoutExpired:
                error_msg = f"Timeout running cargo aoc bench for day {day}"
                logger.error(error_msg)
                if log_file:
                    timestamp = datetime.now().isoformat()
                    with open(log_file, 'a', encoding='utf-8') as f:
                        f.write(f"\n[{timestamp}] ERROR: {error_msg}\n")
            except Exception as e:
                error_msg = f"Error running cargo aoc bench for day {day}: {e}"
                logger.error(error_msg)
                if log_file:
                    timestamp = datetime.now().isoformat()
                    with open(log_file, 'a', encoding='utf-8') as f:
                        f.write(f"\n[{timestamp}] ERROR: {error_msg}\n")
        return results
@staticmethod
def _parse_runtime_output(stdout: str, stderr: str, day: int, year: int,
user: str, git_rev: str = "", repo_url: str = "", output_bytes: int = 0) -> List[PerformanceResult]:
"""Parse cargo aoc bench runtime output
cargo aoc bench -- --quick outputs timing information in format like:
- "Day8 - Part1/(default) time: [15.127 ms 15.168 ms 15.331 ms]"
- "Day8 - Part2/(default) time: [15.141 ms 15.160 ms 15.164 ms]"
Extracts the middle measurement (second value) from the three measurements.
Also supports legacy cargo-aoc custom format for backward compatibility:
- "Day X - Part Y: XXX.XXX ms"
- "Day X - Part Y: XXX.XXX μs"
- "Day X - Part Y: XXX.XXX ns"
- "Part Y: XXX.XXX ms"
- Or similar formats
"""
results = []
timestamp = datetime.now().isoformat()
# Combine stdout and stderr (timing info might be in either)
output = stdout + "\n" + stderr
# First, try to parse cargo aoc bench output format
# Pattern: Day<X> - Part<Y>/(default) time: [<val1> <unit> <val2> <unit> <val3> <unit>]
# Example: Day8 - Part1/(default) time: [15.127 ms 15.168 ms 15.331 ms]
# Extract the middle measurement (val2)
cargo_bench_pattern = r'Day\s*(\d+)\s*-\s*Part\s*(\d+).*?time:\s*\[([\d.]+)\s+(\w+)\s+([\d.]+)\s+(\w+)\s+([\d.]+)\s+(\w+)\]'
for match in re.finditer(cargo_bench_pattern, output, re.IGNORECASE | re.MULTILINE):
bench_day = int(match.group(1))
bench_part = int(match.group(2))
# Extract middle value (group 5) and its unit (group 6)
time_str = match.group(5)
unit = match.group(6).lower()
try:
time_val = float(time_str)
time_ns = CargoAOCRunner._convert_to_nanoseconds(time_val, unit)
# Avoid duplicates
if not any(r.day == bench_day and r.part == bench_part
for r in results):
results.append(PerformanceResult(
user=user,
year=year,
day=bench_day,
part=bench_part,
time_ns=time_ns,
output_bytes=output_bytes,
git_rev=git_rev,
repo_url=repo_url,
timestamp=timestamp
))
except ValueError:
logger.warning(f"Could not parse cargo bench time: {time_str} {unit}")
# If we found results with cargo bench format, return them
if results:
return results
# Patterns to match various cargo-aoc output formats
# Common formats:
# "Day 1 - Part 1: 123.456 ms"
# "Day 1 Part 1: 123.456 ms"
# "day 1 - part 1: 123.456 ms"
# "Part 1: 123.456 ms" (when day is already known)
# Also handle formats like "Day 01 - Part 1" or "Day 1, Part 1"
# And the format with generator/runner on separate lines:
# "Day 2 - Part 1 : <answer>"
# " generator: 5.651µs,"
# " runner: 3.07µs"
patterns = [
# Format with generator/runner on separate lines (most common cargo-aoc format)
# Match "Day X - Part Y" followed by lines with "runner:" or "generator:"
r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]+.*?(?:^|\n).*?runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
r'day\s+(\d+)\s*-\s*part\s+(\d+)[:\s]+.*?(?:^|\n).*?runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
# Standalone runner/generator lines (for when we're already in a Day X - Part Y block)
r'runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
r'generator\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
# Full format with day and part - various separators
r'Day\s+(\d+)\s*[-,\s]+\s*Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
r'day\s+(\d+)\s*[-,\s]+\s*part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
r'Day\s+(\d+)\s+Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
r'day\s+(\d+)\s+part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
r'day(\d+)\s*[-,\s]+\s*part(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
# Part only (use provided day) - more flexible
r'Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
r'part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
# Handle formats without explicit "Part" label
r'Day\s+(\d+)\s*[-,\s]+\s*(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
# Handle formats with parentheses or brackets
r'\(Part\s+(\d+)\)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
r'\[Part\s+(\d+)\][:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
]
# First, try to parse the generator/runner format which is most common
# Look for "Day X - Part Y" lines and extract both generator and runner times
# output_bytes parameter contains the total stdout bytes for this day run
lines = output.split('\n')
current_day = None
current_part = None
actual_day = day # Default to provided day
generator_time_ns = 0
runner_time_ns = 0
for i, line in enumerate(lines):
# Check if this line starts a new Day/Part block
day_part_match = re.match(r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]', line, re.IGNORECASE)
if day_part_match:
# Save previous part's data if we have it
if current_day is not None and current_part is not None and runner_time_ns > 0:
results.append(PerformanceResult(
user=user,
year=year,
day=actual_day,
part=current_part,
time_ns=runner_time_ns,
generator_time_ns=generator_time_ns,
output_bytes=output_bytes,
git_rev=git_rev,
repo_url=repo_url,
timestamp=timestamp
))
# Start new part
current_day = int(day_part_match.group(1))
current_part = int(day_part_match.group(2))
actual_day = current_day if current_day > 0 and current_day <= 25 else day
generator_time_ns = 0
runner_time_ns = 0
continue
# If we're in a Day/Part block, look for generator and runner timing
if current_day is not None and current_part is not None:
generator_match = re.search(r'generator\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', line, re.IGNORECASE)
if generator_match:
time_str = generator_match.group(1)
unit = generator_match.group(2).lower()
try:
time_val = float(time_str)
generator_time_ns = CargoAOCRunner._convert_to_nanoseconds(time_val, unit)
except ValueError:
logger.warning(f"Could not parse generator time: {time_str}")
runner_match = re.search(r'runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', line, re.IGNORECASE)
if runner_match:
time_str = runner_match.group(1)
unit = runner_match.group(2).lower()
try:
time_val = float(time_str)
runner_time_ns = CargoAOCRunner._convert_to_nanoseconds(time_val, unit)
except ValueError:
logger.warning(f"Could not parse runner time: {time_str}")
# Save the last part's data
if current_day is not None and current_part is not None and runner_time_ns > 0:
results.append(PerformanceResult(
user=user,
year=year,
day=actual_day,
part=current_part,
time_ns=runner_time_ns,
generator_time_ns=generator_time_ns,
output_bytes=output_bytes,
git_rev=git_rev,
repo_url=repo_url,
timestamp=timestamp
))
# If we found results with the line-by-line approach, return them
if results:
return results
# Otherwise, try the original pattern-based approach
for pattern in patterns:
for match in re.finditer(pattern, output, re.IGNORECASE | re.MULTILINE):
groups = match.groups()
# Determine day and part based on pattern
if len(groups) == 4:
# Pattern with day and part
part_day = int(groups[0])
part_num = int(groups[1])
time_str = groups[2]
unit = groups[3].lower()
actual_day = part_day if part_day > 0 and part_day <= 25 else day
elif len(groups) == 3:
# Pattern with only part (use provided day)
part_num = int(groups[0])
time_str = groups[1]
unit = groups[2].lower()
actual_day = day
elif len(groups) == 2:
# Standalone runner/generator line (use provided day, assume part from context)
# This is tricky - we'll skip these and rely on the block-based approach above
continue
else:
continue
try:
time_val = float(time_str)
# Convert to nanoseconds
if unit == 's' or unit == 'sec' or unit == 'second':
time_ns = int(time_val * 1_000_000_000)
elif unit == 'ms' or unit == 'millisecond':
time_ns = int(time_val * 1_000_000)
elif unit == 'μs' or unit == 'us' or unit == 'microsecond':
time_ns = int(time_val * 1_000)
elif unit == 'ns' or unit == 'nanosecond':
time_ns = int(time_val)
else:
# Default to nanoseconds if unit unclear
logger.warning(f"Unknown time unit '{unit}', assuming nanoseconds")
time_ns = int(time_val)
# Avoid duplicates
if not any(r.day == actual_day and r.part == part_num
for r in results):
results.append(PerformanceResult(
user=user,
year=year,
day=actual_day,
part=part_num,
time_ns=time_ns,
output_bytes=output_bytes,
timestamp=timestamp
))
except ValueError:
logger.warning(f"Could not parse time: {time_str}")
# If no results found, try a more lenient approach - look for any numbers with time units
if not results:
# Look for patterns like "123.456ms" or "123.456 ms" anywhere in output
lenient_patterns = [
r'([\d.]+)\s*(ns|μs|us|ms|s|sec)',
r'([\d.]+)(ns|μs|us|ms|s|sec)',
]
# Try to extract parts sequentially if we find timing info
for pattern in lenient_patterns:
matches = list(re.finditer(pattern, output, re.IGNORECASE))
if matches:
# If we find exactly 1 or 2 matches, assume they're Part 1 and Part 2
if len(matches) == 1:
match = matches[0]
time_val = float(match.group(1))
unit = match.group(2).lower()
time_ns = CargoAOCRunner._convert_to_nanoseconds(time_val, unit)
results.append(PerformanceResult(
user=user,
year=year,
day=day,
part=1,
time_ns=time_ns,
output_bytes=output_bytes,
timestamp=timestamp
))
elif len(matches) == 2:
for idx, match in enumerate(matches, 1):
time_val = float(match.group(1))
unit = match.group(2).lower()
time_ns = CargoAOCRunner._convert_to_nanoseconds(time_val, unit)
results.append(PerformanceResult(
user=user,
year=year,
day=day,
part=idx,
time_ns=time_ns,
output_bytes=output_bytes,
timestamp=timestamp
))
break
return results
@staticmethod
def _convert_to_nanoseconds(time_val: float, unit: str) -> int:
"""Convert time value to nanoseconds based on unit"""
unit = unit.lower()
# Handle unicode micro symbol (µ) and regular u
if unit == 's' or unit == 'sec' or unit == 'second':
return int(time_val * 1_000_000_000)
elif unit == 'ms' or unit == 'millisecond':
return int(time_val * 1_000_000)
elif unit == 'μs' or unit == 'µs' or unit == 'us' or unit == 'microsecond':
return int(time_val * 1_000)
elif unit == 'ns' or unit == 'nanosecond':
return int(time_val)
else:
# Default to nanoseconds
return int(time_val)
class HTMLGenerator:
    """Renders the performance-comparison report as a static HTML page."""
    def __init__(self, output_dir: str):
        """Remember the target directory, creating it (and parents) if absent."""
        target = Path(output_dir)
        target.mkdir(parents=True, exist_ok=True)
        self.output_dir = target
def generate(self, db: Database, config: Config):
    """Build the comparison page from the database and write index.html."""
    available_years = db.get_all_years()
    # Honour compare_years from the config, but fall back to every year in
    # the database when none of the requested years has any data.
    selected_years = available_years
    if config.compare_years:
        requested = [y for y in config.compare_years if y in available_years]
        if requested:
            selected_years = requested
        else:
            logger.warning(f"compare_years {config.compare_years} specified but no matching data found. Using all years from database.")
    # Fetch every year from the DB and filter in Python afterwards.
    results = db.get_latest_results(years=None, days=config.compare_days)
    if selected_years:
        results = [row for row in results if row['year'] in selected_years]
    users = db.get_all_users()
    # year -> day -> part -> user -> timing dict
    data = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
    for row in results:
        runner_ns = row['time_ns']
        # Only rows with a positive runner time count as valid results.
        if runner_ns <= 0:
            continue
        gen_ns = row.get('generator_time_ns', 0)
        # The comparison uses the combined generator + runner time.
        data[row['year']][row['day']][row['part']][row['user']] = {
            'total': runner_ns + gen_ns,
            'runner': runner_ns,
            'generator': gen_ns,
            'output_bytes': row.get('output_bytes', 0),
            'git_rev': row.get('git_rev', ''),
            'repo_url': row.get('repo_url', ''),
        }
    page = self._generate_html(data, selected_years, users, db, config, results)
    target = self.output_dir / 'index.html'
    with open(target, 'w') as fh:
        fh.write(page)
    logger.info(f"Generated HTML report: {target}")
@staticmethod
def _generate_svg_graph(data_points: List[dict]) -> str:
"""Generate an SVG line graph showing performance over time"""
if len(data_points) < 2:
return ""
# Graph dimensions - make responsive to fit in modal
# Modal has max-width 600px with 20px padding, so max SVG width should be ~560px
width = 560
height = 200
padding = 40
graph_width = width - 2 * padding
graph_height = height - 2 * padding
# Extract time values
times = [dp['time_ns'] for dp in data_points]
min_time = min(times)
max_time = max(times)
# Add 10% padding to range for better visualization, or minimum 1% of max value
time_range = max_time - min_time if max_time > min_time else max(max_time * 0.01, 1)
if time_range > 0:
padding_amount = time_range * 0.1
min_time = max(0, min_time - padding_amount)
max_time = max_time + padding_amount
time_range = max_time - min_time
# Format time for display
def format_time(ns):
ms = ns / 1_000_000
us = ns / 1_000
if ms >= 1:
return f"{ms:.2f}ms"
elif us >= 1:
return f"{us:.2f}μs"
else:
return f"{ns}ns"
# Generate SVG - make it responsive
svg_parts = []
svg_parts.append(f'<svg width="100%" height="{height}" viewBox="0 0 {width} {height}" preserveAspectRatio="xMidYMid meet" style="border: 1px solid #ddd; background: #fafafa; max-width: 100%;">')
# Draw axes
svg_parts.append(f'<line x1="{padding}" y1="{padding}" x2="{padding}" y2="{height - padding}" stroke="#333" stroke-width="2"/>') # Y-axis
svg_parts.append(f'<line x1="{padding}" y1="{height - padding}" x2="{width - padding}" y2="{height - padding}" stroke="#333" stroke-width="2"/>') # X-axis
# Draw grid lines and labels
num_grid_lines = 5
for i in range(num_grid_lines + 1):
y_pos = padding + (graph_height * i / num_grid_lines)
time_val = max_time - (time_range * i / num_grid_lines)
# Grid line
if i < num_grid_lines:
svg_parts.append(f'<line x1="{padding}" y1="{y_pos}" x2="{width - padding}" y2="{y_pos}" stroke="#e0e0e0" stroke-width="1"/>')
# Y-axis label
svg_parts.append(f'<text x="{padding - 5}" y="{y_pos + 4}" text-anchor="end" font-size="10" fill="#666">{format_time(time_val)}</text>')
# Draw data points and line
points = []
for i, dp in enumerate(data_points):
x = padding + (graph_width * i / (len(data_points) - 1)) if len(data_points) > 1 else padding
y = padding + graph_height - (graph_height * (dp['time_ns'] - min_time) / time_range)
points.append((x, y))
# Draw line connecting points
if len(points) > 1:
path_d = f"M {points[0][0]} {points[0][1]}"
for x, y in points[1:]:
path_d += f" L {x} {y}"
svg_parts.append(f'<path d="{path_d}" fill="none" stroke="#667eea" stroke-width="2"/>')
# Draw points
for x, y in points:
svg_parts.append(f'<circle cx="{x}" cy="{y}" r="4" fill="#667eea" stroke="#fff" stroke-width="2"/>')
# X-axis labels (show first, middle, last)
if len(data_points) > 0:
indices_to_label = [0]
if len(data_points) > 2:
indices_to_label.append(len(data_points) // 2)
if len(data_points) > 1:
indices_to_label.append(len(data_points) - 1)
for idx in indices_to_label:
x = padding + (graph_width * idx / (len(data_points) - 1)) if len(data_points) > 1 else padding
date_str = data_points[idx]['date'][:10] if data_points[idx]['date'] else ''
svg_parts.append(f'<text x="{x}" y="{height - padding + 15}" text-anchor="middle" font-size="9" fill="#666">{date_str}</text>')
svg_parts.append('</svg>')
return ''.join(svg_parts)
def _generate_compact_commits_for_year(self, config: Config, year: int) -> str:
    """Build a compact per-user recent-commit log for one year ('' if none).

    A repo contributes only when its local checkout exists and (for
    'single' repos) contains a directory named after *year*.
    """
    entries = []
    for repo_cfg in config.repositories:
        kind = repo_cfg.get('type', 'single')
        author = repo_cfg['name']
        if kind == 'single':
            checkout = repo_cfg.get('local_path', '')
            remote = repo_cfg.get('url', '')
            if not checkout:
                continue
            repo_dir = Path(checkout)
            # Only show commits when this repo actually has that year.
            season_dir = repo_dir / str(year)
            if not (season_dir.exists() and season_dir.is_dir()):
                continue
            recent = CargoAOCRunner.get_recent_commits(repo_dir, count=2)
            if recent:
                entries.append({'user': author, 'repo_url': remote, 'commits': recent})
        elif kind == 'multi-year':
            for per_year in repo_cfg.get('years', []):
                if per_year['year'] != year:
                    continue
                checkout = per_year.get('local_path', '')
                remote = per_year.get('url', '')
                if not checkout:
                    continue
                repo_dir = Path(checkout)
                if not repo_dir.exists():
                    continue
                recent = CargoAOCRunner.get_recent_commits(repo_dir, count=2)
                if recent:
                    entries.append({'user': author, 'repo_url': remote, 'commits': recent})
    if not entries:
        return ''
    chunks = ['<div class="compact-commits">']
    for entry in entries:
        author = entry['user']
        remote = entry['repo_url']
        links = []
        for commit in entry['commits']:
            when = commit['timestamp'][:16] if commit['timestamp'] else ''  # YYYY-MM-DD HH:MM
            # Only the first line of the commit message, capped at 60 chars.
            message = commit['message']
            first_line = message.split('\n')[0] if message else ''
            shown = first_line[:60] + '...' if len(first_line) > 60 else first_line
            sha = commit['hash']
            link = f"{remote.rstrip('/')}/commit/{sha}" if remote else '#'
            links.append(f'<a href="{link}" target="_blank">{sha}</a> ({when}) <span class="commit-message">{shown}</span>')
        chunks.append(f'<div class="commit-entry"><strong>{author}:</strong> ')
        chunks.append(' | '.join(links))
        chunks.append('</div>')
    chunks.append('</div>')
    return ''.join(chunks)
def _generate_html(self, data: dict, years: List[int], users: List[str], db: Database, config: Config, results: List[dict]) -> str:
    """Render the full comparison page as one HTML string.

    data    -- year -> day -> part -> user -> timing dict (legacy rows may
               hold a raw int instead of a dict; both are handled below)
    years   -- years to render (already filtered against compare_years)
    users   -- all known user names
    db      -- used here only for per-cell history lookups
    results -- flat latest-result rows, used for the output-bytes table
    """
    # Get refresh interval from config (default 5 minutes = 300 seconds)
    refresh_interval = config.config.get('html_refresh_interval', 300)
    # Sort years descending (most recent first)
    sorted_years = sorted(years, reverse=True)
    # Calculate summary statistics
    total_days = sum(len(data[year]) for year in data)
    total_parts = sum(len(parts) for year in data for day in data[year].values() for parts in day.values())
    users_with_data = set()
    for year in data.values():
        for day in year.values():
            for part in day.values():
                users_with_data.update(part.keys())
    # Calculate stars (completed parts) per user per year
    # A "star" here is any part with a positive total time recorded.
    stars_by_user_year = defaultdict(lambda: defaultdict(int))
    for year in data:
        for day in data[year].values():
            for part in day.values():
                for user, time_data in part.items():
                    if isinstance(time_data, dict):
                        total_time = time_data.get('total', 0)
                    else:
                        total_time = time_data if time_data > 0 else 0
                    if total_time > 0:
                        stars_by_user_year[user][year] += 1
    # Check if log file exists
    log_file_path = Path(config.output_dir) / 'cargo-aoc.log'
    log_file_exists = log_file_path.exists()
    # Static page skeleton: meta refresh, styles, and the modal/collapse
    # helper scripts.  Literal CSS/JS braces are doubled ({{ }}) because
    # this is an f-string.
    html = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<meta http-equiv="refresh" content="{refresh_interval}">
<title>Advent of Code Performance Comparison</title>
<style>
* {{
margin: 0;
padding: 0;
box-sizing: border-box;
}}
body {{
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
min-height: 100vh;
padding: 10px;
}}
.container {{
max-width: 1400px;
margin: 0 auto;
background: white;
border-radius: 8px;
box-shadow: 0 10px 30px rgba(0,0,0,0.2);
padding: 15px;
}}
h1 {{
color: #333;
margin-bottom: 5px;
font-size: 1.8em;
}}
.subtitle {{
color: #666;
margin-bottom: 15px;
font-size: 0.9em;
}}
.nav-bar {{
margin-bottom: 15px;
padding: 10px;
background: #f8f9fa;
border-radius: 6px;
position: sticky;
top: 10px;
z-index: 100;
}}
.nav-bar h3 {{
font-size: 0.9em;
color: #555;
margin-bottom: 8px;
}}
.nav-links {{
display: flex;
flex-wrap: wrap;
gap: 8px;
}}
.nav-link {{
padding: 4px 12px;
background: #667eea;
color: white;
text-decoration: none;
border-radius: 4px;
font-size: 0.85em;
transition: background 0.2s;
}}
.nav-link:hover {{
background: #5568d3;
}}
.controls {{
margin-bottom: 15px;
padding: 10px;
background: #f8f9fa;
border-radius: 6px;
font-size: 0.85em;
}}
.year-section {{
margin-bottom: 25px;
}}
.year-header {{
font-size: 1.4em;
color: #667eea;
margin-bottom: 10px;
padding-bottom: 5px;
border-bottom: 2px solid #667eea;
}}
.day-section {{
margin-bottom: 15px;
padding: 10px;
background: #f8f9fa;
border-radius: 6px;
}}
.day-header {{
font-size: 1.1em;
color: #333;
margin-bottom: 8px;
font-weight: 600;
}}
.part-section {{
margin-bottom: 12px;
}}
.part-header {{
font-size: 0.95em;
color: #555;
margin-bottom: 5px;
font-weight: 600;
}}
th small {{
font-weight: normal;
font-size: 0.7em;
}}
th small a {{
color: #fff;
opacity: 0.8;
}}
th small a:hover {{
opacity: 1;
text-decoration: underline;
}}
table {{
width: 100%;
border-collapse: collapse;
margin-bottom: 8px;
background: white;
border-radius: 4px;
overflow: hidden;
font-size: 0.85em;
}}
th {{
background: #667eea;
color: white;
padding: 6px 8px;
text-align: left;
font-weight: 600;
}}
td {{
padding: 6px 8px;
border-bottom: 1px solid #e0e0e0;
}}
tr:hover {{
background: #f5f5f5;
}}
.time {{
font-family: 'Courier New', monospace;
font-weight: bold;
font-size: 0.9em;
}}
a {{
color: #667eea;
text-decoration: none;
}}
a:hover {{
text-decoration: underline;
}}
.fastest {{
background: #d4edda !important;
color: #155724;
}}
.slowest {{
background: #f8d7da !important;
color: #721c24;
}}
.no-data {{
color: #999;
font-style: italic;
}}
.history-link {{
font-size: 0.75em;
color: #666;
margin-left: 5px;
cursor: pointer;
}}
.history-link:hover {{
color: #667eea;
text-decoration: underline;
}}
.modal {{
display: none;
position: fixed;
z-index: 1000;
left: 0;
top: 0;
width: 100%;
height: 100%;
overflow: auto;
background-color: rgba(0,0,0,0.5);
}}
.modal.show {{
display: block;
}}
.modal-content {{
background-color: #fefefe;
margin: 5% auto;
padding: 20px;
border: 1px solid #888;
border-radius: 8px;
width: 80%;
max-width: 600px;
max-height: 80vh;
overflow-y: auto;
position: relative;
}}
.modal-header {{
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 15px;
padding-bottom: 10px;
border-bottom: 2px solid #667eea;
}}
.modal-title {{
font-size: 1.2em;
font-weight: bold;
color: #333;
}}
.modal-close {{
color: #aaa;
font-size: 28px;
font-weight: bold;
cursor: pointer;
line-height: 20px;
}}
.modal-close:hover,
.modal-close:focus {{
color: #000;
text-decoration: none;
}}
.history-item {{
padding: 8px 0;
border-bottom: 1px solid #ddd;
}}
.history-item:last-child {{
border-bottom: none;
}}
.history-graph {{
margin: 15px 0;
padding: 10px;
background: white;
border-radius: 4px;
overflow-x: auto;
overflow-y: visible;
}}
.history-graph svg {{
display: block;
width: 100%;
max-width: 100%;
height: auto;
}}
.compact-commits {{
margin-bottom: 10px;
padding: 8px;
background: #f8f9fa;
border-radius: 4px;
font-size: 0.8em;
border-left: 3px solid #667eea;
}}
.commit-entry {{
margin: 3px 0;
color: #555;
}}
.commit-entry strong {{
color: #333;
margin-right: 5px;
}}
.commit-entry a {{
color: #667eea;
font-family: 'Courier New', monospace;
text-decoration: none;
margin: 0 3px;
font-weight: bold;
}}
.commit-entry a:hover {{
text-decoration: underline;
}}
.commit-entry .commit-message {{
color: #666;
font-style: italic;
margin-left: 5px;
}}
.summary {{
margin-top: 30px;
padding: 15px;
background: #e3f2fd;
border-radius: 6px;
border-left: 4px solid #2196f3;
}}
.summary h3 {{
color: #1976d2;
margin-bottom: 10px;
font-size: 1.1em;
}}
.summary-stats {{
display: grid;
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
gap: 10px;
margin-top: 10px;
}}
.stat-item {{
background: white;
padding: 10px;
border-radius: 4px;
box-shadow: 0 1px 3px rgba(0,0,0,0.1);
}}
.stat-label {{
font-size: 0.8em;
color: #666;
margin-bottom: 3px;
}}
.stat-value {{
font-size: 1.3em;
font-weight: bold;
color: #333;
}}
.collapsible-header {{
cursor: pointer;
user-select: none;
display: flex;
align-items: center;
gap: 8px;
}}
.collapsible-header:hover {{
opacity: 0.8;
}}
.collapsible-arrow {{
display: inline-block;
transition: transform 0.2s ease;
font-size: 0.9em;
}}
.collapsible-arrow.expanded {{
transform: rotate(90deg);
}}
.collapsible-content {{
display: none;
margin-top: 10px;
}}
.collapsible-content.expanded {{
display: block;
}}
</style>
<script>
function showHistory(user, year, day, part) {{
const id = `history-${{user}}-${{year}}-${{day}}-${{part}}`;
const modal = document.getElementById(id);
if (modal) {{
modal.classList.add('show');
}}
}}
function closeHistory(user, year, day, part) {{
const id = `history-${{user}}-${{year}}-${{day}}-${{part}}`;
const modal = document.getElementById(id);
if (modal) {{
modal.classList.remove('show');
}}
}}
function toggleCollapsible(element) {{
const content = element.nextElementSibling;
const arrow = element.querySelector('.collapsible-arrow');
if (content && content.classList.contains('collapsible-content')) {{
content.classList.toggle('expanded');
if (arrow) {{
arrow.classList.toggle('expanded');
}}
}}
}}
// Close modal when clicking outside of it
window.onclick = function(event) {{
if (event.target.classList.contains('modal')) {{
event.target.classList.remove('show');
}}
}}
</script>
</head>
<body>
<div class="container">
<h1>🎄 Advent of Code Performance Comparison</h1>
<p class="subtitle">Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
<div class="nav-bar">
<h3>Jump to Year:</h3>
<div class="nav-links">
"""
    # Add navigation links for each year
    for year in sorted_years:
        html += f' <a href="#year-{year}" class="nav-link">{year}</a>\n'
    html += """ </div>
</div>
<div class="controls">
<p><strong>Users:</strong> """ + ', '.join(sorted(users)) + """</p>
""" + (f'<p><a href="cargo-aoc.log" target="_blank">📋 View Cargo AOC Logs</a></p>' if log_file_exists else '') + """
</div>
<!-- Stars Summary Table -->
<div class="summary" style="margin-top: 20px; margin-bottom: 20px;">
<h3>⭐ Stars Summary</h3>
<!--<p style="font-size: 0.9em; color: #666; margin-bottom: 10px;">Number of completed parts (stars) per user per year</p>-->
<table>
<thead>
<tr>
<th>User</th>
"""
    # Add year columns
    for year in sorted_years:
        html += f" <th>{year}</th>\n"
    html += """ <th>Total</th>
</tr>
</thead>
<tbody>
"""
    # Add rows for each user
    for user in sorted(users):
        html += f""" <tr>
<td><strong>{user}</strong></td>
"""
        total_stars = 0
        for year in sorted_years:
            stars = stars_by_user_year[user][year]
            total_stars += stars
            if stars > 0:
                html += f" <td>{stars} ⭐</td>\n"
            else:
                html += " <td class=\"no-data\">-</td>\n"
        html += f" <td><strong>{total_stars} ⭐</strong></td>\n"
        html += " </tr>\n"
    html += """ </tbody>
</table>
</div>
"""
    # Generate content for each year (sorted descending)
    for year in sorted_years:
        if year not in data:
            continue
        html += f"""
<div class="year-section" id="year-{year}">
<h2 class="year-header">Year {year}</h2>
"""
        # Add compact commit log for this year
        html += self._generate_compact_commits_for_year(config, year)
        html += """
"""
        # Collect all day/part combinations for this year
        day_part_combos = []
        for day in sorted(data[year].keys()):
            for part in sorted(data[year][day].keys()):
                day_part_combos.append((day, part))
        if not day_part_combos:
            html += "<p>No data available for this year.</p>"
            html += "</div>"
            continue
        # Collect user info (git rev, repo_url) for header
        # (the first non-empty value seen wins for each user)
        user_info = {}
        for day, part in day_part_combos:
            part_data = data[year][day][part]
            for user in users:
                if user not in user_info:
                    user_info[user] = {'git_rev': '', 'repo_url': ''}
                if user in part_data:
                    time_data = part_data[user]
                    if isinstance(time_data, dict):
                        if not user_info[user]['git_rev']:
                            user_info[user]['git_rev'] = time_data.get('git_rev', '')
                        if not user_info[user]['repo_url']:
                            user_info[user]['repo_url'] = time_data.get('repo_url', '')
        # Find fastest and slowest times per day/part for highlighting and speed multiple
        fastest_times = {}
        slowest_times = {}
        speed_multiples = {}
        for day, part in day_part_combos:
            part_data = data[year][day][part]
            times = []
            for user, time_data in part_data.items():
                if isinstance(time_data, dict):
                    total_time = time_data.get('total', 0)
                else:
                    total_time = time_data if time_data > 0 else 0
                if total_time > 0:
                    times.append(total_time)
            if times:
                fastest_times[(day, part)] = min(times)
                slowest_times[(day, part)] = max(times)
                if fastest_times[(day, part)] > 0:
                    speed_multiples[(day, part)] = slowest_times[(day, part)] / fastest_times[(day, part)]
                else:
                    speed_multiples[(day, part)] = 0
        # Create table with transposed structure
        # (rows are day/part combinations, columns are users)
        html += """
<table>
<thead>
<tr>
<th>Day/Part</th>
"""
        # Add user columns with git rev in header
        for user in sorted(users):
            git_rev = user_info[user]['git_rev']
            repo_url = user_info[user]['repo_url']
            if git_rev and repo_url:
                commit_url = repo_url.rstrip('/') + '/commit/' + git_rev
                git_rev_html = f'<br><small><a href="{commit_url}" target="_blank" title="View commit">{git_rev[:7]}</a></small>'
            elif git_rev:
                git_rev_html = f'<br><small>{git_rev[:7]}</small>'
            else:
                git_rev_html = ''
            html += f' <th>{user}{git_rev_html}</th>\n'
        html += """ <th>Speed Multiple</th>
</tr>
</thead>
<tbody>
"""
        # Add rows for each day/part combination
        for day, part in day_part_combos:
            part_data = data[year][day][part]
            fastest_time = fastest_times.get((day, part), 0)
            speed_multiple = speed_multiples.get((day, part), 0)
            row_history_modals = []
            aoc_url = f"https://adventofcode.com/{year}/day/{day}"
            html += f"""
<tr>
<td><strong><a href="{aoc_url}" target="_blank">Day {day} Part {part}</a></strong></td>
"""
            # Add timing data for each user
            for user in sorted(users):
                time_data = part_data.get(user, 0)
                if time_data == 0 or (isinstance(time_data, dict) and time_data.get('total', 0) == 0):
                    html += ' <td class="no-data">-</td>\n'
                else:
                    # Extract times
                    if isinstance(time_data, dict):
                        total_time_ns = time_data.get('total', 0)
                        runner_time_ns = time_data.get('runner', 0)
                        generator_time_ns = time_data.get('generator', 0)
                        git_rev = time_data.get('git_rev', '')
                        repo_url = time_data.get('repo_url', '')
                    else:
                        # Backward compatibility
                        total_time_ns = time_data
                        runner_time_ns = total_time_ns
                        generator_time_ns = 0
                        git_rev = ''
                        repo_url = ''
                    # Format total time
                    total_ms = total_time_ns / 1_000_000
                    total_us = total_time_ns / 1_000
                    if total_ms >= 1:
                        total_str = f"{total_ms:.2f} ms"
                    elif total_us >= 1:
                        total_str = f"{total_us:.2f} μs"
                    else:
                        total_str = f"{total_time_ns} ns"
                    # Determine if fastest
                    cell_class = ""
                    if fastest_time > 0 and total_time_ns == fastest_time:
                        cell_class = "fastest"
                    # Get historical data for this user/day/part
                    historical = db.get_historical_results(user, year, day, part)
                    history_link = ""
                    if len(historical) > 1:
                        history_items = []
                        hist_data_points = []
                        for hist in historical[:20]:  # Show last 20 runs for graph
                            hist_total = hist['time_ns'] + hist.get('generator_time_ns', 0)
                            hist_ms = hist_total / 1_000_000
                            hist_us = hist_total / 1_000
                            if hist_ms >= 1:
                                hist_time_str = f"{hist_ms:.2f} ms"
                            elif hist_us >= 1:
                                hist_time_str = f"{hist_us:.2f} μs"
                            else:
                                hist_time_str = f"{hist_total} ns"
                            hist_git = hist.get('git_rev', '')[:7] if hist.get('git_rev') else '-'
                            hist_date = hist.get('timestamp', '')[:16] if hist.get('timestamp') else ''
                            hist_repo_url = hist.get('repo_url', '')
                            if hist_git != '-' and hist_repo_url:
                                hist_git_link = f'<a href="{hist_repo_url.rstrip("/")}/commit/{hist.get("git_rev", "")}" target="_blank">{hist_git}</a>'
                            else:
                                hist_git_link = hist_git
                            history_items.append(f'<div class="history-item">{hist_date}: {hist_time_str} ({hist_git_link})</div>')
                            # Store data for graph (reverse order for chronological display)
                            hist_data_points.insert(0, {
                                'time_ns': hist_total,
                                'timestamp': hist.get('timestamp', ''),
                                'date': hist_date
                            })
                        # Generate SVG graph
                        svg_graph = HTMLGenerator._generate_svg_graph(hist_data_points)
                        history_modal = f'''
<div id="history-{user}-{year}-{day}-{part}" class="modal">
<div class="modal-content">
<div class="modal-header">
<div class="modal-title">History: {user} - Year {year} Day {day} Part {part}</div>
<span class="modal-close" onclick="closeHistory('{user}', {year}, {day}, {part})">&times;</span>
</div>
<div>
<div class="history-graph">
<strong>Performance Trend:</strong>
{svg_graph}
</div>
<div style="margin-top: 20px;">
<strong>History Details:</strong>
{''.join(history_items)}
</div>
</div>
</div>
</div>
'''
                        row_history_modals.append(history_modal)
                        history_link = f' <span class="history-link" onclick="showHistory(\'{user}\', {year}, {day}, {part})">📊</span>'
                    html += f' <td class="time {cell_class}">{total_str}{history_link}</td>\n'
            # Add speed multiple column
            if speed_multiple > 0:
                speed_multiple_str = f"{speed_multiple:.2f}x"
            else:
                speed_multiple_str = "-"
            html += f' <td class="time">{speed_multiple_str}</td>\n'
            html += """ </tr>
"""
            # Add history modals after the row
            for hist_modal in row_history_modals:
                html += hist_modal
        html += """ </tbody>
</table>
</div>
"""
    # Add output bytes summary table
    # Use the results passed to the method
    html += """
<div class="summary">
<h3 class="collapsible-header" onclick="toggleCollapsible(this)">
<span class="collapsible-arrow">▶</span>
Output Bytes Summary
</h3>
<div class="collapsible-content">
<p style="font-size: 0.9em; color: #666; margin-bottom: 10px;">Number of bytes written to stdout for each day/part/user combination</p>
<table>
<thead>
<tr>
<th>Year</th>
<th>Day</th>
<th>Part</th>
"""
    for user in sorted(users):
        html += f" <th>{user}</th>\n"
    html += """ </tr>
</thead>
<tbody>
"""
    # Organize output bytes by year/day/part/user
    output_bytes_data = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))
    for result in results:  # results is passed as parameter to _generate_html
        year = result['year']
        day = result['day']
        part = result['part']
        user = result['user']
        output_bytes = result.get('output_bytes', 0)
        # Store output_bytes even if 0, but we'll show "-" for 0 in the table
        output_bytes_data[year][day][part][user] = output_bytes
    # Generate table rows - use the same structure as main data tables
    for year in sorted_years:
        if year not in data:
            continue
        for day in sorted(data[year].keys()):
            for part in sorted(data[year][day].keys()):
                aoc_url = f"https://adventofcode.com/{year}/day/{day}"
                html += f""" <tr>
<td>{year}</td>
<td><a href="{aoc_url}" target="_blank">{day}</a></td>
<td>{part}</td>
"""
                for user in sorted(users):
                    bytes_val = output_bytes_data[year][day][part].get(user, None)
                    if bytes_val is not None and bytes_val > 0:
                        html += f" <td>{bytes_val:,}</td>\n"
                    else:
                        html += " <td>-</td>\n"
                html += " </tr>\n"
    html += """ </tbody>
</table>
</div>
</div>
"""
    # Add summary statistics at the bottom
    html += f"""
<div class="summary">
<h3>Summary Statistics</h3>
<div class="summary-stats">
<div class="stat-item">
<div class="stat-label">Total Years</div>
<div class="stat-value">{len(data)}</div>
</div>
<div class="stat-item">
<div class="stat-label">Total Days</div>
<div class="stat-value">{total_days}</div>
</div>
<div class="stat-item">
<div class="stat-label">Total Parts</div>
<div class="stat-value">{total_parts}</div>
</div>
<div class="stat-item">
<div class="stat-label">Users with Data</div>
<div class="stat-value">{len(users_with_data)}</div>
</div>
</div>
</div>
"""
    html += """
</div>
</body>
</html>
"""
    return html
class AOCSync:
    """Top-level orchestrator: polls repos, runs benchmarks, renders reports."""
    def __init__(self, config_path: str = "config.yaml", force_rerun: bool = False):
        """Load configuration and wire up the database, HTML generator and git helper."""
        self.force_rerun = force_rerun
        self.config = Config(config_path)
        db_path = os.path.join(self.config.data_dir, 'results.db')
        self.db = Database(db_path)
        self.html_gen = HTMLGenerator(self.config.output_dir)
        self.git_manager = GitManager()
        # Make sure the container image used for benchmark runs is available.
        self._ensure_podman_image()
def process_repository(self, repo_config: dict, user_name: str):
    """Process one configured repository entry for *user_name*.

    Two layouts are supported:
      * type 'single'     -- one repository holding one or more years
      * type 'multi-year'  -- a list of per-year repositories under 'years'

    Benchmarks are (re)run when ``self.force_rerun`` is set or when the
    checkout has changed since the last recorded run.
    """
    repo_type = repo_config.get('type', 'single')
    if repo_type == 'single':
        # Single repository with all years.  (The redundant second
        # `url = repo_config['url']` assignment was removed.)
        url = repo_config['url']
        local_path = repo_config['local_path']
        # Always update the repo to get latest changes
        if self.git_manager.clone_or_update_repo(url, local_path):
            repo_path = Path(local_path)
            # Check if years are specified in config
            config_years = repo_config.get('years')
            years_to_process = []
            if config_years:
                # Use years from config
                years_to_process = config_years
            else:
                # Try to determine year(s) from the repository
                years = CargoAOCRunner.extract_years_from_repo(repo_path)
                if years:
                    years_to_process = years
                else:
                    # If no year detected, check for year directories
                    logger.warning(f"No year detected for {user_name}, checking for year directories")
                    # TODO(review): hard-coded candidate list needs a yearly bump.
                    for try_year in [2025, 2024, 2023, 2022, 2021, 2020]:
                        year_dir = repo_path / str(try_year)
                        if year_dir.exists() and year_dir.is_dir():
                            logger.info(f"Found year directory {try_year} for {user_name}")
                            years_to_process.append(try_year)
            # Process each year, checking for changes if not forcing
            for year in years_to_process:
                if self.force_rerun:
                    logger.info(f"Force rerun enabled, processing {user_name} year {year}...")
                    self._run_and_store_benchmarks(repo_path, year, user_name,
                                                   repo_url=url, is_multi_year=False)
                else:
                    # Compare against the git revision stored with the most
                    # recent result for this user/year.
                    last_results = self.db.get_latest_results(years=[year], days=None)
                    last_git_rev = ""
                    for result in last_results:
                        if result['user'] == user_name and result['year'] == year:
                            last_git_rev = result.get('git_rev', '')
                            break
                    # Check if this year has changes
                    if self.git_manager.has_year_changes(repo_path, year, last_git_rev):
                        logger.info(f"Year {year} for {user_name} has changes, running benchmarks...")
                        self._run_and_store_benchmarks(repo_path, year, user_name,
                                                       repo_url=url, is_multi_year=False)
                    else:
                        logger.info(f"Year {year} for {user_name} has no changes, skipping...")
    elif repo_type == 'multi-year':
        # Multiple repositories, one per year
        years_config = repo_config.get('years', [])
        if not years_config:
            logger.warning(f"No years configured for multi-year repository {user_name}")
            return
        logger.info(f"Processing multi-year repository {user_name} with {len(years_config)} year(s)")
        for year_config in years_config:
            year = year_config['year']
            url = year_config['url']
            local_path = year_config['local_path']
            logger.debug(f"Checking {user_name} year {year} at {local_path}")
            has_changes_result = GitManager.has_changes(url, local_path)
            if self.force_rerun or has_changes_result:
                if self.force_rerun:
                    logger.info(f"Force rerun enabled, processing repository {user_name} year {year}...")
                else:
                    logger.info(f"Repository {user_name} year {year} has changes, updating...")
                if GitManager.clone_or_update_repo(url, local_path):
                    repo_path = Path(local_path)
                    self._run_and_store_benchmarks(repo_path, year, user_name,
                                                   repo_url=url, is_multi_year=True)
                else:
                    logger.error(f"Failed to clone/update repository {user_name} year {year} at {local_path}")
            else:
                logger.info(f"Repository {user_name} year {year} has no changes, skipping...")
def _check_year_in_repo(self, repo_path: Path, year: int) -> bool:
"""Check if a repository contains solutions for a specific year"""
# Simple heuristic: check if year appears in path or files
path_str = str(repo_path)
if str(year) in path_str:
return True
# Check Cargo.toml
cargo_toml = repo_path / 'Cargo.toml'
if cargo_toml.exists():
with open(cargo_toml, 'r') as f:
if str(year) in f.read():
return True
return False
def _run_and_store_benchmarks(self, repo_path: Path, year: int, user: str,
                              repo_url: str = "", is_multi_year: bool = False):
    """Benchmark one user/year checkout and persist every result row."""
    logger.info(f"Running benchmarks for {user} year {year} in {repo_path}")
    # cargo-aoc output is appended to a shared log next to the HTML report.
    log_file = Path(self.config.output_dir) / 'cargo-aoc.log'
    log_file.parent.mkdir(parents=True, exist_ok=True)
    results = CargoAOCRunner.run_benchmarks(
        repo_path, year=year, user=user,
        repo_url=repo_url, is_multi_year=is_multi_year,
        log_file=log_file, docker_config=self.config.docker_config)
    # Persist each parsed result row.
    for result in results:
        self.db.insert_result(result)
    logger.info(f"Stored {len(results)} benchmark results for {user} year {year}")
def _ensure_podman_image(self):
    """Ensure the benchmark container image is present, building it on demand."""
    image_name = self.config.docker_config.get('image', 'aocsync:latest')
    # rust:latest is pulled from a registry as-is; nothing to build locally.
    if image_name == 'rust:latest':
        return
    try:
        listing = subprocess.run(
            ['podman', 'images', '--format', '{{.Repository}}:{{.Tag}}'],
            capture_output=True,
            text=True,
            timeout=5
        )
        # Normalize "name" / "name:tag" into a (repo, tag) pair.
        image_repo, image_tag = (image_name.split(':') + ['latest'])[:2]
        found = False
        for line in listing.stdout.strip().split('\n'):
            if not line:
                continue
            repo, tag = (line.split(':') + ['latest'])[:2]
            if repo == image_repo and tag == image_tag:
                found = True
                break
        if found:
            logger.debug(f"Podman image {image_name} exists")
        else:
            logger.info(f"Podman image {image_name} not found, building it...")
            self._build_podman_image(image_name)
    except Exception as e:
        # Listing can fail (podman missing, timeout); try the build regardless.
        logger.warning(f"Could not check for Podman image {image_name}: {e}")
        logger.info("Attempting to build image anyway...")
        self._build_podman_image(image_name)
def _build_podman_image(self, image_name: str):
"""Build the Podman image from Dockerfile"""
dockerfile_path = Path(__file__).parent / 'Dockerfile'
if not dockerfile_path.exists():
logger.error(f"Dockerfile not found at {dockerfile_path}")
logger.error("Cannot build Podman image. Please create Dockerfile or use rust:latest image.")
return
try:
logger.info(f"Building Podman image {image_name} from {dockerfile_path}...")
result = subprocess.run(
['podman', 'build', '-t', image_name, '-f', str(dockerfile_path), str(dockerfile_path.parent)],
capture_output=True,
text=True,
timeout=600 # 10 minute timeout for build
)
if result.returncode == 0:
logger.info(f"Successfully built Podman image {image_name}")
else:
logger.error(f"Failed to build Podman image: {result.stderr}")
logger.warning("Falling back to rust:latest (will install cargo-aoc on each run)")
except subprocess.TimeoutExpired:
logger.error("Podman build timed out")
except Exception as e:
logger.error(f"Error building Podman image: {e}")
def sync_all(self, force: bool = None):
"""Sync all repositories"""
if force is not None:
original_force = self.force_rerun
self.force_rerun = force
logger.info("Starting sync of all repositories...")
# Clear log file at start of sync
log_file = Path(self.config.output_dir) / 'cargo-aoc.log'
log_file.parent.mkdir(parents=True, exist_ok=True)
# Clear the log file and write a header
with open(log_file, 'w', encoding='utf-8') as f:
f.write(f"{'#'*80}\n")
f.write(f"# Sync started at {datetime.now().isoformat()}\n")
f.write(f"{'#'*80}\n\n")
for repo_config in self.config.repositories:
user_name = repo_config['name']
try:
self.process_repository(repo_config, user_name)
except Exception as e:
logger.error(f"Error processing repository {user_name}: {e}")
# Generate HTML report
logger.info("Generating HTML report...")
self.html_gen.generate(self.db, self.config)
# Rsync output if configured
self._rsync_output()
if force is not None:
self.force_rerun = original_force
def sync_repo(self, repo_name: str, force: bool = True):
"""Sync a specific repository by name"""
logger.info(f"Starting sync for repository: {repo_name} (force={force})...")
original_force = self.force_rerun
self.force_rerun = force
# Append to log file instead of clearing
log_file = Path(self.config.output_dir) / 'cargo-aoc.log'
log_file.parent.mkdir(parents=True, exist_ok=True)
with open(log_file, 'a', encoding='utf-8') as f:
f.write(f"\n{'#'*80}\n")
f.write(f"# Sync started for {repo_name} at {datetime.now().isoformat()}\n")
f.write(f"{'#'*80}\n\n")
found = False
for repo_config in self.config.repositories:
if repo_config['name'] == repo_name:
found = True
try:
self.process_repository(repo_config, repo_name)
except Exception as e:
logger.error(f"Error processing repository {repo_name}: {e}")
break
if not found:
logger.error(f"Repository {repo_name} not found")
# Generate HTML report
logger.info("Generating HTML report...")
self.html_gen.generate(self.db, self.config)
# Rsync output if configured
self._rsync_output()
self.force_rerun = original_force
def sync_year(self, year: int, force: bool = True):
"""Sync all repositories for a specific year"""
logger.info(f"Starting sync for year: {year} (force={force})...")
original_force = self.force_rerun
self.force_rerun = force
# Append to log file instead of clearing
log_file = Path(self.config.output_dir) / 'cargo-aoc.log'
log_file.parent.mkdir(parents=True, exist_ok=True)
with open(log_file, 'a', encoding='utf-8') as f:
f.write(f"\n{'#'*80}\n")
f.write(f"# Sync started for year {year} at {datetime.now().isoformat()}\n")
f.write(f"{'#'*80}\n\n")
for repo_config in self.config.repositories:
user_name = repo_config['name']
repo_type = repo_config.get('type', 'single')
try:
if repo_type == 'single':
# Check if this repo has the year
url = repo_config['url']
local_path = repo_config['local_path']
if self.git_manager.clone_or_update_repo(url, local_path):
repo_path = Path(local_path)
config_years = repo_config.get('years', [])
years_to_process = config_years if config_years else CargoAOCRunner.extract_years_from_repo(repo_path)
if year in years_to_process:
logger.info(f"Processing {user_name} year {year}...")
self._run_and_store_benchmarks(repo_path, year, user_name,
repo_url=url, is_multi_year=False)
elif repo_type == 'multi-year':
years_config = repo_config.get('years', [])
for year_config in years_config:
if year_config['year'] == year:
url = year_config['url']
local_path = year_config['local_path']
if self.git_manager.clone_or_update_repo(url, local_path):
repo_path = Path(local_path)
self._run_and_store_benchmarks(repo_path, year, user_name,
repo_url=url, is_multi_year=True)
except Exception as e:
logger.error(f"Error processing repository {user_name} for year {year}: {e}")
# Generate HTML report
logger.info("Generating HTML report...")
self.html_gen.generate(self.db, self.config)
# Rsync output if configured
self._rsync_output()
self.force_rerun = original_force
def _rsync_output(self):
"""Rsync output directory to remote server if configured"""
rsync_config = self.config.rsync_config
if not rsync_config or not rsync_config.get('enabled', False):
return
destination = rsync_config.get('destination')
if not destination:
logger.warning("Rsync enabled but no destination specified")
return
output_dir = Path(self.config.output_dir)
if not output_dir.exists():
logger.warning(f"Output directory {output_dir} does not exist, skipping rsync")
return
logger.info(f"Rsyncing {output_dir} to {destination}...")
try:
# Build rsync command
# Use trailing slash on source to sync contents, not the directory itself
# This will include all files including cargo-aoc.log
source = str(output_dir) + "/"
cmd = ['rsync', '-avz', '--delete', source, destination]
result = subprocess.run(
cmd,
capture_output=True,
text=True,
timeout=60 # 1 minute timeout
)
if result.returncode == 0:
logger.info(f"Successfully rsynced output to {destination}")
else:
logger.error(f"Rsync failed: {result.stderr}")
except subprocess.TimeoutExpired:
logger.error("Rsync timed out")
except Exception as e:
logger.error(f"Error during rsync: {e}")
def run_continuous(self):
"""Run continuous polling"""
logger.info(f"Starting continuous polling (interval: {self.config.poll_interval}s)")
try:
while True:
self.sync_all()
logger.info(f"Sleeping for {self.config.poll_interval} seconds...")
time.sleep(self.config.poll_interval)
except KeyboardInterrupt:
logger.info("Stopped by user")
class WebServer:
    """Simple web server for viewing logs and triggering refreshes"""
    def __init__(self, sync: AOCSync, host: str = '0.0.0.0', port: int = 8080):
        # sync: the AOCSync instance whose sync_* methods the HTTP routes run.
        self.sync = sync
        self.host = host
        self.port = port
        self.app = None  # Flask app, created in _setup_app()
        self._setup_app()
    def _setup_app(self):
        """Setup Flask application"""
        if not FLASK_AVAILABLE:
            raise ImportError("Flask is required for web server. Install with: pip install Flask")
        self.app = Flask(__name__)
        @self.app.route('/')
        def index():
            # Control panel page with refresh buttons and an inline log viewer.
            return self._get_index_page()
        @self.app.route('/logs')
        def logs():
            """View logs"""
            # Serves the shared cargo-aoc.log written by the sync methods.
            log_file = Path(self.sync.config.output_dir) / 'cargo-aoc.log'
            if log_file.exists():
                with open(log_file, 'r', encoding='utf-8') as f:
                    content = f.read()
                return Response(content, mimetype='text/plain')
            return "No log file found", 404
        @self.app.route('/api/refresh/all', methods=['POST'])
        def refresh_all():
            """Trigger refresh for all repositories"""
            # Run the sync in a daemon thread so the HTTP request returns
            # immediately; progress is observed via /logs.
            thread = threading.Thread(target=self.sync.sync_all, kwargs={'force': True})
            thread.daemon = True
            thread.start()
            return jsonify({'status': 'started', 'message': 'Refresh started for all repositories'})
        @self.app.route('/api/sync/normal', methods=['POST'])
        def sync_normal():
            """Trigger normal sync loop (without force)"""
            thread = threading.Thread(target=self.sync.sync_all, kwargs={'force': False})
            thread.daemon = True
            thread.start()
            return jsonify({'status': 'started', 'message': 'Normal sync started (will skip unchanged repositories)'})
        @self.app.route('/api/refresh/repo/<repo_name>', methods=['POST'])
        def refresh_repo(repo_name):
            """Trigger refresh for a specific repository"""
            thread = threading.Thread(target=self.sync.sync_repo, args=(repo_name,), kwargs={'force': True})
            thread.daemon = True
            thread.start()
            return jsonify({'status': 'started', 'message': f'Refresh started for repository: {repo_name}'})
        @self.app.route('/api/refresh/year/<int:year>', methods=['POST'])
        def refresh_year(year):
            """Trigger refresh for a specific year"""
            thread = threading.Thread(target=self.sync.sync_year, args=(year,), kwargs={'force': True})
            thread.daemon = True
            thread.start()
            return jsonify({'status': 'started', 'message': f'Refresh started for year: {year}'})
        @self.app.route('/api/repos', methods=['GET'])
        def get_repos():
            """Get list of repositories"""
            repos = [{'name': r['name'], 'type': r.get('type', 'single')} for r in self.sync.config.repositories]
            return jsonify({'repos': repos})
        @self.app.route('/api/years', methods=['GET'])
        def get_years():
            """Get list of years"""
            years = self.sync.db.get_all_years()
            return jsonify({'years': sorted(years, reverse=True)})
    def _get_index_page(self) -> str:
        """Generate the main web interface page (full HTML document as a string)."""
        repos = [r['name'] for r in self.sync.config.repositories]
        years = sorted(self.sync.db.get_all_years(), reverse=True)
        # Build HTML - use regular string and format variables manually to avoid brace escaping issues
        html = """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>AOC Sync Control Panel</title>
    <style>
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }
        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            min-height: 100vh;
            padding: 20px;
        }
        .container {
            max-width: 1200px;
            margin: 0 auto;
            background: white;
            border-radius: 8px;
            box-shadow: 0 10px 30px rgba(0,0,0,0.2);
            padding: 30px;
        }
        h1 {
            color: #333;
            margin-bottom: 10px;
            font-size: 2em;
        }
        .section {
            margin-bottom: 30px;
            padding: 20px;
            background: #f8f9fa;
            border-radius: 6px;
        }
        .section h2 {
            color: #667eea;
            margin-bottom: 15px;
            font-size: 1.3em;
        }
        .button-group {
            display: flex;
            flex-wrap: wrap;
            gap: 10px;
            margin-bottom: 15px;
        }
        button {
            padding: 10px 20px;
            background: #667eea;
            color: white;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            font-size: 0.9em;
            transition: background 0.2s;
        }
        button:hover {
            background: #5568d3;
        }
        button:disabled {
            background: #ccc;
            cursor: not-allowed;
        }
        .button-danger {
            background: #dc3545;
        }
        .button-danger:hover {
            background: #c82333;
        }
        .status {
            margin-top: 10px;
            padding: 10px;
            border-radius: 4px;
            display: none;
        }
        .status.success {
            background: #d4edda;
            color: #155724;
            border: 1px solid #c3e6cb;
        }
        .status.error {
            background: #f8d7da;
            color: #721c24;
            border: 1px solid #f5c6cb;
        }
        .log-viewer {
            background: #1e1e1e;
            color: #d4d4d4;
            padding: 15px;
            border-radius: 4px;
            font-family: 'Courier New', monospace;
            font-size: 0.85em;
            max-height: 500px;
            overflow-y: auto;
            white-space: pre-wrap;
            word-wrap: break-word;
        }
        .log-viewer a {
            color: #4ec9b0;
            text-decoration: none;
        }
        .log-viewer a:hover {
            text-decoration: underline;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>🎄 AOC Sync Control Panel</h1>
        <div class="section">
            <h2>Refresh Controls</h2>
            <div style="margin-bottom: 20px;">
                <h3 style="margin-bottom: 10px; color: #555;">Sync All</h3>
                <button onclick="syncNormal()" id="btn-sync-normal" style="background: #28a745; margin-right: 10px;">▶️ Normal Sync (Skip Unchanged)</button>
                <button onclick="refreshAll()" id="btn-refresh-all">🔄 Force Refresh All Repositories</button>
                <div class="status" id="status-all"></div>
            </div>
            <div style="margin-bottom: 20px;">
                <h3 style="margin-bottom: 10px; color: #555;">Refresh by Repository</h3>
                <div class="button-group">
"""
        for repo in repos:
            # Use double quotes for outer string, single quotes for JavaScript
            html += f"                    <button onclick=\"refreshRepo('{repo}')\" id=\"btn-repo-{repo}\">🔄 {repo}</button>\n"
        html += """                </div>
                <div class="status" id="status-repo"></div>
            </div>
            <div style="margin-bottom: 20px;">
                <h3 style="margin-bottom: 10px; color: #555;">Refresh by Year</h3>
                <div class="button-group">
"""
        for year in years:
            # Years are ints, so no quoting is needed in the JS call.
            html += f'                    <button onclick="refreshYear({year})" id="btn-year-{year}">🔄 {year}</button>\n'
        html += """                </div>
                <div class="status" id="status-year"></div>
            </div>
        </div>
        <div class="section">
            <h2>Logs</h2>
            <p style="margin-bottom: 10px; color: #666;">
                <a href="/logs" target="_blank" style="color: #667eea;">📋 View Full Logs</a>
            </p>
            <div class="log-viewer" id="log-viewer">Loading logs...</div>
        </div>
    </div>
    <script>
        function showStatus(elementId, message, isError = false) {
            const status = document.getElementById(elementId);
            status.textContent = message;
            status.className = 'status ' + (isError ? 'error' : 'success');
            status.style.display = 'block';
            setTimeout(() => {
                status.style.display = 'none';
            }, 5000);
        }
        async function syncNormal() {
            const btn = document.getElementById('btn-sync-normal');
            btn.disabled = true;
            try {
                const response = await fetch('/api/sync/normal', { method: 'POST' });
                const data = await response.json();
                showStatus('status-all', data.message || 'Normal sync started', false);
            } catch (error) {
                showStatus('status-all', 'Error: ' + error.message, true);
            } finally {
                btn.disabled = false;
            }
        }
        async function refreshAll() {
            const btn = document.getElementById('btn-refresh-all');
            btn.disabled = true;
            try {
                const response = await fetch('/api/refresh/all', { method: 'POST' });
                const data = await response.json();
                showStatus('status-all', data.message || 'Refresh started', false);
            } catch (error) {
                showStatus('status-all', 'Error: ' + error.message, true);
            } finally {
                btn.disabled = false;
            }
        }
        async function refreshRepo(repoName) {
            const btn = document.getElementById('btn-repo-' + repoName);
            btn.disabled = true;
            try {
                const response = await fetch('/api/refresh/repo/' + encodeURIComponent(repoName), { method: 'POST' });
                const data = await response.json();
                showStatus('status-repo', data.message || 'Refresh started', false);
            } catch (error) {
                showStatus('status-repo', 'Error: ' + error.message, true);
            } finally {
                btn.disabled = false;
            }
        }
        async function refreshYear(year) {
            const btn = document.getElementById('btn-year-' + year);
            btn.disabled = true;
            try {
                const response = await fetch('/api/refresh/year/' + year, { method: 'POST' });
                const data = await response.json();
                showStatus('status-year', data.message || 'Refresh started', false);
            } catch (error) {
                showStatus('status-year', 'Error: ' + error.message, true);
            } finally {
                btn.disabled = false;
            }
        }
        async function loadLogs() {
            try {
                const response = await fetch('/logs');
                if (response.ok) {
                    const text = await response.text();
                    const logViewer = document.getElementById('log-viewer');
                    // Show last 5000 characters
                    logViewer.textContent = text.slice(-5000);
                    logViewer.scrollTop = logViewer.scrollHeight;
                } else {
                    document.getElementById('log-viewer').textContent = 'No logs available';
                }
            } catch (error) {
                document.getElementById('log-viewer').textContent = 'Error loading logs: ' + error.message;
            }
        }
        // Load logs on page load and refresh every 5 seconds
        loadLogs();
        setInterval(loadLogs, 5000);
    </script>
</body>
</html>"""
        return html
    def run(self) -> None:
        """Run the web server"""
        # Blocking call; use_reloader is disabled so background sync threads
        # are not duplicated by Flask's reloader process.
        self.app.run(host=self.host, port=self.port, debug=False, use_reloader=False)
        # NOTE(review): original logged the startup URL before app.run(); kept
        # byte-identical below.
def main():
    """Command-line entry point: parse arguments and run AOC Sync.

    Supports one-shot or continuous syncing, optionally alongside the Flask
    control-panel web server.
    """
    import argparse
    parser = argparse.ArgumentParser(description='AOC Sync - Poll and compare AOC implementations')
    parser.add_argument('--config', default='config.yaml', help='Path to config file')
    parser.add_argument('--once', action='store_true', help='Run once instead of continuously')
    parser.add_argument('--force', '--rerun-all', action='store_true', dest='force_rerun',
                        help='Force rerun all days even if repository has not changed')
    parser.add_argument('--web', action='store_true', help='Start web server for logs and refresh controls')
    parser.add_argument('--web-host', default='0.0.0.0', help='Web server host (default: 0.0.0.0)')
    parser.add_argument('--web-port', type=int, default=8080, help='Web server port (default: 8080)')
    args = parser.parse_args()
    sync = AOCSync(args.config, force_rerun=args.force_rerun)
    if not args.web:
        # Headless mode: just sync (once or forever).
        if args.once:
            sync.sync_all()
        else:
            sync.run_continuous()
        return
    if not FLASK_AVAILABLE:
        logger.error("Flask is required for web server. Install with: pip install Flask")
        sys.exit(1)
    web_server = WebServer(sync, host=args.web_host, port=args.web_port)
    if args.once:
        # One sync pass, then serve the control panel in the foreground.
        sync.sync_all()
        web_server.run()
    else:
        # Serve the control panel from a background daemon thread while the
        # continuous polling loop owns the main thread.
        web_thread = threading.Thread(target=web_server.run)
        web_thread.daemon = True
        web_thread.start()
        sync.run_continuous()
# Run the CLI only when executed directly, not when imported as a module.
if __name__ == '__main__':
    main()