diff --git a/README.md b/README.md index c8cad1b..d023884 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ A Python script that polls multiple git repositories containing Advent of Code i - **Automatic Git Polling**: Monitors multiple repositories for changes - **Flexible Repository Structure**: Supports both single-repo (all years) and multi-repo (one per year) configurations -- **Automatic Benchmarking**: Runs `cargo aoc bench` for all implemented days +- **Automatic Runtime Measurement**: Runs `cargo aoc` for all implemented days and parses runtime information - **Performance Parsing**: Extracts timing data from cargo-aoc output - **Data Storage**: SQLite database for historical performance data - **HTML Reports**: Beautiful, responsive HTML comparison pages @@ -38,6 +38,8 @@ pip install -r requirements.txt cargo install cargo-aoc ``` +Note: The script runs `cargo aoc` (not `cargo aoc bench`) and parses runtime information from the output. + ## Configuration Edit `config.yaml` to configure repositories to monitor: @@ -118,8 +120,8 @@ The HTML report includes: 1. **Git Polling**: Checks each configured repository for changes by comparing local and remote commits 2. **Repository Update**: Clones new repositories or updates existing ones when changes are detected 3. **Day Detection**: Automatically finds implemented days by scanning for `day*.rs` files and Cargo.toml entries -4. **Benchmarking**: Runs `cargo aoc bench --day X` for each implemented day -5. **Parsing**: Extracts timing data from cargo-aoc output (handles nanoseconds, microseconds, milliseconds) +4. **Runtime Measurement**: Runs `cargo aoc --day X` for each implemented day +5. **Parsing**: Extracts timing data from cargo-aoc output (handles nanoseconds, microseconds, milliseconds, seconds) 6. **Storage**: Stores results in SQLite database with timestamps 7. **Report Generation**: Generates HTML comparison page showing latest results @@ -146,8 +148,9 @@ If benchmarks take too long, the script has a 5-minute timeout per day. Adjust i ### Missing performance data If some users/days/parts don't show up: -- Check that `cargo aoc bench` runs successfully in the repository +- Check that `cargo aoc --day X` runs successfully in the repository - Verify the repository structure matches cargo-aoc conventions +- Ensure `cargo aoc` outputs timing information (check if it's configured to show runtime) - Check logs for parsing errors ## License diff --git a/aocsync.py b/aocsync.py index 2a98710..dac0025 100755 --- a/aocsync.py +++ b/aocsync.py @@ -305,19 +305,25 @@ class CargoAOCRunner: """Runs cargo-aoc benchmarks and parses results""" @staticmethod - def find_implemented_days(repo_path: Path) -> List[int]: - """Find which days are implemented in the repository""" + def find_implemented_days(work_dir: Path) -> List[int]: + """Find which days are implemented in the directory + + Args: + work_dir: Directory to search (should be a year directory for single repos) + """ days = [] + work_dir = Path(work_dir) # Look for common patterns: src/bin/day01.rs, src/day01.rs, etc. patterns = [ - repo_path / 'src' / 'bin' / 'day*.rs', - repo_path / 'src' / 'day*.rs', - repo_path / 'src' / '**' / 'day*.rs', + 'src/bin/day*.rs', + 'src/day*.rs', + '**/src/bin/day*.rs', + '**/src/day*.rs', ] for pattern in patterns: - for day_file in repo_path.glob(str(pattern.relative_to(repo_path))): + for day_file in work_dir.glob(pattern): match = re.search(r'day(\d+)', day_file.name) if match: day_num = int(match.group(1)) @@ -325,7 +331,7 @@ class CargoAOCRunner: days.append(day_num) # Also check for Cargo.toml with day references - cargo_toml = repo_path / 'Cargo.toml' + cargo_toml = work_dir / 'Cargo.toml' if cargo_toml.exists(): with open(cargo_toml, 'r') as f: content = f.read() @@ -338,140 +344,169 @@ class CargoAOCRunner: @staticmethod def extract_years_from_repo(repo_path: Path) -> List[int]: - """Try to extract year(s) from repository path, name, or structure""" + """Try to extract year(s) from repository structure + + For single repos, looks for year directories in the root (e.g., 2023/, 2024/) + """ years = [] repo_path = Path(repo_path) - # Check path name - path_str = str(repo_path) - for year_match in re.finditer(r'(\d{4})', path_str): - year = int(year_match.group(1)) - if 2015 <= year <= 2030 and year not in years: # Reasonable range - years.append(year) - - # Check for year directories (common pattern: src/2023/, year2023/, etc.) + # Check for year directories in root (e.g., 2023/, 2024/) + # These should be directories with 4-digit year names for item in repo_path.iterdir(): - if item.is_dir(): - year_match = re.search(r'(\d{4})', item.name) - if year_match: - year = int(year_match.group(1)) - if 2015 <= year <= 2030 and year not in years: + if item.is_dir() and not item.name.startswith('.'): + # Check if directory name is exactly a 4-digit year + if re.match(r'^\d{4}$', item.name): + year = int(item.name) + if 2015 <= year <= 2030: # Reasonable range years.append(year) - # Check Cargo.toml - cargo_toml = repo_path / 'Cargo.toml' - if cargo_toml.exists(): - with open(cargo_toml, 'r') as f: - content = f.read() - for year_match in re.finditer(r'(\d{4})', content): - year = int(year_match.group(1)) - if 2015 <= year <= 2030 and year not in years: - years.append(year) + # Also check path name as fallback + if not years: + path_str = str(repo_path) + for year_match in re.finditer(r'(\d{4})', path_str): + year = int(year_match.group(1)) + if 2015 <= year <= 2030 and year not in years: + years.append(year) return sorted(years) if years else [] @staticmethod - def run_benchmarks(repo_path: Path, year: Optional[int] = None, - user: str = "unknown") -> List[PerformanceResult]: - """Run cargo aoc benchmarks and parse results""" + def run_benchmarks(repo_path: Path, year: int, user: str = "unknown", + is_multi_year: bool = False) -> List[PerformanceResult]: + """Run cargo aoc benchmarks and parse results + + Args: + repo_path: Path to the repository root (for single repos) or year directory (for multi-year repos) + year: The year to benchmark + user: User name for the results + is_multi_year: True if this is a multi-year repo (repo_path is already the year directory) + """ results = [] repo_path = Path(repo_path) - if not (repo_path / 'Cargo.toml').exists(): - logger.warning(f"No Cargo.toml found in {repo_path}") + # Determine the working directory + if is_multi_year: + # For multi-year repos, repo_path is already the year directory + work_dir = repo_path + else: + # For single repos, check if we need to navigate to a year subdirectory + work_dir = repo_path + year_dir = repo_path / str(year) + if year_dir.exists() and year_dir.is_dir(): + work_dir = year_dir + logger.info(f"Using year directory: {work_dir}") + + if not (work_dir / 'Cargo.toml').exists(): + logger.warning(f"No Cargo.toml found in {work_dir}") return results - days = CargoAOCRunner.find_implemented_days(repo_path) - logger.info(f"Found {len(days)} implemented days in {repo_path}") + days = CargoAOCRunner.find_implemented_days(work_dir) + logger.info(f"Found {len(days)} implemented days in {work_dir}") for day in days: try: - # Run cargo aoc bench for this day - cmd = ['cargo', 'aoc', 'bench', '--day', str(day)] - if year: - cmd.extend(['--year', str(year)]) + logger.info(f"Running cargo aoc for {user} year {year} day {day} in {work_dir}") + # Run cargo aoc for this day (no year flag, must be in correct directory) + cmd = ['cargo', 'aoc', '--day', str(day)] result = subprocess.run( cmd, - cwd=repo_path, + cwd=work_dir, capture_output=True, text=True, timeout=300 # 5 minute timeout per day ) if result.returncode != 0: - logger.warning(f"cargo aoc bench failed for day {day}: {result.stderr}") + logger.warning(f"cargo aoc failed for day {day} in {work_dir}: {result.stderr}") continue - # Parse output for performance data - # Try to extract year from output if not provided - actual_year = year - if not actual_year: - # Look for year in output - year_match = re.search(r'(\d{4})', result.stdout) - if year_match: - potential_year = int(year_match.group(1)) - if 2015 <= potential_year <= 2030: - actual_year = potential_year - - if not actual_year: - logger.warning(f"Could not determine year for day {day}, skipping") - continue - - day_results = CargoAOCRunner._parse_benchmark_output( - result.stdout, day, actual_year, user + # Parse output for runtime information + day_results = CargoAOCRunner._parse_runtime_output( + result.stdout, result.stderr, day, year, user ) results.extend(day_results) except subprocess.TimeoutExpired: - logger.error(f"Timeout running benchmarks for day {day}") + logger.error(f"Timeout running cargo aoc for day {day}") except Exception as e: - logger.error(f"Error running benchmarks for day {day}: {e}") + logger.error(f"Error running cargo aoc for day {day}: {e}") return results @staticmethod - def _parse_benchmark_output(output: str, day: int, year: int, - user: str) -> List[PerformanceResult]: - """Parse cargo-aoc benchmark output""" + def _parse_runtime_output(stdout: str, stderr: str, day: int, year: int, + user: str) -> List[PerformanceResult]: + """Parse cargo-aoc runtime output + + cargo aoc typically outputs timing information like: + - "Day X - Part Y: XXX.XXX ms" + - "Day X - Part Y: XXX.XXX μs" + - "Day X - Part Y: XXX.XXX ns" + - Or similar formats + """ results = [] timestamp = datetime.now().isoformat() - # Pattern: "Day X - Part Y: XXX.XXX ns (XXX.XXX ms)" - # or "Day X - Part Y: XXX.XXX ns" - # Also handles formats like "Day 1 Part 1", "day01-part1", etc. + # Combine stdout and stderr (timing info might be in either) + output = stdout + "\n" + stderr + + # Patterns to match various cargo-aoc output formats + # Common formats: + # "Day 1 - Part 1: 123.456 ms" + # "Day 1 Part 1: 123.456 ms" + # "day 1 - part 1: 123.456 ms" + # "Part 1: 123.456 ms" (when day is already known) patterns = [ - r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]+([\d.]+)\s*ns', - r'day\s+(\d+)\s*-\s*part\s+(\d+)[:\s]+([\d.]+)\s*ns', - r'Day\s+(\d+)\s+Part\s+(\d+)[:\s]+([\d.]+)\s*ns', - r'day\s+(\d+)\s+part\s+(\d+)[:\s]+([\d.]+)\s*ns', - r'day(\d+)\s*-\s*part(\d+)[:\s]+([\d.]+)\s*ns', - r'(\d+)\s*-\s*(\d+)[:\s]+([\d.]+)\s*ns', - # Handle microseconds and milliseconds too - r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]+([\d.]+)\s*(?:ns|μs|us|ms)', + # Full format with day and part + r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)', + r'day\s+(\d+)\s*-\s*part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)', + r'Day\s+(\d+)\s+Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)', + r'day\s+(\d+)\s+part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)', + r'day(\d+)\s*-\s*part(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)', + # Part only (use provided day) + r'Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)', + r'part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)', ] for pattern in patterns: for match in re.finditer(pattern, output, re.IGNORECASE): - part_day = int(match.group(1)) - part_num = int(match.group(2)) - time_str = match.group(3) + groups = match.groups() - # Use the day from the match if available, otherwise use provided day - actual_day = part_day if part_day > 0 else day + # Determine day and part based on pattern + if len(groups) == 4: + # Pattern with day and part + part_day = int(groups[0]) + part_num = int(groups[1]) + time_str = groups[2] + unit = groups[3].lower() + actual_day = part_day if part_day > 0 and part_day <= 25 else day + elif len(groups) == 3: + # Pattern with only part (use provided day) + part_num = int(groups[0]) + time_str = groups[1] + unit = groups[2].lower() + actual_day = day + else: + continue try: - time_ns = int(float(time_str)) + time_val = float(time_str) - # Check if the time unit is in the match (for patterns that include it) - # If not, assume nanoseconds (most common) - # Look for unit in the original match context - match_text = match.group(0).lower() - if 'ms' in match_text or 'millisecond' in match_text: - time_ns = int(time_ns * 1_000_000) # Convert ms to ns - elif 'μs' in match_text or 'us' in match_text or 'microsecond' in match_text: - time_ns = int(time_ns * 1_000) # Convert μs to ns - # else: already in nanoseconds + # Convert to nanoseconds + if unit == 's' or unit == 'sec' or unit == 'second': + time_ns = int(time_val * 1_000_000_000) + elif unit == 'ms' or unit == 'millisecond': + time_ns = int(time_val * 1_000_000) + elif unit == 'μs' or unit == 'us' or unit == 'microsecond': + time_ns = int(time_val * 1_000) + elif unit == 'ns' or unit == 'nanosecond': + time_ns = int(time_val) + else: + # Default to nanoseconds if unit unclear + logger.warning(f"Unknown time unit '{unit}', assuming nanoseconds") + time_ns = int(time_val) results.append(PerformanceResult( user=user, @@ -894,7 +929,8 @@ class AOCSync: if config_years: # Use years from config for year in config_years: - self._run_and_store_benchmarks(repo_path, year, user_name) + self._run_and_store_benchmarks(repo_path, year, user_name, + is_multi_year=False) else: # Try to determine year(s) from the repository years = CargoAOCRunner.extract_years_from_repo(repo_path) @@ -902,19 +938,18 @@ class AOCSync: if years: # Run benchmarks for each detected year for year in years: - self._run_and_store_benchmarks(repo_path, year, user_name) + self._run_and_store_benchmarks(repo_path, year, user_name, + is_multi_year=False) else: - # If no year detected, try running without year specification - # cargo-aoc might infer it, or we'll try common years - logger.info(f"No year detected for {user_name}, trying without year specification") - results = CargoAOCRunner.run_benchmarks(repo_path, year=None, user=user_name) - for result in results: - self.db.insert_result(result) - - # Also try common years as fallback - for try_year in [2023, 2024, 2022, 2021, 2020]: - if self._check_year_in_repo(repo_path, try_year): - self._run_and_store_benchmarks(repo_path, try_year, user_name) + # If no year detected, check for year directories + logger.warning(f"No year detected for {user_name}, checking for year directories") + # Try common years as fallback + for try_year in [2025, 2024, 2023, 2022, 2021, 2020]: + year_dir = repo_path / str(try_year) + if year_dir.exists() and year_dir.is_dir(): + logger.info(f"Found year directory {try_year} for {user_name}") + self._run_and_store_benchmarks(repo_path, try_year, user_name, + is_multi_year=False) elif repo_type == 'multi-year': # Multiple repositories, one per year @@ -928,7 +963,8 @@ class AOCSync: logger.info(f"Repository {user_name} year {year} has changes, updating...") if self.git_manager.clone_or_update_repo(url, local_path): repo_path = Path(local_path) - self._run_and_store_benchmarks(repo_path, year, user_name) + self._run_and_store_benchmarks(repo_path, year, user_name, + is_multi_year=True) def _check_year_in_repo(self, repo_path: Path, year: int) -> bool: """Check if a repository contains solutions for a specific year""" @@ -946,10 +982,12 @@ class AOCSync: return False - def _run_and_store_benchmarks(self, repo_path: Path, year: int, user: str): + def _run_and_store_benchmarks(self, repo_path: Path, year: int, user: str, + is_multi_year: bool = False): """Run benchmarks and store results""" logger.info(f"Running benchmarks for {user} year {year} in {repo_path}") - results = CargoAOCRunner.run_benchmarks(repo_path, year=year, user=user) + results = CargoAOCRunner.run_benchmarks(repo_path, year=year, user=user, + is_multi_year=is_multi_year) # Store results for result in results: