From 61b10131b41374bb8b7de1df1f83ef5c93f9f395 Mon Sep 17 00:00:00 2001 From: Bill Thiede Date: Tue, 2 Dec 2025 16:30:00 -0800 Subject: [PATCH] Fix parsing for cargo aoc --- aocsync.py | 178 +++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 159 insertions(+), 19 deletions(-) diff --git a/aocsync.py b/aocsync.py index 944f96a..ad3bb70 100755 --- a/aocsync.py +++ b/aocsync.py @@ -422,6 +422,10 @@ class CargoAOCRunner: logger.warning(f"cargo aoc failed for day {day} in {work_dir}: {result.stderr}") continue + # Log output for debugging if no results found + if not result.stdout.strip() and not result.stderr.strip(): + logger.warning(f"No output from cargo aoc for {user} year {year} day {day}") + # Parse output for runtime information day_results = CargoAOCRunner._parse_runtime_output( result.stdout, result.stderr, day, year, user @@ -429,7 +433,9 @@ class CargoAOCRunner: if day_results: logger.info(f"Parsed {len(day_results)} runtime result(s) for {user} year {year} day {day}") else: - logger.warning(f"No runtime data parsed for {user} year {year} day {day}") + # Log a sample of the output to help debug parsing issues + output_sample = (result.stdout + "\n" + result.stderr).strip()[:500] + logger.warning(f"No runtime data parsed for {user} year {year} day {day}. Output sample: {output_sample}") results.extend(day_results) except subprocess.TimeoutExpired: @@ -448,6 +454,7 @@ class CargoAOCRunner: - "Day X - Part Y: XXX.XXX ms" - "Day X - Part Y: XXX.XXX μs" - "Day X - Part Y: XXX.XXX ns" + - "Part Y: XXX.XXX ms" - Or similar formats """ results = [] @@ -462,20 +469,86 @@ class CargoAOCRunner: # "Day 1 Part 1: 123.456 ms" # "day 1 - part 1: 123.456 ms" # "Part 1: 123.456 ms" (when day is already known) + # Also handle formats like "Day 01 - Part 1" or "Day 1, Part 1" + # And the format with generator/runner on separate lines: + # "Day 2 - Part 1 : " + # " generator: 5.651µs," + # " runner: 3.07µs" patterns = [ - # Full format with day and part - r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)', - r'day\s+(\d+)\s*-\s*part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)', - r'Day\s+(\d+)\s+Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)', - r'day\s+(\d+)\s+part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)', - r'day(\d+)\s*-\s*part(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)', - # Part only (use provided day) - r'Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)', - r'part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)', + # Format with generator/runner on separate lines (most common cargo-aoc format) + # Match "Day X - Part Y" followed by lines with "runner:" or "generator:" + r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]+.*?(?:^|\n).*?runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', + r'day\s+(\d+)\s*-\s*part\s+(\d+)[:\s]+.*?(?:^|\n).*?runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', + # Standalone runner/generator lines (for when we're already in a Day X - Part Y block) + r'runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', + r'generator\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', + # Full format with day and part - various separators + r'Day\s+(\d+)\s*[-,\s]+\s*Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', + r'day\s+(\d+)\s*[-,\s]+\s*part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', + r'Day\s+(\d+)\s+Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', + r'day\s+(\d+)\s+part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', + r'day(\d+)\s*[-,\s]+\s*part(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', + # Part only (use provided day) - more flexible + r'Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', + r'part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', + # Handle formats without explicit "Part" label + r'Day\s+(\d+)\s*[-,\s]+\s*(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', + # Handle formats with parentheses or brackets + r'\(Part\s+(\d+)\)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', + r'\[Part\s+(\d+)\][:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', ] + # First, try to parse the generator/runner format which is most common + # Look for "Day X - Part Y" lines and extract runner times from following lines + lines = output.split('\n') + current_day = None + current_part = None + + for i, line in enumerate(lines): + # Check if this line starts a new Day/Part block + day_part_match = re.match(r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]', line, re.IGNORECASE) + if day_part_match: + current_day = int(day_part_match.group(1)) + current_part = int(day_part_match.group(2)) + actual_day = current_day if current_day > 0 and current_day <= 25 else day + continue + + # If we're in a Day/Part block, look for runner timing + if current_day is not None and current_part is not None: + runner_match = re.search(r'runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', line, re.IGNORECASE) + if runner_match: + time_str = runner_match.group(1) + unit = runner_match.group(2).lower() + try: + time_val = float(time_str) + time_ns = CargoAOCRunner._convert_to_nanoseconds(time_val, unit) + results.append(PerformanceResult( + user=user, + year=year, + day=actual_day, + part=current_part, + time_ns=time_ns, + timestamp=timestamp + )) + # Reset after finding runner (in case there are multiple parts) + # But keep current_day/current_part until we hit the next Day line + except ValueError: + logger.warning(f"Could not parse runner time: {time_str}") + + # Check if next line starts a new Day/Part block (reset current context) + if i + 1 < len(lines): + next_day_match = re.match(r'Day\s+\d+\s*-\s*Part\s+\d+', lines[i + 1], re.IGNORECASE) + if next_day_match: + # Don't reset yet - let the next iteration handle it + pass + + # If we found results with the line-by-line approach, return them + if results: + return results + + # Otherwise, try the original pattern-based approach for pattern in patterns: - for match in re.finditer(pattern, output, re.IGNORECASE): + for match in re.finditer(pattern, output, re.IGNORECASE | re.MULTILINE): groups = match.groups() # Determine day and part based on pattern @@ -492,6 +565,10 @@ class CargoAOCRunner: time_str = groups[1] unit = groups[2].lower() actual_day = day + elif len(groups) == 2: + # Standalone runner/generator line (use provided day, assume part from context) + # This is tricky - we'll skip these and rely on the block-based approach above + continue else: continue @@ -512,17 +589,80 @@ class CargoAOCRunner: logger.warning(f"Unknown time unit '{unit}', assuming nanoseconds") time_ns = int(time_val) - results.append(PerformanceResult( - user=user, - year=year, - day=actual_day, - part=part_num, - time_ns=time_ns, - timestamp=timestamp - )) + # Avoid duplicates + if not any(r.day == actual_day and r.part == part_num + for r in results): + results.append(PerformanceResult( + user=user, + year=year, + day=actual_day, + part=part_num, + time_ns=time_ns, + timestamp=timestamp + )) except ValueError: logger.warning(f"Could not parse time: {time_str}") + # If no results found, try a more lenient approach - look for any numbers with time units + if not results: + # Look for patterns like "123.456ms" or "123.456 ms" anywhere in output + lenient_patterns = [ + r'([\d.]+)\s*(ns|μs|us|ms|s|sec)', + r'([\d.]+)(ns|μs|us|ms|s|sec)', + ] + + # Try to extract parts sequentially if we find timing info + for pattern in lenient_patterns: + matches = list(re.finditer(pattern, output, re.IGNORECASE)) + if matches: + # If we find exactly 1 or 2 matches, assume they're Part 1 and Part 2 + if len(matches) == 1: + match = matches[0] + time_val = float(match.group(1)) + unit = match.group(2).lower() + time_ns = CargoAOCRunner._convert_to_nanoseconds(time_val, unit) + results.append(PerformanceResult( + user=user, + year=year, + day=day, + part=1, + time_ns=time_ns, + timestamp=timestamp + )) + elif len(matches) == 2: + for idx, match in enumerate(matches, 1): + time_val = float(match.group(1)) + unit = match.group(2).lower() + time_ns = CargoAOCRunner._convert_to_nanoseconds(time_val, unit) + results.append(PerformanceResult( + user=user, + year=year, + day=day, + part=idx, + time_ns=time_ns, + timestamp=timestamp + )) + break + + return results + + @staticmethod + def _convert_to_nanoseconds(time_val: float, unit: str) -> int: + """Convert time value to nanoseconds based on unit""" + unit = unit.lower() + # Handle unicode micro symbol (µ) and regular u + if unit == 's' or unit == 'sec' or unit == 'second': + return int(time_val * 1_000_000_000) + elif unit == 'ms' or unit == 'millisecond': + return int(time_val * 1_000_000) + elif unit == 'μs' or unit == 'µs' or unit == 'us' or unit == 'microsecond': + return int(time_val * 1_000) + elif unit == 'ns' or unit == 'nanosecond': + return int(time_val) + else: + # Default to nanoseconds + return int(time_val) + return results