Fix parsing for cargo aoc

2025-12-02 16:30:00 -08:00
parent 086fc2ff3e
commit 61b10131b4
1 changed files with 159 additions and 19 deletions
--- a/aocsync.py
+++ b/aocsync.py
@@ -422,6 +422,10 @@ class CargoAOCRunner:
                    logger.warning(f"cargo aoc failed for day {day} in {work_dir}: {result.stderr}")
                    continue
                
+                # Warn when cargo aoc produced no output at all (empty stdout and stderr)
+                if not result.stdout.strip() and not result.stderr.strip():
+                    logger.warning(f"No output from cargo aoc for {user} year {year} day {day}")
+                
                # Parse output for runtime information
                day_results = CargoAOCRunner._parse_runtime_output(
                    result.stdout, result.stderr, day, year, user
@@ -429,7 +433,9 @@ class CargoAOCRunner:
                if day_results:
                    logger.info(f"Parsed {len(day_results)} runtime result(s) for {user} year {year} day {day}")
                else:
-                    logger.warning(f"No runtime data parsed for {user} year {year} day {day}")
+                    # Log a sample of the output to help debug parsing issues
+                    output_sample = (result.stdout + "\n" + result.stderr).strip()[:500]
+                    logger.warning(f"No runtime data parsed for {user} year {year} day {day}. Output sample: {output_sample}")
                results.extend(day_results)
                
            except subprocess.TimeoutExpired:
@@ -448,6 +454,7 @@ class CargoAOCRunner:
        - "Day X - Part Y: XXX.XXX ms"
        - "Day X - Part Y: XXX.XXX μs"
        - "Day X - Part Y: XXX.XXX ns"
+        - "Part Y: XXX.XXX ms"
        - Or similar formats
        """
        results = []
@@ -462,20 +469,86 @@ class CargoAOCRunner:
        # "Day 1 Part 1: 123.456 ms"
        # "day 1 - part 1: 123.456 ms"
        # "Part 1: 123.456 ms" (when day is already known)
+        # Also handle formats like "Day 01 - Part 1" or "Day 1, Part 1"
+        # And the format with generator/runner on separate lines:
+        # "Day 2 - Part 1 : <answer>"
+        # "        generator: 5.651µs,"
+        # "        runner: 3.07µs"
        patterns = [
-            # Full format with day and part
-            r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
-            r'day\s+(\d+)\s*-\s*part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
-            r'Day\s+(\d+)\s+Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
-            r'day\s+(\d+)\s+part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
-            r'day(\d+)\s*-\s*part(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
-            # Part only (use provided day)
-            r'Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
-            r'part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
+            # Format with generator/runner on separate lines (most common cargo-aoc format)
+            # Match "Day X - Part Y" followed by lines with "runner:" or "generator:"
+            r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]+.*?(?:^|\n).*?runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            r'day\s+(\d+)\s*-\s*part\s+(\d+)[:\s]+.*?(?:^|\n).*?runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            # Standalone runner/generator lines (for when we're already in a Day X - Part Y block)
+            r'runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            r'generator\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            # Full format with day and part - various separators
+            r'Day\s+(\d+)\s*[-,\s]+\s*Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            r'day\s+(\d+)\s*[-,\s]+\s*part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            r'Day\s+(\d+)\s+Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            r'day\s+(\d+)\s+part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            r'day(\d+)\s*[-,\s]+\s*part(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            # Part only (use provided day) - more flexible
+            r'Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            r'part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            # Handle formats without explicit "Part" label
+            r'Day\s+(\d+)\s*[-,\s]+\s*(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            # Handle formats with parentheses or brackets
+            r'\(Part\s+(\d+)\)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            r'\[Part\s+(\d+)\][:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
        ]
        
+        # First, try to parse the generator/runner format which is most common
+        # Look for "Day X - Part Y" lines and extract runner times from following lines
+        lines = output.split('\n')
+        current_day = None
+        current_part = None
+        
+        for i, line in enumerate(lines):
+            # Check if this line starts a new Day/Part block
+            day_part_match = re.match(r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]', line, re.IGNORECASE)
+            if day_part_match:
+                current_day = int(day_part_match.group(1))
+                current_part = int(day_part_match.group(2))
+                actual_day = current_day if current_day > 0 and current_day <= 25 else day
+                continue
+            
+            # If we're in a Day/Part block, look for runner timing
+            if current_day is not None and current_part is not None:
+                runner_match = re.search(r'runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', line, re.IGNORECASE)
+                if runner_match:
+                    time_str = runner_match.group(1)
+                    unit = runner_match.group(2).lower()
+                    try:
+                        time_val = float(time_str)
+                        time_ns = CargoAOCRunner._convert_to_nanoseconds(time_val, unit)
+                        results.append(PerformanceResult(
+                            user=user,
+                            year=year,
+                            day=actual_day,
+                            part=current_part,
+                            time_ns=time_ns,
+                            timestamp=timestamp
+                        ))
+                        # Nothing is reset here: current_day/current_part stay set until the
+                        # next "Day X - Part Y" header line replaces them
+                    except ValueError:
+                        logger.warning(f"Could not parse runner time: {time_str}")
+                
+                # Peek at the next line for a new Day/Part header; nothing is reset here (no-op)
+                if i + 1 < len(lines):
+                    next_day_match = re.match(r'Day\s+\d+\s*-\s*Part\s+\d+', lines[i + 1], re.IGNORECASE)
+                    if next_day_match:
+                        # Don't reset yet - let the next iteration handle it
+                        pass
+        
+        # If we found results with the line-by-line approach, return them
+        if results:
+            return results
+        
+        # Otherwise, try the original pattern-based approach
        for pattern in patterns:
-            for match in re.finditer(pattern, output, re.IGNORECASE):
+            for match in re.finditer(pattern, output, re.IGNORECASE | re.MULTILINE):
                groups = match.groups()
                
                # Determine day and part based on pattern
@@ -492,6 +565,10 @@ class CargoAOCRunner:
                    time_str = groups[1]
                    unit = groups[2].lower()
                    actual_day = day
+                elif len(groups) == 2:
+                    # Standalone runner/generator line (use provided day, assume part from context)
+                    # This is tricky - we'll skip these and rely on the block-based approach above
+                    continue
                else:
                    continue
                
@@ -512,17 +589,80 @@ class CargoAOCRunner:
                        logger.warning(f"Unknown time unit '{unit}', assuming nanoseconds")
                        time_ns = int(time_val)
                    
-                    results.append(PerformanceResult(
-                        user=user,
-                        year=year,
-                        day=actual_day,
-                        part=part_num,
-                        time_ns=time_ns,
-                        timestamp=timestamp
-                    ))
+                    # Avoid duplicates
+                    if not any(r.day == actual_day and r.part == part_num 
+                              for r in results):
+                        results.append(PerformanceResult(
+                            user=user,
+                            year=year,
+                            day=actual_day,
+                            part=part_num,
+                            time_ns=time_ns,
+                            timestamp=timestamp
+                        ))
                except ValueError:
                    logger.warning(f"Could not parse time: {time_str}")
        
+        # If no results found, try a more lenient approach - look for any numbers with time units
+        if not results:
+            # Look for patterns like "123.456ms" or "123.456 ms" anywhere in output
+            lenient_patterns = [
+                r'([\d.]+)\s*(ns|μs|us|ms|s|sec)',
+                r'([\d.]+)(ns|μs|us|ms|s|sec)',
+            ]
+            
+            # Try to extract parts sequentially if we find timing info
+            for pattern in lenient_patterns:
+                matches = list(re.finditer(pattern, output, re.IGNORECASE))
+                if matches:
+                    # If we find exactly 1 or 2 matches, assume they're Part 1 and Part 2
+                    if len(matches) == 1:
+                        match = matches[0]
+                        time_val = float(match.group(1))
+                        unit = match.group(2).lower()
+                        time_ns = CargoAOCRunner._convert_to_nanoseconds(time_val, unit)
+                        results.append(PerformanceResult(
+                            user=user,
+                            year=year,
+                            day=day,
+                            part=1,
+                            time_ns=time_ns,
+                            timestamp=timestamp
+                        ))
+                    elif len(matches) == 2:
+                        for idx, match in enumerate(matches, 1):
+                            time_val = float(match.group(1))
+                            unit = match.group(2).lower()
+                            time_ns = CargoAOCRunner._convert_to_nanoseconds(time_val, unit)
+                            results.append(PerformanceResult(
+                                user=user,
+                                year=year,
+                                day=day,
+                                part=idx,
+                                time_ns=time_ns,
+                                timestamp=timestamp
+                            ))
+                    break
+        
+        return results
+    
+    @staticmethod
+    def _convert_to_nanoseconds(time_val: float, unit: str) -> int:
+        """Return ``time_val`` (expressed in ``unit``) as whole nanoseconds.
+
+        Recognizes s/sec/second, ms/millisecond, us/μs/µs/microsecond and
+        ns/nanosecond, case-insensitively; any other unit is treated as ns.
+        """
+        factors = {
+            's': 1_000_000_000, 'sec': 1_000_000_000, 'second': 1_000_000_000,
+            'ms': 1_000_000, 'millisecond': 1_000_000,
+            # Greek mu (U+03BC) and the micro sign (U+00B5) are distinct keys.
+            'μs': 1_000, 'µs': 1_000, 'us': 1_000, 'microsecond': 1_000,
+        }
+        # round(), not int(): 1.001 * 1000 is 1000.9999999999999 as a float,
+        # and truncating it would silently drop a nanosecond.
+        return round(time_val * factors.get(unit.lower(), 1))
+        
        return results