From 61b10131b41374bb8b7de1df1f83ef5c93f9f395 Mon Sep 17 00:00:00 2001
From: Bill Thiede <git@xinu.tv>
Date: Tue, 2 Dec 2025 16:30:00 -0800
Subject: [PATCH] Fix parsing for cargo aoc

---
 aocsync.py | 178 +++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 159 insertions(+), 19 deletions(-)
diff --git a/aocsync.py b/aocsync.py
index 944f96a..ad3bb70 100755
--- a/aocsync.py
+++ b/aocsync.py
@@ -422,6 +422,10 @@ class CargoAOCRunner:
                     logger.warning(f"cargo aoc failed for day {day} in {work_dir}: {result.stderr}")
                     continue
                 
+                # Warn when cargo aoc produced no output at all on stdout or stderr
+                if not result.stdout.strip() and not result.stderr.strip():
+                    logger.warning(f"No output from cargo aoc for {user} year {year} day {day}")
+                
                 # Parse output for runtime information
                 day_results = CargoAOCRunner._parse_runtime_output(
                     result.stdout, result.stderr, day, year, user
@@ -429,7 +433,9 @@ class CargoAOCRunner:
                 if day_results:
                     logger.info(f"Parsed {len(day_results)} runtime result(s) for {user} year {year} day {day}")
                 else:
-                    logger.warning(f"No runtime data parsed for {user} year {year} day {day}")
+                    # Log a sample of the output to help debug parsing issues
+                    output_sample = (result.stdout + "\n" + result.stderr).strip()[:500]
+                    logger.warning(f"No runtime data parsed for {user} year {year} day {day}. Output sample: {output_sample}")
                 results.extend(day_results)
                 
             except subprocess.TimeoutExpired:
@@ -448,6 +454,7 @@ class CargoAOCRunner:
         - "Day X - Part Y: XXX.XXX ms"
         - "Day X - Part Y: XXX.XXX μs"
         - "Day X - Part Y: XXX.XXX ns"
+        - "Part Y: XXX.XXX ms"
         - Or similar formats
         """
         results = []
@@ -462,20 +469,86 @@ class CargoAOCRunner:
         # "Day 1 Part 1: 123.456 ms"
         # "day 1 - part 1: 123.456 ms"
         # "Part 1: 123.456 ms" (when day is already known)
+        # Also handle formats like "Day 01 - Part 1" or "Day 1, Part 1"
+        # And the format with generator/runner on separate lines:
+        # "Day 2 - Part 1 : <answer>"
+        # "        generator: 5.651µs,"
+        # "        runner: 3.07µs"
         patterns = [
-            # Full format with day and part
-            r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
-            r'day\s+(\d+)\s*-\s*part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
-            r'Day\s+(\d+)\s+Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
-            r'day\s+(\d+)\s+part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
-            r'day(\d+)\s*-\s*part(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
-            # Part only (use provided day)
-            r'Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
-            r'part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
+            # Format with generator/runner on separate lines (most common cargo-aoc format)
+            # Match "Day X - Part Y" followed by lines with "runner:" or "generator:"
+            r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]+.*?(?:^|\n).*?runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            r'day\s+(\d+)\s*-\s*part\s+(\d+)[:\s]+.*?(?:^|\n).*?runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            # Standalone runner/generator lines (for when we're already in a Day X - Part Y block)
+            r'runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            r'generator\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            # Full format with day and part - various separators
+            r'Day\s+(\d+)\s*[-,\s]+\s*Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            r'day\s+(\d+)\s*[-,\s]+\s*part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            r'Day\s+(\d+)\s+Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            r'day\s+(\d+)\s+part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            r'day(\d+)\s*[-,\s]+\s*part(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            # Part only (use provided day) - more flexible
+            r'Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            r'part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            # Handle formats without explicit "Part" label
+            r'Day\s+(\d+)\s*[-,\s]+\s*(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            # Handle formats with parentheses or brackets
+            r'\(Part\s+(\d+)\)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
+            r'\[Part\s+(\d+)\][:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
         ]
         
+        # First, try to parse the generator/runner format which is most common
+        # Look for "Day X - Part Y" lines and extract runner times from following lines
+        lines = output.split('\n')
+        current_day = None
+        current_part = None
+        
+        # Compile once instead of once per output line.
+        header_re = re.compile(r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]', re.IGNORECASE)
+        runner_re = re.compile(r'runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', re.IGNORECASE)
+        for line in lines:
+            # A "Day X - Part Y" header opens a new block; remember which
+            # day/part the following "runner:" line belongs to.
+            day_part_match = header_re.match(line)
+            if day_part_match:
+                current_day = int(day_part_match.group(1))
+                current_part = int(day_part_match.group(2))
+                # Use the day printed in the header only when it is within
+                # 1..25; otherwise fall back to the day passed by the caller.
+                actual_day = current_day if 1 <= current_day <= 25 else day
+                continue
+
+            # Timing lines only count once a header has been seen.
+            if current_day is None or current_part is None:
+                continue
+
+            runner_match = runner_re.search(line)
+            if not runner_match:
+                continue
+
+            time_str = runner_match.group(1)
+            unit = runner_match.group(2).lower()
+            try:
+                time_val = float(time_str)
+            except ValueError:
+                # [\d.]+ can match malformed numbers such as "1.2.3".
+                logger.warning(f"Could not parse runner time: {time_str}")
+                continue
+            # The day/part context is kept until the next header line.
+            results.append(PerformanceResult(
+                user=user, year=year, day=actual_day, part=current_part,
+                time_ns=CargoAOCRunner._convert_to_nanoseconds(time_val, unit),
+                timestamp=timestamp,
+            ))
+        
+        # If we found results with the line-by-line approach, return them
+        if results:
+            return results
+        
+        # Otherwise, try the original pattern-based approach
         for pattern in patterns:
-            for match in re.finditer(pattern, output, re.IGNORECASE):
+            for match in re.finditer(pattern, output, re.IGNORECASE | re.MULTILINE):
                 groups = match.groups()
                 
                 # Determine day and part based on pattern
@@ -492,6 +565,10 @@ class CargoAOCRunner:
                     time_str = groups[1]
                     unit = groups[2].lower()
                     actual_day = day
+                elif len(groups) == 2:
+                    # Standalone runner/generator line (use provided day, assume part from context)
+                    # This is tricky - we'll skip these and rely on the block-based approach above
+                    continue
                 else:
                     continue
                 
@@ -512,17 +589,80 @@ class CargoAOCRunner:
                         logger.warning(f"Unknown time unit '{unit}', assuming nanoseconds")
                         time_ns = int(time_val)
                     
-                    results.append(PerformanceResult(
-                        user=user,
-                        year=year,
-                        day=actual_day,
-                        part=part_num,
-                        time_ns=time_ns,
-                        timestamp=timestamp
-                    ))
+                    # Avoid duplicates
+                    if not any(r.day == actual_day and r.part == part_num 
+                              for r in results):
+                        results.append(PerformanceResult(
+                            user=user,
+                            year=year,
+                            day=actual_day,
+                            part=part_num,
+                            time_ns=time_ns,
+                            timestamp=timestamp
+                        ))
                 except ValueError:
                     logger.warning(f"Could not parse time: {time_str}")
         
+        # If no results found, try a more lenient approach - look for any numbers with time units
+        if not results:
+            # Look for patterns like "123.456ms" or "123.456 ms" anywhere in output.
+            # \s* already covers the no-space form, so one pattern is enough.
+            # Both micro spellings (μ and µ) are accepted, matching the stricter
+            # patterns used earlier in this function.
+            lenient_pattern = r'([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)'
+
+            # Collect every timing that actually parses as a number.
+            timings = []
+            for match in re.finditer(lenient_pattern, output, re.IGNORECASE):
+                time_str = match.group(1)
+                unit = match.group(2).lower()
+                try:
+                    time_val = float(time_str)
+                except ValueError:
+                    # [\d.]+ also matches text such as "..." or "1.2.3"; skip it
+                    # instead of letting float() abort the whole parse.
+                    logger.debug(f"Ignoring non-numeric timing candidate: {time_str}")
+                    continue
+                timings.append((time_val, unit))
+
+            # If we find exactly 1 or 2 timings, assume they're Part 1 and Part 2
+            # (in output order). Any other count is too ambiguous to attribute
+            # to a part, so nothing is reported.
+            if len(timings) in (1, 2):
+                for idx, (time_val, unit) in enumerate(timings, 1):
+                    time_ns = CargoAOCRunner._convert_to_nanoseconds(time_val, unit)
+                    results.append(PerformanceResult(
+                        user=user,
+                        year=year,
+                        day=day,
+                        part=idx,
+                        time_ns=time_ns,
+                        timestamp=timestamp
+                    ))
+            elif timings:
+                logger.debug(
+                    f"Found {len(timings)} timing candidates; too ambiguous to assign to parts"
+                )
+        
+        return results
+    
+    @staticmethod
+    def _convert_to_nanoseconds(time_val: float, unit: str) -> int:
+        """Convert ``time_val`` expressed in ``unit`` to whole nanoseconds.
+
+        ``unit`` is matched case-insensitively; both micro spellings (U+03BC
+        and U+00B5) and ``us`` are accepted. Unknown units are assumed to be
+        nanoseconds. The result is rounded rather than truncated so float
+        noise (e.g. 1.005 * 1000 == 1004.999...) cannot drop a nanosecond.
+        """
+        factors = {
+            's': 1_000_000_000, 'sec': 1_000_000_000, 'second': 1_000_000_000,
+            'ms': 1_000_000, 'millisecond': 1_000_000,
+            'μs': 1_000, 'µs': 1_000, 'us': 1_000, 'microsecond': 1_000,
+            'ns': 1, 'nanosecond': 1,
+        }
+        return round(time_val * factors.get(unit.lower(), 1))
+        
         return results