Fix parsing for cargo aoc

This commit is contained in:
Bill Thiede 2025-12-02 16:30:00 -08:00
parent 086fc2ff3e
commit 61b10131b4

View File

@ -422,6 +422,10 @@ class CargoAOCRunner:
logger.warning(f"cargo aoc failed for day {day} in {work_dir}: {result.stderr}")
continue
# Log output for debugging if no results found
if not result.stdout.strip() and not result.stderr.strip():
logger.warning(f"No output from cargo aoc for {user} year {year} day {day}")
# Parse output for runtime information
day_results = CargoAOCRunner._parse_runtime_output(
result.stdout, result.stderr, day, year, user
@ -429,7 +433,9 @@ class CargoAOCRunner:
if day_results:
logger.info(f"Parsed {len(day_results)} runtime result(s) for {user} year {year} day {day}")
else:
logger.warning(f"No runtime data parsed for {user} year {year} day {day}")
# Log a sample of the output to help debug parsing issues
output_sample = (result.stdout + "\n" + result.stderr).strip()[:500]
logger.warning(f"No runtime data parsed for {user} year {year} day {day}. Output sample: {output_sample}")
results.extend(day_results)
except subprocess.TimeoutExpired:
@ -448,6 +454,7 @@ class CargoAOCRunner:
- "Day X - Part Y: XXX.XXX ms"
- "Day X - Part Y: XXX.XXX μs"
- "Day X - Part Y: XXX.XXX ns"
- "Part Y: XXX.XXX ms"
- Or similar formats
"""
results = []
@ -462,20 +469,86 @@ class CargoAOCRunner:
# "Day 1 Part 1: 123.456 ms"
# "day 1 - part 1: 123.456 ms"
# "Part 1: 123.456 ms" (when day is already known)
# Also handle formats like "Day 01 - Part 1" or "Day 1, Part 1"
# And the format with generator/runner on separate lines:
# "Day 2 - Part 1 : <answer>"
# " generator: 5.651µs,"
# " runner: 3.07µs"
patterns = [
# Full format with day and part
r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
r'day\s+(\d+)\s*-\s*part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
r'Day\s+(\d+)\s+Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
r'day\s+(\d+)\s+part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
r'day(\d+)\s*-\s*part(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
# Part only (use provided day)
r'Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
r'part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|us|ms|s)',
# Format with generator/runner on separate lines (most common cargo-aoc format)
# Match "Day X - Part Y" followed by lines with "runner:" or "generator:"
r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]+.*?(?:^|\n).*?runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
r'day\s+(\d+)\s*-\s*part\s+(\d+)[:\s]+.*?(?:^|\n).*?runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
# Standalone runner/generator lines (for when we're already in a Day X - Part Y block)
r'runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
r'generator\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
# Full format with day and part - various separators
r'Day\s+(\d+)\s*[-,\s]+\s*Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
r'day\s+(\d+)\s*[-,\s]+\s*part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
r'Day\s+(\d+)\s+Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
r'day\s+(\d+)\s+part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
r'day(\d+)\s*[-,\s]+\s*part(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
# Part only (use provided day) - more flexible
r'Part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
r'part\s+(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
# Handle formats without explicit "Part" label
r'Day\s+(\d+)\s*[-,\s]+\s*(\d+)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
# Handle formats with parentheses or brackets
r'\(Part\s+(\d+)\)[:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
r'\[Part\s+(\d+)\][:\s]+([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)',
]
# First, try to parse the generator/runner format which is most common
# Look for "Day X - Part Y" lines and extract runner times from following lines
lines = output.split('\n')
current_day = None
current_part = None
for i, line in enumerate(lines):
# Check if this line starts a new Day/Part block
day_part_match = re.match(r'Day\s+(\d+)\s*-\s*Part\s+(\d+)[:\s]', line, re.IGNORECASE)
if day_part_match:
current_day = int(day_part_match.group(1))
current_part = int(day_part_match.group(2))
actual_day = current_day if current_day > 0 and current_day <= 25 else day
continue
# If we're in a Day/Part block, look for runner timing
if current_day is not None and current_part is not None:
runner_match = re.search(r'runner\s*:\s*([\d.]+)\s*(ns|μs|µs|us|ms|s|sec)', line, re.IGNORECASE)
if runner_match:
time_str = runner_match.group(1)
unit = runner_match.group(2).lower()
try:
time_val = float(time_str)
time_ns = CargoAOCRunner._convert_to_nanoseconds(time_val, unit)
results.append(PerformanceResult(
user=user,
year=year,
day=actual_day,
part=current_part,
time_ns=time_ns,
timestamp=timestamp
))
# Reset after finding runner (in case there are multiple parts)
# But keep current_day/current_part until we hit the next Day line
except ValueError:
logger.warning(f"Could not parse runner time: {time_str}")
# Check if next line starts a new Day/Part block (reset current context)
if i + 1 < len(lines):
next_day_match = re.match(r'Day\s+\d+\s*-\s*Part\s+\d+', lines[i + 1], re.IGNORECASE)
if next_day_match:
# Don't reset yet - let the next iteration handle it
pass
# If we found results with the line-by-line approach, return them
if results:
return results
# Otherwise, try the original pattern-based approach
for pattern in patterns:
for match in re.finditer(pattern, output, re.IGNORECASE):
for match in re.finditer(pattern, output, re.IGNORECASE | re.MULTILINE):
groups = match.groups()
# Determine day and part based on pattern
@ -492,6 +565,10 @@ class CargoAOCRunner:
time_str = groups[1]
unit = groups[2].lower()
actual_day = day
elif len(groups) == 2:
# Standalone runner/generator line (use provided day, assume part from context)
# This is tricky - we'll skip these and rely on the block-based approach above
continue
else:
continue
@ -512,17 +589,80 @@ class CargoAOCRunner:
logger.warning(f"Unknown time unit '{unit}', assuming nanoseconds")
time_ns = int(time_val)
results.append(PerformanceResult(
user=user,
year=year,
day=actual_day,
part=part_num,
time_ns=time_ns,
timestamp=timestamp
))
# Avoid duplicates
if not any(r.day == actual_day and r.part == part_num
for r in results):
results.append(PerformanceResult(
user=user,
year=year,
day=actual_day,
part=part_num,
time_ns=time_ns,
timestamp=timestamp
))
except ValueError:
logger.warning(f"Could not parse time: {time_str}")
# If no results found, try a more lenient approach - look for any numbers with time units
if not results:
# Look for patterns like "123.456ms" or "123.456 ms" anywhere in output
lenient_patterns = [
r'([\d.]+)\s*(ns|μs|us|ms|s|sec)',
r'([\d.]+)(ns|μs|us|ms|s|sec)',
]
# Try to extract parts sequentially if we find timing info
for pattern in lenient_patterns:
matches = list(re.finditer(pattern, output, re.IGNORECASE))
if matches:
# If we find exactly 1 or 2 matches, assume they're Part 1 and Part 2
if len(matches) == 1:
match = matches[0]
time_val = float(match.group(1))
unit = match.group(2).lower()
time_ns = CargoAOCRunner._convert_to_nanoseconds(time_val, unit)
results.append(PerformanceResult(
user=user,
year=year,
day=day,
part=1,
time_ns=time_ns,
timestamp=timestamp
))
elif len(matches) == 2:
for idx, match in enumerate(matches, 1):
time_val = float(match.group(1))
unit = match.group(2).lower()
time_ns = CargoAOCRunner._convert_to_nanoseconds(time_val, unit)
results.append(PerformanceResult(
user=user,
year=year,
day=day,
part=idx,
time_ns=time_ns,
timestamp=timestamp
))
break
return results
@staticmethod
def _convert_to_nanoseconds(time_val: float, unit: str) -> int:
    """Convert a time value expressed in ``unit`` to whole nanoseconds.

    Args:
        time_val: Magnitude of the measurement, in ``unit``.
        unit: Unit label, matched case-insensitively after trimming
            surrounding whitespace. Recognised labels are
            ``s``/``sec``/``second``, ``ms``/``millisecond``,
            ``us``/``microsecond`` (plus the Greek-mu and micro-sign
            spellings of ``us``) and ``ns``/``nanosecond``.

    Returns:
        The duration rounded to the nearest nanosecond. An unrecognised
        unit is treated as nanoseconds.
    """
    ns_per_unit = {
        "s": 1_000_000_000,
        "sec": 1_000_000_000,
        "second": 1_000_000_000,
        "ms": 1_000_000,
        "millisecond": 1_000_000,
        # Both code points that render as "micro" appear in tool output:
        # U+03BC (Greek small letter mu) and U+00B5 (micro sign).
        "\u03bcs": 1_000,
        "\u00b5s": 1_000,
        "us": 1_000,
        "microsecond": 1_000,
        "ns": 1,
        "nanosecond": 1,
    }
    # Unknown units fall back to a factor of 1, i.e. nanoseconds.
    factor = ns_per_unit.get(unit.strip().lower(), 1)
    # round() rather than int(): the float product can land just below the
    # intended integer, and truncation would then drop a whole nanosecond.
    return round(time_val * factor)
return results