Only rerun for years that have changed

This commit is contained in:
Bill Thiede 2025-12-04 16:43:47 -08:00
parent 8b229237c9
commit 88ec3631ab

View File

@@ -354,6 +354,70 @@ class GitManager:
except Exception as e:
logger.error(f"Error checking for changes: {e}")
return True # Assume changes to be safe
def has_year_changes(self, repo_path: Path, year: int, last_git_rev: str = "") -> bool:
    """Check if a specific year directory has changes since last_git_rev.

    Whenever the answer cannot be determined (missing clone, failing git
    command, unexpected exception) the method reports True so the caller
    re-processes the year instead of silently skipping it.

    Args:
        repo_path: Path to the repository root.
        year: Year to check; the directory ``<repo_path>/<year>`` is inspected.
        last_git_rev: Last git revision we processed (empty string means
            check whether any commit touches the year directory).

    Returns:
        True if the year directory has changes (or the state could not be
        determined), False otherwise.
    """
    repo_path = Path(repo_path)
    if not repo_path.exists() or not (repo_path / '.git').exists():
        return True  # Needs to be cloned

    try:
        # is_dir() is False for a missing path, so this also covers "does not exist".
        year_dir = repo_path / str(year)
        if not year_dir.is_dir():
            return False  # Year directory doesn't exist, no changes

        # Get current HEAD revision
        head = subprocess.run(
            ['git', 'rev-parse', '--short', 'HEAD'],
            cwd=repo_path,
            capture_output=True,
            text=True
        )
        if head.returncode != 0:
            return True  # Can't determine, assume changes
        current_rev = head.stdout.strip()

        # No previously processed revision: any commit touching the year counts.
        if not last_git_rev:
            history = subprocess.run(
                ['git', 'log', '--oneline', '--', str(year)],
                cwd=repo_path,
                capture_output=True,
                text=True
            )
            if history.returncode != 0:
                return True  # Can't determine, assume changes
            return bool(history.stdout.strip())

        # HEAD is exactly the revision we already processed.
        if current_rev == last_git_rev:
            return False

        # Check if year directory was modified between last_git_rev and current
        diff = subprocess.run(
            ['git', 'diff', '--name-only', last_git_rev, 'HEAD', '--', str(year)],
            cwd=repo_path,
            capture_output=True,
            text=True
        )
        if diff.returncode != 0:
            # git could not compare the revisions (e.g. last_git_rev is no
            # longer known to this clone). Treat as changed rather than
            # skipping the year.
            return True
        return bool(diff.stdout.strip())
    except Exception as e:
        logger.error(f"Error checking year changes for {year}: {e}")
        return True  # Assume changes to be safe
class CargoAOCRunner:
@@ -1799,42 +1863,57 @@ class AOCSync:
url = repo_config['url']
local_path = repo_config['local_path']
if self.force_rerun or self.git_manager.has_changes(url, local_path):
if self.force_rerun:
logger.info(f"Force rerun enabled, processing repository {user_name}...")
# Always update the repo to get latest changes
if self.git_manager.clone_or_update_repo(url, local_path):
repo_path = Path(local_path)
# Check if years are specified in config
config_years = repo_config.get('years')
url = repo_config['url']
years_to_process = []
if config_years:
# Use years from config
years_to_process = config_years
else:
logger.info(f"Repository {user_name} has changes, updating...")
if self.git_manager.clone_or_update_repo(url, local_path):
repo_path = Path(local_path)
# Try to determine year(s) from the repository
years = CargoAOCRunner.extract_years_from_repo(repo_path)
# Check if years are specified in config
config_years = repo_config.get('years')
url = repo_config['url']
if config_years:
# Use years from config
for year in config_years:
if years:
years_to_process = years
else:
# If no year detected, check for year directories
logger.warning(f"No year detected for {user_name}, checking for year directories")
# Try common years as fallback
for try_year in [2025, 2024, 2023, 2022, 2021, 2020]:
year_dir = repo_path / str(try_year)
if year_dir.exists() and year_dir.is_dir():
logger.info(f"Found year directory {try_year} for {user_name}")
years_to_process.append(try_year)
# Process each year, checking for changes if not forcing
for year in years_to_process:
if self.force_rerun:
logger.info(f"Force rerun enabled, processing {user_name} year {year}...")
self._run_and_store_benchmarks(repo_path, year, user_name,
repo_url=url, is_multi_year=False)
else:
# Get last git_rev for this user/year from database
last_results = self.db.get_latest_results(years=[year], days=None)
last_git_rev = ""
for result in last_results:
if result['user'] == user_name and result['year'] == year:
last_git_rev = result.get('git_rev', '')
break
# Check if this year has changes
if self.git_manager.has_year_changes(repo_path, year, last_git_rev):
logger.info(f"Year {year} for {user_name} has changes, running benchmarks...")
self._run_and_store_benchmarks(repo_path, year, user_name,
repo_url=url, is_multi_year=False)
else:
# Try to determine year(s) from the repository
years = CargoAOCRunner.extract_years_from_repo(repo_path)
if years:
# Run benchmarks for each detected year
for year in years:
self._run_and_store_benchmarks(repo_path, year, user_name,
repo_url=url, is_multi_year=False)
else:
# If no year detected, check for year directories
logger.warning(f"No year detected for {user_name}, checking for year directories")
# Try common years as fallback
for try_year in [2025, 2024, 2023, 2022, 2021, 2020]:
year_dir = repo_path / str(try_year)
if year_dir.exists() and year_dir.is_dir():
logger.info(f"Found year directory {try_year} for {user_name}")
self._run_and_store_benchmarks(repo_path, try_year, user_name,
repo_url=url, is_multi_year=False)
logger.info(f"Year {year} for {user_name} has no changes, skipping...")
elif repo_type == 'multi-year':
# Multiple repositories, one per year