From 7620007f32fd414a3e6f0c34af58db3eb295f1ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jelmer=20Vernoo=C4=B3?= Date: Sat, 28 Jan 2023 14:50:02 +0000 Subject: [PATCH] Integrate success and duration determination --- janitor/schedule.py | 122 +++++++++++++++++++++++--------------------- janitor/site/pkg.py | 4 +- 2 files changed, 66 insertions(+), 60 deletions(-) diff --git a/janitor/schedule.py b/janitor/schedule.py index 580d8ec5..861189cd 100644 --- a/janitor/schedule.py +++ b/janitor/schedule.py @@ -103,9 +103,48 @@ def queue_item_from_candidate_and_publish_policy(row): value, row['success_chance']) -async def estimate_success_probability( +async def _estimate_duration( + conn: asyncpg.Connection, + codebase: Optional[str] = None, + campaign: Optional[str] = None, +) -> Optional[timedelta]: + query = """ +SELECT AVG(finish_time - start_time) FROM run +WHERE failure_transient is not True """ + args: list[str] = [] + if codebase is not None: + query += " AND codebase = $%d" % (len(args) + 1) + args.append(codebase) + if campaign is not None: + query += " AND suite = $%d" % (len(args) + 1) + args.append(campaign) + return await conn.fetchval(query, *args) + + +async def estimate_duration( + conn: asyncpg.Connection, codebase: str, campaign: str +) -> timedelta: + """Estimate the duration of a codebase build for a certain campaign.""" + estimated_duration = await _estimate_duration( + conn, codebase=codebase, campaign=campaign + ) + if estimated_duration is not None: + return estimated_duration + + estimated_duration = await _estimate_duration(conn, codebase=codebase) + if estimated_duration is not None: + return estimated_duration + + estimated_duration = await _estimate_duration(conn, campaign=campaign) + if estimated_duration is not None: + return estimated_duration + + return timedelta(seconds=DEFAULT_ESTIMATED_DURATION) + + +async def estimate_success_probability_and_duration( conn: asyncpg.Connection, codebase: str, campaign: str, context: Optional[str] = None -) -> tuple[float, int]: +) -> tuple[float, timedelta, int]: # TODO(jelmer): Bias this towards recent runs? total = 0 success = 0 @@ -113,22 +152,25 @@ async def estimate_success_probability( same_context_multiplier = 0.5 else: same_context_multiplier = 1.0 + durations = [] for run in await conn.fetch(""" SELECT - result_code, instigated_context, context, failure_details, failure_transient, - start_time + result_code, instigated_context, context, failure_details, + finish_time - start_time AS duration FROM run -WHERE codebase = $1 AND suite = $2 +WHERE codebase = $1 AND suite = $2 AND failure_transient IS NOT True ORDER BY start_time DESC """, codebase, campaign): try: ignore_checker = IGNORE_RESULT_CODE[run['result_code']] except KeyError: def ignore_checker(run): - return run['failure_transient'] + return False if ignore_checker(run): continue + + durations.append(run['duration']) total += 1 if run['result_code'] == "success": success += 1 @@ -148,54 +190,18 @@ def ignore_checker(run): # we don't know the context. same_context_multiplier = 1.0 - return ((success * 10 + 1) / (total * 10 + 1) * same_context_multiplier), total - - -async def _estimate_duration( - conn: asyncpg.Connection, - codebase: Optional[str] = None, - campaign: Optional[str] = None, - limit: Optional[int] = 1000, -) -> Optional[timedelta]: - query = """ -SELECT AVG(duration) FROM -(select finish_time - start_time as duration FROM run -WHERE """ - args = [] - if codebase is not None: - query += " codebase = $1" - args.append(codebase) - if campaign is not None: - if codebase: - query += " AND" - query += " suite = $%d" % (len(args) + 1) - args.append(campaign) - query += " ORDER BY finish_time DESC" - if limit is not None: - query += " LIMIT %d" % limit - query += ") as q" - return await conn.fetchval(query, *args) - - -async def estimate_duration( - conn: asyncpg.Connection, codebase: str, campaign: str -) -> timedelta: - """Estimate the duration of a codebase build for a certain campaign.""" - estimated_duration = await _estimate_duration( - conn, codebase=codebase, campaign=campaign - ) - if estimated_duration is not None: - return estimated_duration - - estimated_duration = await _estimate_duration(conn, codebase=codebase) - if estimated_duration is not None: - return estimated_duration - - estimated_duration = await _estimate_duration(conn, campaign=campaign) - if estimated_duration is not None: - return estimated_duration + # It's going to be hard to estimate the duration, but other codemods + # might be a good candidate + estimated_duration = await _estimate_duration(conn, codebase=codebase) + if estimated_duration is None: + estimated_duration = await _estimate_duration(conn, campaign=campaign) + if estimated_duration is None: + estimated_duration = timedelta(seconds=DEFAULT_ESTIMATED_DURATION) + else: + estimated_duration = timedelta( + seconds=(sum([d.total_seconds() for d in durations]) / len(durations))) - return timedelta(seconds=DEFAULT_ESTIMATED_DURATION) + return ((success * 10 + 1) / (total * 10 + 1) * same_context_multiplier), estimated_duration, total # Overhead of doing a run; estimated to be roughly 20s @@ -288,14 +294,14 @@ async def do_schedule_regular( context = row['context'] if row is not None and command is None: command = row['command'] - estimated_duration = await estimate_duration(conn, codebase, campaign) - assert estimated_duration >= timedelta( - 0 - ), "{}: estimated duration < 0.0: {!r}".format(codebase, estimated_duration) ( estimated_probability_of_success, + estimated_duration, total_previous_runs, - ) = await estimate_success_probability(conn, codebase, campaign, context) + ) = await estimate_success_probability_and_duration(conn, codebase, campaign, context) + + assert estimated_duration >= timedelta(0), \ + f"{codebase}: estimated duration < 0.0: {estimated_duration!r}" if normalized_codebase_value is None: normalized_codebase_value = await conn.fetchval( diff --git a/janitor/site/pkg.py b/janitor/site/pkg.py index 14bf469a..6238bd63 100644 --- a/janitor/site/pkg.py +++ b/janitor/site/pkg.py @@ -134,7 +134,7 @@ async def generate_run_file( differ_url: Optional[str], publisher_url: Optional[str], logfile_manager, run, vcs_managers: Dict[str, VcsManager], is_admin, span ): - from ..schedule import estimate_success_probability + from ..schedule import estimate_success_probability_and_duration kwargs = {} kwargs["run"] = run kwargs["run_id"] = run['id'] @@ -166,7 +166,7 @@ async def generate_run_file( 'FROM review WHERE run_id = $1', run['id']) with span.new_child('sql:success-probability'): - kwargs["success_probability"], kwargs["total_previous_runs"] = await estimate_success_probability( + kwargs["success_probability"], kwargs['estimated_duration'], kwargs["total_previous_runs"] = await estimate_success_probability_and_duration( conn, run['package'], run['suite']) with span.new_child('sql:followups'): kwargs['followups'] = await conn.fetch("""SELECT \