Skip to content

Commit

Permalink
Integrate success and duration determination
Browse files Browse the repository at this point in the history
  • Loading branch information
jelmer committed Jan 28, 2023
1 parent 08f7914 commit 7620007
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 60 deletions.
122 changes: 64 additions & 58 deletions janitor/schedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,32 +103,74 @@ def queue_item_from_candidate_and_publish_policy(row):
value, row['success_chance'])


async def estimate_success_probability(
async def _estimate_duration(
conn: asyncpg.Connection,
codebase: Optional[str] = None,
campaign: Optional[str] = None,
) -> Optional[timedelta]:
query = """
SELECT AVG(finish_time - start_time) FROM run
WHERE failure_transient is not True """
args: list[str] = []
if codebase is not None:
query += " AND codebase = $%d" % (len(args) + 1)
args.append(codebase)
if campaign is not None:
query += " AND suite = $%d" % (len(args) + 1)
args.append(campaign)
return await conn.fetchval(query, *args)


async def estimate_duration(
conn: asyncpg.Connection, codebase: str, campaign: str
) -> timedelta:
"""Estimate the duration of a codebase build for a certain campaign."""
estimated_duration = await _estimate_duration(
conn, codebase=codebase, campaign=campaign
)
if estimated_duration is not None:
return estimated_duration

estimated_duration = await _estimate_duration(conn, codebase=codebase)
if estimated_duration is not None:
return estimated_duration

estimated_duration = await _estimate_duration(conn, campaign=campaign)
if estimated_duration is not None:
return estimated_duration

return timedelta(seconds=DEFAULT_ESTIMATED_DURATION)


async def estimate_success_probability_and_duration(
conn: asyncpg.Connection, codebase: str, campaign: str, context: Optional[str] = None
) -> tuple[float, int]:
) -> tuple[float, timedelta, int]:
# TODO(jelmer): Bias this towards recent runs?
total = 0
success = 0
if context is None:
same_context_multiplier = 0.5
else:
same_context_multiplier = 1.0
durations = []
for run in await conn.fetch("""
SELECT
result_code, instigated_context, context, failure_details, failure_transient,
start_time
result_code, instigated_context, context, failure_details,
finish_time - start_time AS duration
FROM run
WHERE codebase = $1 AND suite = $2
WHERE codebase = $1 AND suite = $2 AND failure_transient IS NOT True
ORDER BY start_time DESC
""", codebase, campaign):
try:
ignore_checker = IGNORE_RESULT_CODE[run['result_code']]
except KeyError:
def ignore_checker(run):
return run['failure_transient']
return False

if ignore_checker(run):
continue

durations.append(run['duration'])
total += 1
if run['result_code'] == "success":
success += 1
Expand All @@ -148,54 +190,18 @@ def ignore_checker(run):
# we don't know the context.
same_context_multiplier = 1.0

return ((success * 10 + 1) / (total * 10 + 1) * same_context_multiplier), total


async def _estimate_duration(
conn: asyncpg.Connection,
codebase: Optional[str] = None,
campaign: Optional[str] = None,
limit: Optional[int] = 1000,
) -> Optional[timedelta]:
query = """
SELECT AVG(duration) FROM
(select finish_time - start_time as duration FROM run
WHERE """
args = []
if codebase is not None:
query += " codebase = $1"
args.append(codebase)
if campaign is not None:
if codebase:
query += " AND"
query += " suite = $%d" % (len(args) + 1)
args.append(campaign)
query += " ORDER BY finish_time DESC"
if limit is not None:
query += " LIMIT %d" % limit
query += ") as q"
return await conn.fetchval(query, *args)


async def estimate_duration(
conn: asyncpg.Connection, codebase: str, campaign: str
) -> timedelta:
"""Estimate the duration of a codebase build for a certain campaign."""
estimated_duration = await _estimate_duration(
conn, codebase=codebase, campaign=campaign
)
if estimated_duration is not None:
return estimated_duration

estimated_duration = await _estimate_duration(conn, codebase=codebase)
if estimated_duration is not None:
return estimated_duration

estimated_duration = await _estimate_duration(conn, campaign=campaign)
if estimated_duration is not None:
return estimated_duration
# It's going to be hard to estimate the duration, but other codemods
# might be a good candidate
estimated_duration = await _estimate_duration(conn, codebase=codebase)
if estimated_duration is None:
estimated_duration = await _estimate_duration(conn, campaign=campaign)
if estimated_duration is None:
estimated_duration = timedelta(seconds=DEFAULT_ESTIMATED_DURATION)
else:
estimated_duration = timedelta(
seconds=(sum([d.total_seconds() for d in durations]) / len(durations)))

return timedelta(seconds=DEFAULT_ESTIMATED_DURATION)
return ((success * 10 + 1) / (total * 10 + 1) * same_context_multiplier), estimated_duration, total


# Overhead of doing a run; estimated to be roughly 20s
Expand Down Expand Up @@ -288,14 +294,14 @@ async def do_schedule_regular(
context = row['context']
if row is not None and command is None:
command = row['command']
estimated_duration = await estimate_duration(conn, codebase, campaign)
assert estimated_duration >= timedelta(
0
), "{}: estimated duration < 0.0: {!r}".format(codebase, estimated_duration)
(
estimated_probability_of_success,
estimated_duration,
total_previous_runs,
) = await estimate_success_probability(conn, codebase, campaign, context)
) = await estimate_success_probability_and_duration(conn, codebase, campaign, context)

assert estimated_duration >= timedelta(0), \
f"{codebase}: estimated duration < 0.0: {estimated_duration!r}"

if normalized_codebase_value is None:
normalized_codebase_value = await conn.fetchval(
Expand Down
4 changes: 2 additions & 2 deletions janitor/site/pkg.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ async def generate_run_file(
differ_url: Optional[str], publisher_url: Optional[str], logfile_manager, run,
vcs_managers: Dict[str, VcsManager], is_admin, span
):
from ..schedule import estimate_success_probability
from ..schedule import estimate_success_probability_and_duration
kwargs = {}
kwargs["run"] = run
kwargs["run_id"] = run['id']
Expand Down Expand Up @@ -166,7 +166,7 @@ async def generate_run_file(
'FROM review WHERE run_id = $1',
run['id'])
with span.new_child('sql:success-probability'):
kwargs["success_probability"], kwargs["total_previous_runs"] = await estimate_success_probability(
kwargs["success_probability"], kwargs['estimated_duration'], kwargs["total_previous_runs"] = await estimate_success_probability_and_duration(
conn, run['package'], run['suite'])
with span.new_child('sql:followups'):
kwargs['followups'] = await conn.fetch("""SELECT \
Expand Down

0 comments on commit 7620007

Please sign in to comment.