From 0e4b59061c2fbb76d3ce9fa045919ca8c9083e0a Mon Sep 17 00:00:00 2001 From: David Anderson Date: Thu, 28 Nov 2024 01:13:51 -0800 Subject: [PATCH] - improve batch page correctly show # of in progress, error, done, unsent jobs fraction done is frac of success jobs - Add 'verbose' checkbox for BUDA job submit. docker_wrapper prints stuff to stderr (can view on result page) - fix bugs in non-BUDA submit pages --- html/inc/bootstrap.inc | 10 ++- html/inc/submit_util.inc | 76 +++++++++++++++-------- html/user/buda_submit.php | 20 ++++-- html/user/get_output.php | 8 +-- html/user/get_output2.php | 4 +- html/user/job_file.php | 4 +- html/user/sandbox.php | 10 +-- html/user/submit.php | 115 +++++++++++++++++++++-------------- html/user/submit_example.php | 4 +- 9 files changed, 157 insertions(+), 94 deletions(-) diff --git a/html/inc/bootstrap.inc b/html/inc/bootstrap.inc index 3996a06915f..e9eb0cb0601 100644 --- a/html/inc/bootstrap.inc +++ b/html/inc/bootstrap.inc @@ -500,8 +500,14 @@ function form_submit($text, $attrs='') { function form_checkbox($label, $name, $checked=false) { echo sprintf('
-   %s + +
+ ', + FORM_LEFT_CLASS, $label, FORM_RIGHT_CLASS + ); + echo sprintf(' +
- ', $name, $checked?"checked":"", $label + ', $name, $checked?"checked":"" ); } diff --git a/html/inc/submit_util.inc b/html/inc/submit_util.inc index c30fb770c74..a3731be6f79 100644 --- a/html/inc/submit_util.inc +++ b/html/inc/submit_util.inc @@ -78,7 +78,7 @@ function has_submit_access($user, $app_id) { function has_admin_access($user, $app_id) { $us = BoincUserSubmit::lookup_userid($user->id); if (!$us) return false; - if ($us->admin_all) return true; + if ($us->manage_all) return true; $usa = BoincUserSubmitApp::lookup("user_id=$user->id and app_id=$app_id"); if (!$usa) return false; return $usa->manage; @@ -126,11 +126,23 @@ function delete_remote_submit_user($user) { BoincUserSubmitApp::delete_user($user->id); } - -// given its WUs, compute progress of a batch -// (fraction done, est completion time etc.) -// NOTE: this is inefficient because we need all the WUs. -// it could be done by server components +// given its WUs, compute parameters of a batch: +// credit_canonical: credit granted to canonical instances +// fraction_done: frac of jobs that are done (success or failed) +// state: whether complete (all jobs done) +// completion_time: if newly complete +// nerror_jobs: # of failed jobs +// Update the above in DB. +// Also compute (not in DB): +// njobs_success: # of jobs with canonical instance +// njobs_in_prog: # of jobs not success or fail, +// and at least one result in progress +// +// return the batch object, with these values +// +// NOTE: this involves reading the batch's WUs and results, +// which could be inefficient for huge batches. +// It could instead be done by server components // (transitioner, validator etc.) 
as jobs complete or time out // // TODO: update est_completion_time @@ -141,51 +153,61 @@ function get_batch_params($batch, $wus) { // return $batch; } + if (!$wus) { + if ($batch->njobs) { + $batch->update('njobs=0'); + $batch->njobs = 0; + } + return $batch; + } + + // make list of WU IDs with an in-progress result + $res_in_prog = BoincResult::enum( + sprintf('batch=%d and server_state=%d', + $batch->id, RESULT_SERVER_STATE_IN_PROGRESS + ) + ); + $wus_in_prog = []; + foreach ($res_in_prog as $res) { + $wus_in_prog[$res->workunitid] = true; + } + $fp_total = 0; $fp_done = 0; $completed = true; $batch->nerror_jobs = 0; $batch->credit_canonical = 0; + $njobs_success = 0; + $njobs_in_prog = 0; foreach ($wus as $wu) { $fp_total += $wu->rsc_fpops_est; if ($wu->canonical_resultid) { $fp_done += $wu->rsc_fpops_est; + $njobs_success++; $batch->credit_canonical += $wu->canonical_credit; } else if ($wu->error_mask) { $batch->nerror_jobs++; } else { $completed = false; + if (array_key_exists($wu->id, $wus_in_prog)) { + $njobs_in_prog++; + } } } - if ($fp_total) { - $batch->fraction_done = $fp_done / $fp_total; - } + $njobs = count($wus); + $batch->njobs = $njobs; + $batch->fraction_done = ($njobs_success + $batch->nerror_jobs)/$batch->njobs; if ($completed && $batch->state == BATCH_STATE_IN_PROGRESS) { $batch->state = BATCH_STATE_COMPLETE; $batch->completion_time = time(); } - $batch->update("fraction_done = $batch->fraction_done, nerror_jobs = $batch->nerror_jobs, state=$batch->state, completion_time = $batch->completion_time, credit_canonical = $batch->credit_canonical"); + $batch->update("fraction_done = $batch->fraction_done, nerror_jobs = $batch->nerror_jobs, state=$batch->state, completion_time = $batch->completion_time, credit_canonical = $batch->credit_canonical, njobs=$njobs"); - $batch->credit_estimate = flops_to_credit($fp_total); + $batch->njobs_success = $njobs_success; + $batch->njobs_in_prog = $njobs_in_prog; return $batch; } -// get the number of WUs for 
which we've sent at least 1 instance -// TODO: do this more efficiently (single query) -// -function wus_nsent($wus) { - $n = 0; - foreach ($wus as $wu) { - $res = BoincResult::enum( - sprintf('workunitid=%d and server_state<>%d', - $wu->id, RESULT_SERVER_STATE_UNSENT - ) - ); - if (count($res) > 0) $n++; - } - return $n; -} - // get the physical names of a result's output files. // function get_outfile_phys_names($result) { diff --git a/html/user/buda_submit.php b/html/user/buda_submit.php index 307c4d6d11d..901b7b419a7 100644 --- a/html/user/buda_submit.php +++ b/html/user/buda_submit.php @@ -35,18 +35,22 @@ function submit_form($user) { if (!is_valid_filename($variant)) die('bad arg'); $desc = "
- A zipped directory with one subdirectory per job, - containing the input file(s) for that job + A zip file with one directory per job. + Each directory contains the input file(s) for that job and an optional file cmdline containing command-line arguments. Details. "; + $desc2 = "
+ Write Docker commands and output to stderr (for debugging). + "; page_head("Submit jobs to $app ($variant)"); form_start('buda_submit.php'); form_input_hidden('action', 'submit'); form_input_hidden('app', $app); form_input_hidden('variant', $variant); form_select("Batch zip file $desc", 'batch_file', $sbitems_zip); + form_checkbox("Verbose Docker output? $desc2", 'wrapper_verbose'); form_submit('OK'); form_end(); page_tail(); @@ -179,7 +183,7 @@ function stage_input_files($batch_dir, $batch_desc, $batch_id) { } function create_jobs( - $variant_desc, $batch_desc, $batch_id, $batch_dir_name + $variant_desc, $batch_desc, $batch_id, $batch_dir_name, $wrapper_verbose ) { global $buda_app; @@ -205,8 +209,10 @@ function create_jobs( $job_cmds .= "$job_cmd\n"; } $cmd = sprintf( - 'cd ../..; bin/create_work --appname %s --batch %d --stdin --command_line "--dockerfile %s --verbose" --wu_template %s --result_template %s', - $buda_app->name, $batch_id, $variant_desc->dockerfile, + 'cd ../..; bin/create_work --appname %s --batch %d --stdin --command_line "--dockerfile %s %s" --wu_template %s --result_template %s', + $buda_app->name, $batch_id, + $variant_desc->dockerfile, + $wrapper_verbose?'--verbose':'', "buda_batches/$batch_dir_name/template_in", "buda_batches/$batch_dir_name/template_out" ); @@ -307,6 +313,7 @@ function handle_submit($user) { if (!is_valid_filename($variant)) die('bad arg'); $batch_file = get_str('batch_file'); if (!is_valid_filename($batch_file)) die('bad arg'); + $wrapper_verbose = get_str('wrapper_verbose', true); $variant_dir = "../../buda_apps/$app/$variant"; $variant_desc = json_decode( @@ -331,7 +338,8 @@ function handle_submit($user) { stage_input_files($batch_dir, $batch_desc, $batch->id); create_jobs( - $variant_desc, $batch_desc, $batch->id, $batch_dir_name + $variant_desc, $batch_desc, $batch->id, $batch_dir_name, + $wrapper_verbose ); // mark batch as in progress diff --git a/html/user/get_output.php b/html/user/get_output.php index 
dcaf548f254..1c5e69420f4 100644 --- a/html/user/get_output.php +++ b/html/user/get_output.php @@ -52,7 +52,7 @@ function get_output_file($instance_name, $file_num, $auth_str) { return_error("bad authenticator"); } - $names = get_outfile_names($result); + $names = get_outfile_phys_names($result); if ($file_num >= count($names)) { return_error("bad file num: $file_num > ".count($names)); } @@ -105,7 +105,7 @@ function get_batch_output_files($auth_str) { foreach ($wus as $wu) { if (!$wu->canonical_resultid) continue; $result = BoincResult::lookup_id($wu->canonical_resultid); - $names = get_outfile_names($result); + $names = get_outfile_phys_names($result); foreach ($names as $name) { $path = dir_hier_path($name, $upload_dir, $fanout); if (is_file($path)) { @@ -143,7 +143,7 @@ function get_wu_output_file($wu_name, $file_num, $auth_str) { return_error("no canonical result for wu $wu->name"); } $result = BoincResult::lookup_id($wu->canonical_resultid); - $names = get_outfile_names($result); + $names = get_outfile_phys_names($result); $path = dir_hier_path($names[$file_num], $upload_dir, $fanout); if (file_exists($path)) { do_download($path); @@ -181,7 +181,7 @@ function get_wu_output_files($wu_id, $auth_str) { return_error("no canonical result for wu $wu->name"); } $result = BoincResult::lookup_id($wu->canonical_resultid); - $names = get_outfile_names($result); + $names = get_outfile_phys_names($result); foreach ($names as $name) { $path = dir_hier_path($name, $upload_dir, $fanout); if (is_file($path)) { diff --git a/html/user/get_output2.php b/html/user/get_output2.php index a7f5a1e885e..3d584c391c3 100644 --- a/html/user/get_output2.php +++ b/html/user/get_output2.php @@ -52,7 +52,7 @@ function check_auth($auth, $batch) { } function do_result_aux($result, $batch, $file_num=null) { - $phys_names = get_outfile_names($result); + $phys_names = get_outfile_phys_names($result); $log_names = get_outfile_log_names($result); if ($file_num !== null) { $path = 
upload_path($phys_names[$file_num]); @@ -123,7 +123,7 @@ function do_batch($batch_id, $auth) { $wus = BoincWorkunit::enum("batch=$batch_id and canonical_resultid<>0"); foreach ($wus as $wu) { $result = BoincResult::lookup_id($wu->canonical_resultid); - $phys_names = get_outfile_names($result); + $phys_names = get_outfile_phys_names($result); $log_names = get_outfile_log_names($result); if (count($phys_names) == 1) { $cmd = sprintf('ln -s %s %s/%s__%s', diff --git a/html/user/job_file.php b/html/user/job_file.php index 6409be42cf6..01501381e00 100644 --- a/html/user/job_file.php +++ b/html/user/job_file.php @@ -99,7 +99,7 @@ function upload_error_description($errno) { function query_files($r) { xml_start_tag("query_files"); - list($user, $user_submit) = check_remote_submit_permissions($r, null); + $user = check_remote_submit_permissions($r, null); $absent_files = array(); $now = time(); $delete_time = (int)$r->delete_time; @@ -177,7 +177,7 @@ function delete_uploaded_files() { function upload_files($r) { xml_start_tag("upload_files"); - list($user, $user_submit) = check_remote_submit_permissions($r, null); + $user = check_remote_submit_permissions($r, null); $fanout = parse_config(get_config(), ""); $delete_time = (int)$r->delete_time; $batch_id = (int)$r->batch_id; diff --git a/html/user/sandbox.php b/html/user/sandbox.php index 07fa4d71904..ad493866850 100644 --- a/html/user/sandbox.php +++ b/html/user/sandbox.php @@ -44,9 +44,9 @@ function list_files($user, $notice) {

Upload files

NOTE: if you upload text files from Windows, - they will have CRLF line endings. - If they are shell scripts, they won't work on Linux. - Add shell scripts using Add File. + they will be given CRLF line endings. + Then, if they are shell scripts, they won't work on Linux. + Add shell scripts using 'Add text file' below.

@@ -55,7 +55,7 @@ function list_files($user, $notice) {


-

Add file

+

Add text file

"; form_start('sandbox.php', 'post'); form_input_hidden('action', 'add_file'); @@ -180,7 +180,7 @@ function view_file($user) { } $user = get_logged_in_user(); -if (!submit_permissions($user)) error_page("no job submission access"); +if (!has_file_access($user)) error_page("no job submission access"); $action = get_str('action', true); if (!$action) $action = post_str('action', true); diff --git a/html/user/submit.php b/html/user/submit.php index 7371423361b..8c12b4af318 100644 --- a/html/user/submit.php +++ b/html/user/submit.php @@ -34,6 +34,10 @@ define("PAGE_SIZE", 20); +function return_link() { + echo "

Return to job submission page\n"; +} + function state_count($batches, $state) { $n = 0; foreach ($batches as $batch) { @@ -378,7 +382,7 @@ function handle_batch_stats($user) { page_tail(); return; } - text_start(); + text_start(800); start_table('table-striped'); row2("qualifying results", $n); row2("mean WSS", size_string($wss_sum/$n)); @@ -399,13 +403,13 @@ function handle_batch_stats($user) { // light gray: unsent // function progress_bar($batch, $wus, $width) { - $w_success = $width*$batch->fraction_done; - $w_fail = $width*$batch->nerror_jobs/$batch->njobs; - $nsuccess = $batch->njobs * $batch->fraction_done; - $nsent = wus_nsent($wus); - $nprog = $nsent - $nsuccess - $batch->nerror_jobs; - $w_prog = $width*$nprog/$batch->njobs; - $nunsent = $batch->njobs-$nsent; + $nsuccess = $batch->njobs_success; + $nerror = $batch->nerror_jobs; + $nin_prog = $batch->njobs_in_prog; + $nunsent = $batch->njobs - $nsuccess - $nerror - $nin_prog; + $w_success = $width*$nsuccess/$batch->njobs; + $w_fail = $width*$nerror/$batch->njobs; + $w_prog = $width*$nin_prog/$batch->njobs; $w_unsent = $width*$nunsent/$batch->njobs; $x = ''; if ($w_fail) { @@ -422,9 +426,9 @@ function progress_bar($batch, $wus, $width) { } $x .= "
- $batch->nerror_jobs fail · + $nerror fail · $nsuccess success · - $nprog in progress · + $nin_prog in progress · $nunsent unsent "; @@ -434,8 +438,6 @@ function progress_bar($batch, $wus, $width) { // show the details of an existing batch // function handle_query_batch($user) { - global $web_apps; - $batch_id = get_int('batch_id'); $batch = BoincBatch::lookup_id($batch_id); $app = BoincApp::lookup_id($batch->app_id); @@ -447,10 +449,10 @@ function handle_query_batch($user) { $owner = BoincUser::lookup_id($batch->user_id); } - $web_app = $web_apps[$app->name]; + $web_app = get_web_app($app); page_head("Batch $batch_id"); - text_start(); + text_start(800); start_table(); row2("name", $batch->name); if ($batch->description) { @@ -467,14 +469,21 @@ function handle_query_batch($user) { if ($batch->expire_time) { row2("expiration time", time_str($batch->expire_time)); } - row2("progress", progress_bar($batch, $wus, 600)); + if ($batch->njobs) { + row2("progress", progress_bar($batch, $wus, 600)); + } if ($batch->completion_time) { row2("completed", local_time_str($batch->completion_time)); } row2("GFLOP/hours, estimated", number_format(credit_to_gflop_hours($batch->credit_estimate), 2)); row2("GFLOP/hours, actual", number_format(credit_to_gflop_hours($batch->credit_canonical), 2)); - row2("Output File Size", size_string(batch_output_file_size($batch->id))); + if (!$web_app->assim_move) { + row2("Total size of output files", + size_string(batch_output_file_size($batch->id)) + ); + } end_table(); + echo "

"; if ($web_app->assim_move) { $url = "get_output3.php?action=get_batch&batch_id=$batch->id"; @@ -537,34 +546,47 @@ function handle_query_batch($user) { row_array($x); } end_table(); - echo "

Return to job control page\n"; + return_link(); text_end(); page_tail(); } -// show the details of a job, including links to see the output files +// get the 'web app' structure (from project.inc) for the given app. +// This says what output file scheme it uses and what the submit page URL is. +// If not listed, return a default structure // -function handle_query_job($user) { +function get_web_app($app) { global $web_apps; + if (isset($web_apps) && array_key_exists($app->name, $web_apps)) { + return $web_apps[$app->name]; + } + $x = new StdClass; + $x->submit_url = null; + $x->assim_move = false; + return $x; +} +// show the details of a job, including links to see the output files +// +function handle_query_job($user) { $wuid = get_int('wuid'); $wu = BoincWorkunit::lookup_id($wuid); if (!$wu) error_page("no such job"); $app = BoincApp::lookup_id($wu->appid); - $web_app = $web_apps[$app->name]; + $web_app = get_web_app($app); - page_head("Job $wu->name"); - text_start(); + page_head("Job '$wu->name'"); + text_start(800); echo " - Workunit details +

  • Workunit details

    - batch>Batch $wu->batch +

  • batch>Batch details "; echo "

    Instances

    \n"; - start_table(); + start_table('table-striped'); table_header( "ID
    click for result page", "State", @@ -580,7 +602,7 @@ function handle_query_job($user) { ]; $i = 0; if ($result->server_state == RESULT_SERVER_STATE_OVER) { - $phys_names = get_outfile_names($result); + $phys_names = get_outfile_phys_names($result); $log_names = get_outfile_log_names($result); for ($i=0; $iassim_move) { @@ -591,19 +613,24 @@ function handle_query_job($user) { ); $x[] = "view · download"; } else { - // file is in upload hier - $url = sprintf( - 'get_output2.php?cmd=result&result_id=%d&file_num=%d', - $result->id, $i - ); - $path = dir_hier_path($phys_names[$i], $upload_dir, $fanout); - $s = stat($path); - $size = $s['size']; - $x[] = sprintf('%s (%s bytes)
    ', - $url, - $log_names[$i], - number_format($size) + $path = dir_hier_path( + $phys_names[$i], $upload_dir, $fanout ); + if (file_exists($path)) { + $url = sprintf( + 'get_output2.php?cmd=result&result_id=%d&file_num=%d', + $result->id, $i + ); + $s = stat($path); + $size = $s['size']; + $x[] = sprintf('%s (%s bytes)
    ', + $url, + $log_names[$i], + number_format($size) + ); + } else { + $x[] = sprintf("file '%s' is missing", $log_names[$i]); + } } } } else { @@ -618,7 +645,7 @@ function handle_query_job($user) { echo "

    Input files

    \n"; $x = "".$wu->xml_doc.""; $x = simplexml_load_string($x); - start_table(); + start_table('table-striped'); table_header("Name
    (click to view)", "Size (bytes)", "MD5" ); @@ -639,7 +666,7 @@ function handle_query_job($user) { end_table(); text_end(); - echo "

    Return to job control page\n"; + return_link(); page_tail(); } @@ -655,7 +682,7 @@ function handle_abort_batch_confirm() { "submit.php?action=abort_batch&batch_id=$batch_id", "Yes - abort batch" ); - echo "

    Return to job control page\n"; + return_link(); page_tail(); } @@ -675,7 +702,7 @@ function handle_abort_batch($user) { check_access($user, $batch); abort_batch($batch); page_head("Batch aborted"); - echo "

    Return to job control page\n"; + return_link(); page_tail(); } @@ -691,7 +718,7 @@ function handle_retire_batch_confirm() { "submit.php?action=retire_batch&batch_id=$batch_id", "Yes - retire batch" ); - echo "

    Return to job control page\n"; + return_link(); page_tail(); } @@ -702,7 +729,7 @@ function handle_retire_batch($user) { check_access($user, $batch); retire_batch($batch); page_head("Batch retired"); - echo "

    Return to job control page\n"; + return_link(); page_tail(); } diff --git a/html/user/submit_example.php b/html/user/submit_example.php index de4558b5b4e..4cb97c46606 100644 --- a/html/user/submit_example.php +++ b/html/user/submit_example.php @@ -314,11 +314,12 @@ function handle_query_batch() { row2("GFLOP/hours, estimated", number_format(credit_to_gflop_hours($batch->credit_estimate), 2)); row2("GFLOP/hours, actual", number_format(credit_to_gflop_hours($batch->credit_canonical), 2)); end_table(); + echo "

    "; $url = boinc_get_output_files($req); show_button($url, "Get zipped output files"); + echo "

    "; switch ($batch->state) { case BATCH_STATE_IN_PROGRESS: - echo "
    "; show_button( "submit_example.php?action=abort_batch_confirm&batch_id=$req->batch_id", "Abort batch" @@ -326,7 +327,6 @@ function handle_query_batch() { break; case BATCH_STATE_COMPLETE: case BATCH_STATE_ABORTED: - echo "
    "; show_button( "submit_example.php?action=retire_batch_confirm&batch_id=$req->batch_id", "Retire batch"