From fb67d1e86eeced0e347ab753573dd774420e9a76 Mon Sep 17 00:00:00 2001 From: Albert Louis Rossi Date: Tue, 15 Feb 2022 12:33:59 -0600 Subject: [PATCH] dcache-bulk: aborted request gets stuck in the STARTED state Motivation: On FNAL production we have found that file permission or existence errors on paths/targets at the root of the request leave the request stuck in the STARTED state. Modification: The initial bulk request job needs to check for request completion after unregistering itself from the completion handler. Result: Jobs which originally get stuck now complete; their failure information contains the reason for premature completion. (NOTE: the 'null' DEPTH seems to appear on FNAL production [7.2], but I have not yet been able to reproduce it using 7.2 on the testbed.) Target: master Request: 8.0 Request: 7.2 Requires-notes: yes Requires-book: no Patch: https://rb.dcache.org/r/13442/ Acked-by: Tigran --- .../services/bulk/handlers/BulkRequestHandler.java | 7 ++++++- .../services/bulk/handlers/BulkSubmissionHandler.java | 8 ++++++++ .../org/dcache/services/bulk/job/BulkRequestJob.java | 9 +++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/handlers/BulkRequestHandler.java b/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/handlers/BulkRequestHandler.java index e09b043bdf3..b02638f80d3 100644 --- a/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/handlers/BulkRequestHandler.java +++ b/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/handlers/BulkRequestHandler.java @@ -73,6 +73,7 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING import org.dcache.services.bulk.BulkRequestNotFoundException; import org.dcache.services.bulk.BulkRequestStatus; import org.dcache.services.bulk.BulkRequestStatus.Status; +import org.dcache.services.bulk.BulkRequestStorageException; import org.dcache.services.bulk.BulkServiceException; import 
org.dcache.services.bulk.BulkStorageException; import org.dcache.services.bulk.job.BulkJob; @@ -136,6 +137,11 @@ public synchronized void abortRequestTarget(String requestId, String target, statistics.incrementJobsAborted(); } + public synchronized void abortRequest(String requestId) throws BulkRequestStorageException { + requestStore.update(requestId, COMPLETED); + statistics.incrementRequestsCompleted(); + } + @Override public synchronized void cancelRequest(Subject subject, String requestId) throws BulkServiceException { @@ -229,7 +235,6 @@ public synchronized void requestTargetCompleted(BulkJob job) throws BulkServiceE } } - @Required public void setCallbackExecutorService(ExecutorService service) { callbackExecutorService = service; diff --git a/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/handlers/BulkSubmissionHandler.java b/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/handlers/BulkSubmissionHandler.java index 85dc195062d..bf4e522ec14 100644 --- a/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/handlers/BulkSubmissionHandler.java +++ b/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/handlers/BulkSubmissionHandler.java @@ -83,6 +83,14 @@ void abortRequestTarget(String requestId, String target, Throwable exception) throws BulkServiceException; + /** + * Unrecoverable internal failure. Mark the request as terminated. + * + * @param requestId unique identifier + */ + void abortRequest(String requestId) + throws BulkServiceException; + /** * Services request (from user) to (cancel) the request. 
* diff --git a/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/job/BulkRequestJob.java b/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/job/BulkRequestJob.java index 0b3096f6f57..824fa0da6b4 100644 --- a/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/job/BulkRequestJob.java +++ b/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/job/BulkRequestJob.java @@ -170,6 +170,15 @@ protected void doRun() { protected void postCompletion() { completionHandler.requestProcessingFinished(key.getJobId()); + + if (completionHandler.isRequestCompleted()) { + try { + submissionHandler.abortRequest(key.getRequestId()); + } catch (BulkServiceException e) { + LOGGER.error("RequestJob, postCompletion() for {}: {}.", key.getRequestId(), + e.getMessage()); + } + } } private void handleDirectory(String target, FileAttributes attributes)