From b5b996be980cce55bf7645d2a78e9d132479dd6a Mon Sep 17 00:00:00 2001 From: Peter Donovan Date: Mon, 28 Aug 2023 21:34:47 -0700 Subject: [PATCH 1/2] Remove the "waiting for tag" field. When a federate sends a NET for a certain tag, and it has upstream federates, we incorrectly assumed that the federate needs to receive a TAG or PTAG in order to allow it to proceed to that NET. This assumption is incorrect because after sending the NET, the federate might receive a message from an upstream federate; that message might lead to a newly lowered NET. The bug that resulted from the incorrect assumption is that the federate would not proceed to execute the event enabled by the message it received after sending the NET. This is because the federate does not realize that it has anything to do; it still thinks that it cannot do anything until it receives a TAG or PTAG that enables it to process the NET. Therefore, the federation deadlocks. The fix is to compute a new next event tag whenever the event queue changes. --- core/federated/federate.c | 19 +++++-------------- include/core/federated/federate.h | 7 ------- 2 files changed, 5 insertions(+), 21 deletions(-) diff --git a/core/federated/federate.c b/core/federated/federate.c index f4e8a85f4..8065e7891 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -98,7 +98,6 @@ federate_instance_t _fed = { .server_port = -1, .last_TAG = {.time = NEVER, .microstep = 0u}, .is_last_TAG_provisional = false, - .waiting_for_TAG = false, .has_upstream = false, .has_downstream = false, .received_stop_request_from_rti = false, @@ -1897,8 +1896,6 @@ void handle_tag_advance_grant(void) { TAG.time - start_time, TAG.microstep, _fed.last_TAG.time - start_time, _fed.last_TAG.microstep); } - - _fed.waiting_for_TAG = false; // Notify everything that is blocked. lf_cond_broadcast(&env->event_q_changed); @@ -2077,7 +2074,6 @@ void handle_provisional_tag_advance_grant() { } _fed.last_TAG = PTAG; - _fed.waiting_for_TAG = false; _fed.is_last_TAG_provisional = true; LF_PRINT_LOG("At tag " PRINTF_TAG ", received Provisional Tag Advance Grant (PTAG): " PRINTF_TAG ".", env->current_tag.time - start_time, env->current_tag.microstep, @@ -2736,31 +2732,26 @@ tag_t _lf_send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply tag.time - start_time, tag.microstep); return tag; } - // Fed has upstream federates. Have to wait for a TAG or PTAG. - _fed.waiting_for_TAG = true; // Wait until a TAG is received from the RTI. while (true) { // Wait until either something changes on the event queue or // the RTI has responded with a TAG. - LF_PRINT_DEBUG("Waiting for a TAG from the RTI."); + LF_PRINT_DEBUG("Waiting for a TAG from the RTI with _fed.last_TAG.time=%lld, %lld and net=%lld, %lld", (long long) _fed.last_TAG.time - start_time, (long long) _fed.last_TAG.microstep, (long long) tag.time - start_time, (long long) tag.microstep); if (lf_cond_wait(&env->event_q_changed) != 0) { lf_print_error("Wait error."); } - // Either a TAG or PTAG arrived or something appeared on the event queue. - if (!_fed.waiting_for_TAG) { - // _fed.last_TAG will have been set by the thread receiving the TAG message that - // set _fed.waiting_for_TAG to false. - return _fed.last_TAG; - } // Check whether the new event on the event queue requires sending a new NET. tag_t next_tag = get_next_event_tag(env); if (lf_tag_compare(next_tag, tag) != 0) { _lf_send_tag(MSG_TYPE_NEXT_EVENT_TAG, next_tag, wait_for_reply); _fed.last_sent_NET = next_tag; - LF_PRINT_LOG("Sent next event tag (NET) " PRINTF_TAG " to RTI.", + LF_PRINT_LOG("Sent next event tag (NET) " PRINTF_TAG " to RTI from loop.", next_tag.time - lf_time_start(), next_tag.microstep); } + if (lf_tag_compare(_fed.last_TAG, next_tag) >= 0) { + return _fed.last_TAG; + } } } diff --git a/include/core/federated/federate.h b/include/core/federated/federate.h index e00d07218..f8f2e626d 100644 --- a/include/core/federated/federate.h +++ b/include/core/federated/federate.h @@ -160,13 +160,6 @@ typedef struct federate_instance_t { */ bool is_last_TAG_provisional; - /** - * Indicator of whether a NET has been sent to the RTI and no TAG - * yet received in reply. - * This variable should only be accessed while holding the mutex lock. - */ - bool waiting_for_TAG; - /** * Indicator of whether this federate has upstream federates. * The default value of false may be overridden in _lf_initialize_trigger_objects. From 49483cfe43ae576171d4d71dcb1bff13548fc552 Mon Sep 17 00:00:00 2001 From: Peter Donovan Date: Wed, 30 Aug 2023 23:13:18 -0700 Subject: [PATCH 2/2] Fix bug in previous commit. The problem was that the NET obtained by get_next_event_tag might be farther into the future than the NET that was just submitted to the RTI. This is possible when executing the start tag because all federates send a NET for the start tag regardless of their event queues. --- core/federated/federate.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/core/federated/federate.c b/core/federated/federate.c index 8065e7891..91d2e4a04 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -2743,15 +2743,18 @@ tag_t _lf_send_next_event_tag(environment_t* env, tag_t tag, bool wait_for_reply } // Check whether the new event on the event queue requires sending a new NET. tag_t next_tag = get_next_event_tag(env); + if ( + lf_tag_compare(_fed.last_TAG, next_tag) >= 0 + || lf_tag_compare(_fed.last_TAG, tag) >= 0 + ) { + return _fed.last_TAG; + } if (lf_tag_compare(next_tag, tag) != 0) { _lf_send_tag(MSG_TYPE_NEXT_EVENT_TAG, next_tag, wait_for_reply); _fed.last_sent_NET = next_tag; LF_PRINT_LOG("Sent next event tag (NET) " PRINTF_TAG " to RTI from loop.", next_tag.time - lf_time_start(), next_tag.microstep); } - if (lf_tag_compare(_fed.last_TAG, next_tag) >= 0) { - return _fed.last_TAG; - } } }