From 12094c097138f34f804a693ee52cd6d6c99e0097 Mon Sep 17 00:00:00 2001 From: amir-zeldes Date: Sun, 19 May 2024 18:20:55 -0400 Subject: [PATCH] bugfix * inherited source and target node heads in chain behave properly --- rst2dep/rst2dep.py | 22 ++++++++++++++++++---- setup.py | 4 ++-- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/rst2dep/rst2dep.py b/rst2dep/rst2dep.py index 9cc98ee..dc3619e 100644 --- a/rst2dep/rst2dep.py +++ b/rst2dep/rst2dep.py @@ -298,23 +298,34 @@ def make_rsd(rstfile, xml_dep_root="", as_text=False, docname=None, out_mode="co # Get head EDU and height per node node2head_edu = {} + target_node2head_edu = {} # Only distinct for chain algorithm for edu in edus: edu.height = 0 node = edu edu_id = edu.id - node2head_edu[node.id] = edu_id + target_node2head_edu[node.id] = node2head_edu[node.id] = edu_id while node.parent != "0": this_height = node.height + 1 + span_parent = node.relname == "span" + multinuc_parent = nodes[node.parent].kind == "multinuc" and (nodes[node.parent].leftmost_child == node.id or (node.relname.endswith("_m") and not node.relname.startswith("same"))) node = nodes[node.parent] if node.kind == "edu": edu_id = node.id if node.id not in node2head_edu: - node2head_edu[node.id] = edu_id + if span_parent or multinuc_parent: + target_node2head_edu[node.id] = node2head_edu[node.id] = edu_id node.height = this_height else: if int(edu_id) < int(node2head_edu[node.id]): # Prefer left most child as head - node2head_edu[node.id] = edu_id + if span_parent or multinuc_parent: + node2head_edu[node.id] = edu_id node.height = this_height + if int(edu_id) > int(target_node2head_edu[node.id]): # Prefer right most child as head for chain target + if span_parent or multinuc_parent: + target_node2head_edu[node.id] = edu_id + + if not span_parent and not multinuc_parent: + break # A satellite relation has been traversed, stop looking for nodes headed by this # Get height distance from dependency parent to child's attachment point in the phrase structure (number of spans) for nid in nodes: @@ -341,7 +352,10 @@ def make_rsd(rstfile, xml_dep_root="", as_text=False, docname=None, out_mode="co secedge_mapping = {} for secedge in secedges: dep_src = node2head_edu[nodes[secedge.source].id] - dep_trg = node2head_edu[nodes[secedge.target].id] + if algorithm == "chain": + dep_trg = target_node2head_edu[nodes[secedge.target].id] + else: + dep_trg = node2head_edu[nodes[secedge.target].id] src_dist = str(nodes[secedge.source].height) trg_dist = str(nodes[secedge.target].height) signals = [] diff --git a/setup.py b/setup.py index 8d666c9..9d60568 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name = 'rst2dep', packages = find_packages(), - version = '1.3.0.0', + version = '1.3.0.1', description = 'RST (Rhetorical Structure Theory) constituent and dependency converter for discourse parses', author = 'Amir Zeldes', author_email = 'amir.zeldes@georgetown.edu', @@ -11,7 +11,7 @@ install_requires=[], url = 'https://github.com/amir-zeldes/rst2dep', license='Apache License, Version 2.0', - download_url = 'https://github.com/amir-zeldes/rst2dep/releases/tag/v1.3.0.0', + download_url = 'https://github.com/amir-zeldes/rst2dep/releases/tag/v1.3.0.1', keywords = ['NLP', 'RST', 'discourse', 'dependencies', 'converter', 'conversion','Rhetorical Structure Theory','parsing'], classifiers = ['Programming Language :: Python', 'Programming Language :: Python :: 2',