Skip to content

Commit

Permalink
bugfix
Browse files Browse the repository at this point in the history
  * inherited source and target node heads now behave properly in the chain algorithm
  • Loading branch information
amir-zeldes committed May 19, 2024
1 parent 06abf52 commit 12094c0
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 6 deletions.
22 changes: 18 additions & 4 deletions rst2dep/rst2dep.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,23 +298,34 @@ def make_rsd(rstfile, xml_dep_root="", as_text=False, docname=None, out_mode="co

# Get head EDU and height per node
node2head_edu = {}
target_node2head_edu = {} # Only distinct for chain algorithm
for edu in edus:
edu.height = 0
node = edu
edu_id = edu.id
node2head_edu[node.id] = edu_id
target_node2head_edu[node.id] = node2head_edu[node.id] = edu_id
while node.parent != "0":
this_height = node.height + 1
span_parent = node.relname == "span"
multinuc_parent = nodes[node.parent].kind == "multinuc" and (nodes[node.parent].leftmost_child == node.id or (node.relname.endswith("_m") and not node.relname.startswith("same")))
node = nodes[node.parent]
if node.kind == "edu":
edu_id = node.id
if node.id not in node2head_edu:
node2head_edu[node.id] = edu_id
if span_parent or multinuc_parent:
target_node2head_edu[node.id] = node2head_edu[node.id] = edu_id
node.height = this_height
else:
if int(edu_id) < int(node2head_edu[node.id]): # Prefer left most child as head
node2head_edu[node.id] = edu_id
if span_parent or multinuc_parent:
node2head_edu[node.id] = edu_id
node.height = this_height
if int(edu_id) > int(target_node2head_edu[node.id]): # Prefer right most child as head for chain target
if span_parent or multinuc_parent:
target_node2head_edu[node.id] = edu_id

if not span_parent and not multinuc_parent:
break # A satellite relation has been traversed, stop looking for nodes headed by this

# Get height distance from dependency parent to child's attachment point in the phrase structure (number of spans)
for nid in nodes:
Expand All @@ -341,7 +352,10 @@ def make_rsd(rstfile, xml_dep_root="", as_text=False, docname=None, out_mode="co
secedge_mapping = {}
for secedge in secedges:
dep_src = node2head_edu[nodes[secedge.source].id]
dep_trg = node2head_edu[nodes[secedge.target].id]
if algorithm == "chain":
dep_trg = target_node2head_edu[nodes[secedge.target].id]
else:
dep_trg = node2head_edu[nodes[secedge.target].id]
src_dist = str(nodes[secedge.source].height)
trg_dist = str(nodes[secedge.target].height)
signals = []
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
setup(
name = 'rst2dep',
packages = find_packages(),
version = '1.3.0.0',
version = '1.3.0.1',
description = 'RST (Rhetorical Structure Theory) constituent and dependency converter for discourse parses',
author = 'Amir Zeldes',
author_email = 'amir.zeldes@georgetown.edu',
package_data = {'':['README.md','LICENSE','requirements.txt'],'rst2dep':['*']},
install_requires=[],
url = 'https://github.com/amir-zeldes/rst2dep',
license='Apache License, Version 2.0',
download_url = 'https://github.com/amir-zeldes/rst2dep/releases/tag/v1.3.0.0',
download_url = 'https://github.com/amir-zeldes/rst2dep/releases/tag/v1.3.0.1',
keywords = ['NLP', 'RST', 'discourse', 'dependencies', 'converter', 'conversion','Rhetorical Structure Theory','parsing'],
classifiers = ['Programming Language :: Python',
'Programming Language :: Python :: 2',
Expand Down

0 comments on commit 12094c0

Please sign in to comment.