Skip to content

Commit

Permalink
Merge pull request #96 from fgnt/viz_improvements
Browse files Browse the repository at this point in the history
Improve Visualization
  • Loading branch information
thequilo authored Oct 18, 2024
2 parents f2d133a + 210cba3 commit ce8b0f6
Show file tree
Hide file tree
Showing 11 changed files with 683 additions and 160 deletions.
3 changes: 3 additions & 0 deletions meeteval/der/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ def md_eval_22(
regex=None,
uem=None,
):
"""
Computes the Diarization Error Rate (DER) using md-eval-22.pl.
"""
from meeteval.der.api import md_eval_22
results = md_eval_22(
reference,
Expand Down
3 changes: 3 additions & 0 deletions meeteval/der/md_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,9 @@ def convert(string):


def md_eval_22(reference, hypothesis, collar=0, regions='all', uem=None):
"""
Computes the Diarization Error Rate (DER) using md-eval-22.pl.
"""
from meeteval.io.rttm import RTTM
reference = RTTM.new(reference, filename='dummy')
hypothesis = RTTM.new(hypothesis, filename='dummy')
Expand Down
116 changes: 80 additions & 36 deletions meeteval/viz/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,48 @@ def create_viz_folder(
regex=None,
normalizer=None,
js_debug=False,
per_reco_file=None,
):
out = Path(out)
out.mkdir(parents=True, exist_ok=True)

if isinstance(alignments, str):
alignments = alignments.split(',')

if per_reco_file is not None:
assert len(alignments) == len(per_reco_file), alignments

error_rate_classes = {
'tcp': meeteval.wer.CPErrorRate,
'cp': meeteval.wer.CPErrorRate,
'tcorc': meeteval.wer.OrcErrorRate,
'orc': meeteval.wer.OrcErrorRate,
'greedy_orc': meeteval.wer.OrcErrorRate,
'greedy_tcorc': meeteval.wer.OrcErrorRate,
'greedy_dicp': meeteval.wer.DICPErrorRate,
'greedy_ditcp': meeteval.wer.DICPErrorRate,
}

def load_per_reco_file(alignment, f):
    """Load one precomputed per-reco file as error-rate objects.

    Args:
        alignment: Name of the alignment algorithm; used as the key into
            `error_rate_classes` to pick the class that parses each entry.
        f: Path (or path-like string) of the per-reco file to read.

    Returns:
        A dict mapping each session ID found in the file to the object built
        by the selected class's `from_dict`.

    Raises:
        KeyError: If `alignment` has no entry in `error_rate_classes`.
    """
    # Imported lazily, as in the rest of this module's helpers.
    from meeteval.wer.__main__ import _load

    cls = error_rate_classes[alignment]

    loaded = {}
    for session_id, entry in _load(Path(f)).items():
        loaded[session_id] = cls.from_dict(entry)
    return loaded
per_reco = {
alignment: load_per_reco_file(alignment, f)
for alignment, f in zip(alignments, per_reco_file)
}
else:
per_reco = collections.defaultdict(lambda: collections.defaultdict(lambda: None))

avs = {}
for (i, hypothesis), alignment in tqdm.tqdm(list(itertools.product(
hypothesiss.items(),
alignments.split(','),
alignments,
))):

r, h = _load_texts(
Expand All @@ -43,11 +77,14 @@ def create_viz_folder(
print(f'Ignore {xor}, because they are not available in reference and hypothesis.')

for session_id in tqdm.tqdm(session_ids):
av = AlignmentVisualization(r[session_id],
h[session_id],
alignment=alignment,
js_debug=js_debug,
sync_id=1)
av = AlignmentVisualization(
r[session_id],
h[session_id],
alignment=alignment,
js_debug=js_debug,
sync_id=1,
precomputed_error_rate=per_reco[alignment][session_id],
)
av.dump(out / f'{session_id}_{i}_{alignment}.html')
avs.setdefault((i, alignment), {})[session_id] = av

Expand All @@ -62,24 +99,6 @@ def create_viz_folder(
avs_T[session_id][i] = av
avs_T = dict(avs_T)

for session_id, v in avs_T.items():
doc, tag, text = Doc().tagtext()
doc.asis('<!DOCTYPE html>')

# With 100 % there is a scroll bar -> use 99 %
with tag('html', style="height: 99%; margin: 0;"):
with tag('body', style="width: 100%; height: 100%; margin: 0; display: flex;"):
for (i, alignment), av in v.items():
with tag('div', style='flex-grow: 1'):
with tag('iframe', src=f'{session_id}_{i}_{alignment}.html',
title="right", width="100%",
height="100%", style="border-width: 0"):
pass

file = out / f"{session_id}.html"
file.write_text(indent(doc.getvalue()))
print(f'Wrote file://{file.absolute()}')

###########################################################################

from yattag import Doc
Expand All @@ -105,7 +124,7 @@ def get_wer(v):
pass
doc.asis('<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/jquery.tablesorter/2.31.2/css/theme.default.min.css">')
with tag('style'):
n = len(alignments.split(','))
n = len(alignments)
doc.asis(f'''
/* Center table */
body {{
Expand Down Expand Up @@ -143,7 +162,7 @@ def get_wer(v):
with tag('th', ('data-sorter', "false"), colspan=len(list(item))):
doc.text(system)

if ',' in alignments or len(hypothesiss) > 1:
if len(alignments) > 1 or len(hypothesiss) > 1:
with tag('th', ('data-sorter', "false"), colspan=2):
with tag('span', klass='synced-view'):
pass
Expand All @@ -159,7 +178,7 @@ def get_wer(v):
with tag('span', klass='number'):
doc.text(get_wer(v))

if ',' in alignments or len(hypothesiss) > 1:
if len(alignments) > 1 or len(hypothesiss) > 1:
with tag('th', ('data-sorter', "false"), colspan=2):
doc.text("Side-by-side views")

Expand All @@ -180,12 +199,9 @@ def get_wer(v):

if len(v) > 1:
with tag('td'):
with tag('a', href=f'{session_id}.html'):
doc.text('SideBySide')
with tag('td'):
tags = '&'.join(f'{session_id}_{i}_{a}' for i, a in v.keys())
tags = '&'.join(f'{session_id}_{i}_{a}.html' for i, a in v.keys())
with tag('a', href=f'side_by_side_sync.html?{tags}'):
doc.text('SydeBySide Synced')
doc.text('SydeBySide')
doc.asis('''
<script>
$(document).ready(function() {
Expand Down Expand Up @@ -232,7 +248,23 @@ def html(
normalizer=None,
out='viz',
js_debug=False,
per_reco_file=None,
):
"""
Creates a visualization of the alignment between reference and hypothesis for the specified WER algorithm.
The visualization is created in two steps.
First, compute the WER and assignment, i.e. the mapping of utterances/segments to streams. Any WER algorithm
from meeteval can be used for this. Depending on the algorithm, the labels of the reference or
hypothesis utterances or streams are modified.
Second, compute the alignment, i.e. the matching of words between reference and hypothesis (insertion,
deletion, substitution). This is done with a time-constrained algorithm if the assignment was
time-constrained, otherwise with a "classical" unconstrained algorithm.
"""
def prepare(i: int, h: str):
if ':' in h and not Path(h).exists():
# inspired by tensorboard from the --logdir_spec argument.
Expand All @@ -259,6 +291,7 @@ def prepare(i: int, h: str):
regex=regex,
normalizer=normalizer,
js_debug=js_debug,
per_reco_file=per_reco_file,
)


Expand All @@ -271,10 +304,11 @@ def add_argument(self, command_parser, name, p):
if name == 'alignment':
command_parser.add_argument(
'--alignment',
choices=['tcp', 'cp', 'tcp,cp', 'cp,tcp', 'tcorc', 'orc'],
help='Specifies which alignment is used.\n'
'- cp: Find the permutation that minimizes the cpWER and use the "classical" alignment.\n'
'- tcp: Find the permutation that minimizes the tcpWER and use a time constraint alignment.'
choices=['tcp', 'cp', 'orc', 'greedy_orc', 'tcorc', 'greedy_tcorc', 'greedy_dicp', 'greedy_ditcp'],
nargs='+',
help='Specifies the algorithm used to obtain the alignment. \n'
'Multiple alignments can be specified to generate multiple visualizations with a single '
'merged overview table and side-by-side views.'
)
elif name == 'hypothesis':
command_parser.add_argument(
Expand All @@ -291,6 +325,16 @@ def add_argument(self, command_parser, name, p):
action='store_true',
help='Add a debug flag to the HTML output to enable debugging in the browser.'
)
elif name == 'per_reco_file':
command_parser.add_argument(
'--per-reco-file',
help='A precomputed per-reco file. Loads the WER and (stream) '
'assignment information from this file instead of computing it. '
'If supplied, the number of files must match the number of alignments specified '
'with --alignment.',
default=None,
nargs='+',
)
else:
return super().add_argument(command_parser, name, p)

Expand Down
143 changes: 124 additions & 19 deletions meeteval/viz/side_by_side_sync.html
Original file line number Diff line number Diff line change
@@ -1,46 +1,151 @@

<!DOCTYPE html>
<html style="height:99%; margin: 0;">
<html style="height: 100%; margin: 0;">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>MeetEval: Side by side view</title>
<style>
.container {
display: flex;
max-width: 100%;
white-space: nowrap; /* Prevent wrapping and hide overflowing content. */
overflow-x: auto; /* Enable horizontal scrolling */
white-space: nowrap; /* Prevent line breaks within the container */
height: 100%;
width:100%;
flex-direction: column;

}
.breadcrumb-container {
display: inline-flex;
justify-content: flex-end;
overflow: hidden;
max-width: min-content;
}
.breadcrumb {
font-family: Arial, sans-serif;
background-color: #f9f9f9;
align-items: center;
padding: 2px 10px;
background-color: #fff;
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
border-radius: 6px;
font-size: 14px; /* Smaller font size */
}
.breadcrumb a {
color: #3498db;
text-decoration: none;
padding: 4px 8px; /* Reduced padding for tighter layout */
border-radius: 4px;
transition: background-color 0.3s;
}
.breadcrumb a:hover {
background-color: #3498db;
color: #fff;
}
.breadcrumb span {
margin: 0 3px; /* Smaller spacing between arrows and links */
color: #555;
}

iframe {
width: 100%;
flex-grow: 1;
border: none;
}
</style>
</head>
<body style="width: 100%; height: 100%; margin: 0; display: flex;">
</body>

<script>
/**
 * Render a clickable breadcrumb trail for `url` inside `container`.
 *
 * One link is created for the root and one for every path segment of `url`.
 * Clicking a link does not navigate the page; it loads the link's target
 * into `iframe` instead.
 *
 * @param {string} url - Absolute URL whose path is turned into breadcrumbs.
 *     Must be parseable by `new URL(url)`, otherwise a TypeError is thrown.
 * @param {HTMLElement} container - Element that receives the breadcrumb; its
 *     class name is set to 'breadcrumb-container'.
 * @param {HTMLIFrameElement} iframe - Frame whose `src` is replaced when a
 *     breadcrumb link is clicked.
 */
function createBreadcrumb(url, container, iframe) {
    container.className = 'breadcrumb-container';
    const breadcrumbContainer = document.createElement('div');
    breadcrumbContainer.className = 'breadcrumb';
    container.append(breadcrumbContainer);

    const urlObj = new URL(url);
    // Path segments without the empty strings produced by leading/double slashes.
    const parts = urlObj.pathname.split('/').filter(Boolean);

    // `origin` serializes to the string "null" for schemes with an opaque
    // origin (notably file:), which would turn every link into "null/...".
    // Fall back to scheme + host so breadcrumbs also work for local files.
    let accumulatedPath = urlObj.origin !== 'null'
        ? urlObj.origin
        : `${urlObj.protocol}//${urlObj.host}`;

    // Build nodes directly instead of concatenating into innerHTML: the text
    // comes from the URL and must not be interpreted as markup, and repeated
    // `innerHTML +=` re-parses the whole container on every step.
    const appendLink = (href, label) => {
        const link = document.createElement('a');
        link.setAttribute('href', href);
        link.textContent = label;
        breadcrumbContainer.append(link);
    };

    // Root ("home") link.
    appendLink(accumulatedPath, '/');

    // Delegate clicks on any breadcrumb link to the container.
    breadcrumbContainer.addEventListener('click', function(event) {
        if (event.target.tagName === 'A') {
            // Keep the outer page in place and navigate the iframe instead.
            event.preventDefault();
            iframe.src = event.target.getAttribute('href');
        }
    });

    // One separator + link per path segment, each pointing at the path so far.
    parts.forEach((part) => {
        accumulatedPath += `/${part}`;
        const separator = document.createElement('span');
        separator.textContent = '\u00BB';
        breadcrumbContainer.append(separator);
        appendLink(accumulatedPath, part);
    });
}

var urlParams = new URLSearchParams(window.location.search);
const iframes = []
const breadcrumbs = []
const body = document.getElementsByTagName('body')[0];
var sync = true;
const new_url_params = [];
const other_new_url_params = {};
urlParams.forEach((value, key) => {
if (!value) {
// Value is empty when only a key is given (e.g., ?systemA&systemB vs ?sync=true)
value = key;

// Preprocess file path
if (!value.endsWith('.html') && !value.endsWith('.htm')) {
value += '.html';
}
const iframe = document.createElement('iframe');
iframe.src = value;
iframe.width = "100%";
iframe.height = "100%";
iframe.style.border = "none";
iframes.push(iframe);
const div = document.createElement('div')
div.style = "flex-grow: 1;";
div.append(iframe)
div.className = 'container';
const breadcrumbContainer = document.createElement('div');
div.append(breadcrumbContainer);
breadcrumbs.push(breadcrumbContainer);
console.log(value);
createBreadcrumb('file://' + value, breadcrumbContainer, iframe);
div.append(iframe);
body.append(div);
new_url_params.push(value);
} else if (key === "sync") {
sync = value === "true";
other_new_url_params[key] = value;
}
})
var url = new URL(window.location.href);
url.search = new_url_params.join('&') + '&' + new URLSearchParams(other_new_url_params).toString();
history.replaceState(null, null, url);
console.log(url.searchParams)

if (sync) {
window.addEventListener("message", event => {
iframes.forEach(iframe => {
if (iframe.contentWindow !== event.source) {
iframe.contentWindow.postMessage(event.data, '*');
}
})
})
}
window.addEventListener("message", event => {
if (event.data.type === 'url') {
// This is the only way to get the location of the page in the iframe
// We can't access iframe.contentWindow.location.href directly due to CORS
// This is also the reason why the breadcrumbs and URL are not updated
// when navigating the directory structure of the iframe
let index = iframes.findIndex(iframe => iframe.contentWindow === event.source);
new_url_params[index] = event.data.url.replace('file://', '');
breadcrumbs[index].innerHTML = '';
createBreadcrumb(event.data.url, breadcrumbs[index], iframes[index]);
var url = new URL(window.location.href);
url.search = new_url_params.join('&') + '&' + new URLSearchParams(other_new_url_params).toString();
history.replaceState(null, null, url);
} else if (sync) {
iframes.forEach(iframe => {
if (iframe.contentWindow !== event.source) {
iframe.contentWindow.postMessage(event.data, '*');
}
})
}
})
</script>
</html>
Loading

0 comments on commit ce8b0f6

Please sign in to comment.