Skip to content

Commit

Permalink
Merge pull request #45 from PankratzLab/develop
Browse files Browse the repository at this point in the history
DonorCheck v0.1.15
  • Loading branch information
rcoleb authored Jun 20, 2024
2 parents 0e208fc + 2cc8e41 commit 41cbec2
Show file tree
Hide file tree
Showing 37 changed files with 223,390 additions and 1,292 deletions.
7 changes: 6 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
<groupId>org.pankratzlab</groupId>
<artifactId>donor-check</artifactId>
<!-- if version number changes, be sure to update in project.properties file also -->
<version>0.0.14</version>
<version>0.1.15</version>
<packaging>jar</packaging>

<description>A stand-alone tool for validating DonorNet typing entries.</description>
Expand Down Expand Up @@ -73,6 +73,11 @@
</properties>

<dependencies>
<dependency>
<groupId>org.pankratzlab</groupId>
<artifactId>BackgroundDataProcessor-java8</artifactId>
<version>1.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>org.apache.maven</groupId>
<artifactId>maven-model</artifactId>
Expand Down
17 changes: 11 additions & 6 deletions src/main/java/org/pankratzlab/unet/deprecated/hla/Antigen.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* #%L
* DonorCheck
* %%
* Copyright (C) 2018 - 2019 Computational Pathology - University of Minnesota
* Copyright (C) 2018 - 2024 Computational Pathology - University of Minnesota
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
Expand Down Expand Up @@ -39,7 +39,6 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import com.google.common.collect.ImmutableList;
import com.google.common.primitives.Ints;

Expand Down Expand Up @@ -232,16 +231,22 @@ private List<Integer> parse(String[] p) {
int[] iVal = new int[1];
Matcher matcher = SPEC_PATTERN.matcher(spec);
matcher.find();
// Extracts the value from inside the parentheses if they were present
String val = matcher.group(2) != null ? matcher.group(2) : matcher.group(1);

// Extracts value outside of parentheses if they were present (changed from returning value
// inside parens, 5/24 - @rcoleb)
String val = matcher.group(1);

if (val.contains(SPEC_DELIM)) {
// Was passed XX:YY
return parse(val.split(SPEC_DELIM));
}
// "0105" should be "01:05"
if (val.length() > 2 && val.charAt(0) == '0') {
return parse(new String[] {spec.substring(0, 2), spec.substring(2)});
if (val.length() > 2) {
if (val.charAt(0) == '0') {
return parse(new String[] {spec.substring(0, 2), spec.substring(2)});
} else {
// System.out.println();
}
}
iVal[0] = Integer.parseInt(val);
return parse(iVal);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
Expand All @@ -45,11 +44,7 @@

/** Persistent map of {@link HLAType}s to equivalent {@link SeroType}s. Use when converting. */
public final class AntigenDictionary implements Serializable {
private static final long serialVersionUID = 10L;

public static final int LATEST_REVISION =
1 + HLAType.LATEST_REVISION + SeroType.LATEST_REVISION + Antigen.LATEST_REVISION;
private int revision = LATEST_REVISION;
private static final long serialVersionUID = 12L;

public static final String REL_DNA_SER_PROP = "rel.dna.ser.file";

Expand Down Expand Up @@ -178,6 +173,7 @@ private static void parseDictionaries(Callable<Reader> readerSupplier) {
}
ImmutableSetMultimap.Builder<HLAType, SeroType> hlaBuilder = ImmutableSetMultimap.builder();
ImmutableSetMultimap.Builder<SeroType, HLAType> seroBuilder = ImmutableSetMultimap.builder();

// NB: what's considered a valid HLA type diverges from the HLA map keyset and thus must be
// tracked separately
Builder<HLAType> validHLATypes = ImmutableSet.builder();
Expand Down Expand Up @@ -208,16 +204,20 @@ private static void parseDictionaries(Callable<Reader> readerSupplier) {

// Parse out the serological specificities for this mapping
// Since the columns are ordered by specificity, we use the first column with valid entries
Set<String> seroSpecs = new HashSet<>();
for (int i = 2; i <= 5 && i < columns.length; i++) {
String types = columns[i];
// Each HLA type may map to multiple serotypes
for (String t : types.split(TYPE_DELIM)) {
if (!t.isEmpty()) {
seroSpecs.add(t);
}
}
// Set<String> seroSpecs = new LinkedHashSet<>();
List<String> seroSpecs = new ArrayList<>();
if (!columns[2].isEmpty()) {
seroSpecs.add(columns[2]);
}
// for (int i = 2; i <= 5 && i < columns.length; i++) {
// String types = columns[i];
// // Each HLA type may map to multiple serotypes
// for (String t : types.split(TYPE_DELIM)) {
// if (!t.isEmpty() && !seroSpecs.contains(t)) {
// seroSpecs.add(t);
// }
// }
// }

// Skip null types
if (seroSpecs.stream().anyMatch(NULL_TYPE::equals)) {
Expand All @@ -239,25 +239,26 @@ private static void parseDictionaries(Callable<Reader> readerSupplier) {
for (int i = 0; i < specValues.length; i++) {
specValues[i] = specValues[i].trim().replaceAll("[^0-9]", "");
spec.add(Integer.parseInt(specValues[i]));
}

HLAType hlaType = new HLAType(l, spec);

SeroLocus sl = l.sero();
for (String t : seroSpecs) {
// Convert unknown types to the first spec value
if (UNKNOWN_TYPE.equals(t)) {
t = specValues[0];
}
SeroType seroType = new SeroType(sl, t);
hlaBuilder.put(hlaType, seroType);

HLAType hlaType = new HLAType(l, spec);

SeroLocus sl = l.sero();
for (String t : seroSpecs) {
// Convert unknown types to the first spec value
if (UNKNOWN_TYPE.equals(t)) {
t = specValues[0];
}
SeroType seroType = new SeroType(sl, t);
hlaBuilder.put(hlaType, seroType);

// Only map from sero > hla if we have 2 or more specificities
if (spec.size() > 1) {
validHLATypes.add(hlaType);
seroBuilder.put(seroType, hlaType);
}
// Only map from sero > hla if we have 2 or more specificities
if (spec.size() > 1) {
validHLATypes.add(hlaType);
seroBuilder.put(seroType, hlaType);
}
}
// }
}

// Build the singleton map and write it to disk
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@

/** {@link Locus} implementation for HLA types */
public enum HLALocus implements Locus<HLALocus> {
A(SeroLocus.A, TIER_1), B(SeroLocus.B, TIER_1), C(SeroLocus.C, TIER_1), DRB1(SeroLocus.DRB,
TIER_2), DRB3(SeroLocus.DRB, TIER_2), DRB4(SeroLocus.DRB, TIER_2), DRB5(SeroLocus.DRB,
TIER_2), DQA1(SeroLocus.DQA, TIER_2), DQB1(SeroLocus.DQB, TIER_2), DPA1(SeroLocus.DPA,
TIER_2), DPB1(SeroLocus.DPB, TIER_2), MICA(SeroLocus.MICA, TIER_2);
A(SeroLocus.A, MHC_CLASS_1), B(SeroLocus.B, MHC_CLASS_1), C(SeroLocus.C, MHC_CLASS_1), DRB1(SeroLocus.DRB,
MHC_CLASS_2), DRB3(SeroLocus.DRB, MHC_CLASS_2), DRB4(SeroLocus.DRB, MHC_CLASS_2), DRB5(SeroLocus.DRB,
MHC_CLASS_2), DQA1(SeroLocus.DQA, MHC_CLASS_2), DQB1(SeroLocus.DQB, MHC_CLASS_2), DPA1(SeroLocus.DPA,
MHC_CLASS_2), DPB1(SeroLocus.DPB, MHC_CLASS_2), MICA(SeroLocus.MICA, MHC_CLASS_2);

private final SeroLocus sero;
private final int tier;
Expand Down
93 changes: 82 additions & 11 deletions src/main/java/org/pankratzlab/unet/deprecated/hla/HLAType.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,16 @@
*/
package org.pankratzlab.unet.deprecated.hla;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import com.google.common.primitives.Ints;

/** {@link Antigen} implementation for HLA antigens */
Expand Down Expand Up @@ -85,27 +86,54 @@ public HLAType(HLALocus l, List<Integer> p) {
* Note: this method is similar to {@link AntigenDictionary#lookup(HLAType)}, with two exceptions:
*
* <ul>
* <li>In the case of multiple {@link SeroType} mappings, only the first will be returned
* <li><b>In the case of multiple {@link SeroType} mappings, only the first will be returned</b>
* <li>If there is no explicit mapping for this type, a {@code SeroType} will be created using the
* equivalent {@link SeroLocus} and the first value in this type's {@link #spec()}
* </ul>
*
* @return {@link SeroType} equivalent of this antigen
* @throws IllegalStateException If this type has ambiguous serotype equivalencies
* @return {@link SeroType} equivalent of this antigen <br />
* <br />
*
*/
public SeroType equiv() {
  Set<SeroType> lookup = new HashSet<>();
  try {
    lookup = AntigenDictionary.lookup(this);
  } catch (IllegalArgumentException e) {
    // The dictionary rejected this exact type. Retry at neighbouring resolutions:
    // types with fewer than 3 fields are grown with trailing "01" fields, all others are
    // reduced by dropping trailing fields (see growSpec / reduceSpec for the exact rules).
    Function<HLAType, HLAType> step =
        this.resolution() < 3 ? HLAType::growSpec : HLAType::reduceSpec;
    lookup = lookupWithAdjustedSpec(this, step);
  }

  // When several serotypes map to this type only the first one is reported.
  if (!lookup.isEmpty()) {
    return lookup.iterator().next();
  }

  // If we don't have an explicit mapping of this HLAType, just use the first spec
  return lowResEquiv();
}

/**
 * Repeatedly applies {@code step} to {@code start} and looks each resulting type up in the
 * {@link AntigenDictionary}, stopping at the first non-empty result.
 *
 * <p>The search ends as soon as {@code step} returns {@code null} (no further adjustment is
 * possible); a {@code null} candidate is never passed to the dictionary.
 *
 * @param start Type whose own lookup already failed; it is not looked up again
 * @param step Produces the next candidate from the current one, or {@code null} when exhausted
 * @return The first non-empty set of mapped serotypes, or an empty set if none was found
 */
private static Set<SeroType> lookupWithAdjustedSpec(
    HLAType start, Function<HLAType, HLAType> step) {
  for (HLAType candidate = step.apply(start);
      candidate != null;
      candidate = step.apply(candidate)) {
    try {
      Set<SeroType> found = AntigenDictionary.lookup(candidate);
      if (!found.isEmpty()) {
        return found;
      }
    } catch (IllegalArgumentException ignored) {
      // No mapping at this resolution either; keep adjusting the spec.
    }
  }
  return new HashSet<>();
}
Expand Down Expand Up @@ -148,6 +176,49 @@ protected List<Integer> parse(int[] p) {
return values;
}

/**
 * Drops the last field of an allele's specificity, where that is permitted.
 *
 * <p>The last field is removed when the spec has four or more fields (regardless of that field's
 * value), or when it has two or three fields and the last one is {@code 1} (i.e. "01").
 *
 * @param equivType Input type to reduce
 * @return A type of the same kind as the input with its last field removed, or {@code null} if
 *     the spec has fewer than two fields, or has fewer than four fields and does not end in
 *     {@code 1}
 */
public static HLAType reduceSpec(HLAType equivType) {
  final List<Integer> fields = equivType.spec();
  final int fieldCount = fields.size();

  // A single field (or none) cannot be shortened any further
  if (fieldCount < 2) {
    return null;
  }

  // Below four fields only a trailing "01" may be removed
  if (fieldCount < 4 && fields.get(fieldCount - 1) != 1) {
    return null;
  }

  return HLAType.modifiedSpec(equivType, fields.subList(0, fieldCount - 1));
}

/**
 * Appends an "01" field to an allele's specificity.
 *
 * @param equivType Input type to expand
 * @return A type of the same kind as the input with an extra trailing field of {@code 1}, or
 *     {@code null} if the input already has four or more fields
 */
public static HLAType growSpec(HLAType equivType) {
  final List<Integer> current = equivType.spec();

  // Four fields is the maximum; anything that long cannot be expanded
  if (current.size() >= 4) {
    return null;
  }

  // Work on a copy so the input's own spec list is left untouched
  final List<Integer> extended = new ArrayList<>(current);
  extended.add(1);
  return HLAType.modifiedSpec(equivType, extended);
}

/**
 * Builds a type at the same locus as {@code equivType} but with the given {@code spec},
 * preserving whether the input was a {@link NullType} or a plain {@link HLAType}.
 */
private static HLAType modifiedSpec(HLAType equivType, List<Integer> spec) {
  return (equivType instanceof NullType)
      ? new NullType(equivType.locus(), spec)
      : new HLAType(equivType.locus(), spec);
}

/** @see Antigen#is(String, Pattern) */
public static boolean is(String text) {
return Antigen.is(text, TYPE_PATTERN);
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/pankratzlab/unet/deprecated/hla/Locus.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@

/** Marker interface for antigen loci */
public interface Locus<L> extends Comparable<L> {
public static int TIER_1 = 1;
public static int TIER_2 = 2;
public static int MHC_CLASS_1 = 1;
public static int MHC_CLASS_2 = 2;

/** @return Name of this locus */
String name();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@

/** {@link Locus} implementation for serological types */
public enum SeroLocus implements Locus<SeroLocus> {
A(2, TIER_1), B(2, TIER_1), C(3, TIER_1), DRB(1, TIER_2, "DR"), DQB(1, TIER_2, "DQ"), DQA(1,
TIER_2), DPB(1, TIER_2, "DP"), DPA(1, TIER_2), MICA(1, -1);
A(2, MHC_CLASS_1), B(2, MHC_CLASS_1), C(3, MHC_CLASS_1), DRB(1, MHC_CLASS_2, "DR"), DQB(1, MHC_CLASS_2, "DQ"), DQA(1,
MHC_CLASS_2), DPB(1, MHC_CLASS_2, "DP"), DPA(1, MHC_CLASS_2), MICA(1, -1);

private final int severity;
private final int tier;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
package org.pankratzlab.unet.deprecated.hla;

/**
 * Named sources: {@link #Score6}, {@link #SureTyper} and {@link #DonorNet}.
 *
 * <p>NOTE(review): presumably identifies which system or file format a typing record came from;
 * confirm against the callers.
 */
public enum SourceType {
  Score6,
  SureTyper,
  DonorNet
}
Loading

0 comments on commit 41cbec2

Please sign in to comment.