Skip to content

Commit

Permalink
Merge pull request #93 from grueneschweiz/less_duplicates
Browse files Browse the repository at this point in the history
This PR makes duplicate matching i bit more narrow so there are less ambiguous matches.
  • Loading branch information
Michael-Schaer authored Nov 18, 2024
2 parents 395a92f + 135fd4a commit de50eca
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 23 deletions.
97 changes: 80 additions & 17 deletions app/Repository/Member/MemberMatch.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class MemberMatch
const NO_MATCH = 0;

/**
* It's an unambiguous match of exaclty one member
* It's an unambiguous match of exactly one member
*/
const MATCH = 1;

Expand Down Expand Up @@ -69,7 +69,7 @@ private function __construct(int $status, array $matches)
$this->status = $status;
$this->matches = $matches;
}

/**
* Find duplicate members in given group or its subgroups
*
Expand Down Expand Up @@ -100,38 +100,71 @@ public static function match(
if (!empty($matches) && $member->firstName->getValue()) {
self::selectByFirstName($matches, $member->firstName->getValue());
}

if (count($matches)) {
return self::create($matches, false);
}
}


// find by phone and return if found
if ($member->mobilePhone->getValue()) {
$query = self::buildMobilePhoneQuery($member);
$matches = self::matchByQuery($query, $rootGroups, $memberRepository);
if (!empty($matches) && $member->firstName->getValue()) {
self::selectByFirstName($matches, $member->firstName->getValue());
}

if (count($matches)) {
return self::create($matches, false);
}
}

// don't proceed, if we don't have first and last name
if (!($member->firstName->getValue() && $member->lastName->getValue())) {
return new MemberMatch(self::NO_MATCH, []);
}

// search webling by first and last name
$query = self::buildNameQuery($member);
$matches = self::matchByQuery($query, $rootGroups, $memberRepository);

if (!empty($matches)) {
// make sure we filter out all entries where only the beginning of the
// name was identical, but the given name was no a short name
self::removeWrongNameMatches($matches, $member->firstName->getValue(),
$member->lastName->getValue());
}

if (!empty($matches) && $member->zip->getValue()) {
// filter out all results, where the zip didn't match
self::removeWrongZipMatches($matches, $member->zip->getValue());

return self::create($matches, false);
}

return self::create($matches, true);

// if there is more information available in the record, we create an ambiguous match
if (!empty($matches) && self::hasAdditionalInformation($member)) {
return self::create($matches, true);
}

return new MemberMatch(self::NO_MATCH, []);
}


/**
* Check if the member has a phone number or address assigned
*
* @param Member $member
*
* @return bool
*/
private static function hasAdditionalInformation(Member $member): bool {
return $member->mobilePhone->getValue() ||
$member->landlinePhone->getValue() ||
$member->workPhone->getValue() ||
$member->address1->getValue() ||
$member->address2->getValue();
}

/**
* Return webling query to find members by email
*
Expand All @@ -142,22 +175,41 @@ public static function match(
private static function buildEmailQuery(Member $member): string
{
// as webling compares casesensitive, transform everything to lower case.

$email1 = mb_strtolower(self::escape($member->email1->getWeblingValue()));
$email2 = mb_strtolower(self::escape($member->email2->getWeblingValue()));

$query = [];
if ($member->email1->getWeblingValue()) {
$query[] = "(LOWER(`{$member->email1->getWeblingKey()}`) = '$email1' OR LOWER(`{$member->email2->getWeblingKey()}`) = '$email1')";
}

if ($member->email2->getWeblingValue()) {
$query[] = "(LOWER(`{$member->email1->getWeblingKey()}`) = '$email2' OR LOWER(`{$member->email2->getWeblingKey()}`) = '$email2')";
}

return implode(' OR ', $query);
}


/**
* Return webling query to find members by mobile phone
*
* @param Member $member
*
* @return string
*/
private static function buildMobilePhoneQuery(Member $member): string
{
$mobilePhone = self::normalizePhoneNumber($member->mobilePhone->getWeblingValue());

$query = [];
if ($member->mobilePhone->getWeblingValue()) {
$query[] = "`{$member->mobilePhone->getWeblingKey()}` = '$mobilePhone'";
}

return implode(' OR ', $query);
}

/**
* Return members that matched the given query and log exceptions that
* should not occur here.
Expand Down Expand Up @@ -314,7 +366,7 @@ private static function removeWrongNameMatches(array &$matches, string $firstNam
{
foreach ($matches as $idx => $match) {
if (!self::isShortNameOf($firstName, $match->firstName->getValue())
&& !self::isShortNameOf($match->firstName->getValue(), $lastName)) {
&& !self::isShortNameOf($match->firstName->getValue(), $firstName)) {
unset($matches[$idx]);
}
if (!self::isShortNameOf($lastName, $match->lastName->getValue())
Expand Down Expand Up @@ -342,7 +394,18 @@ private static function removeWrongZipMatches(array &$matches, string $zip)
}
}
}


/**
* Normalize a phone number by removing non-digits and standardizing format.
*
* @param string $phoneNumber
* @return string
*/
private static function normalizePhoneNumber(string $phoneNumber): string
{
return preg_replace('/\D/', '', str_replace('+41', '0', $phoneNumber));
}

/**
* Escape single quotes
*
Expand Down
12 changes: 11 additions & 1 deletion tests/Feature/Http/Controllers/RestApi/RestApiMemberTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -1143,6 +1143,9 @@ public function testPostMatch_200_match_apostrophe()
],
'lastName' => [
'value' => $member->lastName->getValue(),
],
'email1' => [
'value' => $member->email1->getValue(),
]
];

Expand All @@ -1167,6 +1170,9 @@ public function testPostMatch_200_match_ampersand()
],
'lastName' => [
'value' => $member->lastName->getValue(),
],
'email1' => [
'value' => $member->email1->getValue(),
]
];

Expand All @@ -1182,14 +1188,18 @@ public function testPostMatch_200_match_ampersand()
public function testPostMatch_200_ambiguous()
{
$member = $this->getMember(__METHOD__);

$member->address1->setValue("Rue de l'Annonciade 22");

$m = [
'firstName' => [
'value' => $member->firstName->getValue(),
],
'lastName' => [
'value' => $member->lastName->getValue(),
],
'address1' => [
'value' => $member->address1->getValue(),
]
];

// precondition: assert we dont have any records with the same name
Expand Down
48 changes: 43 additions & 5 deletions tests/Unit/Repository/Member/MemberMatchTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ public function test__match()
*/
$member = $this->getNewMember();
$member->email1->setValue('');

// no match
$member1 = $this->saveMember(clone $member);
$member1->firstName->setValue('unknown');
Expand All @@ -176,16 +176,25 @@ public function test__match()
$this->assertEmpty($match->getMatches());
$this->assertEquals(0, $match->count());
$this->memberRepo->delete($member1);
// ambiguous match

// no zip, no address
$member1 = $this->saveMember(clone $member);
$member1->zip->setValue('');
$member1->mobilePhone->setValue('');
$match = MemberMatch::match($member1, [$this->group], $this->memberRepo);
$this->assertEquals(MemberMatch::NO_MATCH, $match->getStatus());
$this->assertEmpty($match->getMatches());
$this->assertEquals(0, $match->count());

// ambiguous match happens only if there is additional information available
$member1->address1->setValue('123 Test Street');
$member1->mobilePhone->setValue('');
$match = MemberMatch::match($member1, [$this->group], $this->memberRepo);
$this->assertEquals(MemberMatch::AMBIGUOUS_MATCH, $match->getStatus());
$this->assertEquals($member1->id, $match->getMatches()[0]->id);
$this->assertEquals(1, $match->count());
$this->memberRepo->delete($member1);

// single match
$member1 = $this->saveMember($member);
$match = MemberMatch::match($member, [$this->group], $this->memberRepo);
Expand All @@ -211,8 +220,37 @@ public function test__match()
$this->assertEmpty($match->getMatches());
$this->assertEquals(0, $match->count());
$this->memberRepo->delete($member1);

/**
* match by name and phone number
* phone number is only used if there is no email
*/
$member->mobilePhone->setValue('0761234567');

// correct phone number
$member1 = $this->saveMember(clone $member);
$member1->zip->setValue('');
$member1->address1->setValue('');
$member1->mobilePhone->setValue('+41 76 123 45 67');
$match = MemberMatch::match($member1, [$this->group], $this->memberRepo);
$this->assertEquals(MemberMatch::MATCH, $match->getStatus());
$this->assertEquals($member1->id, $match->getMatches()[0]->id);
$this->assertEquals(1, $match->count());
$this->memberRepo->delete($member1);

// wrong phone number
$member1 = $this->saveMember(clone $member);
$member1->zip->setValue('');
$member1->address1->setValue('');
$member1->mobilePhone->setValue('0797654321');
$match = MemberMatch::match($member1, [$this->group], $this->memberRepo);
$this->assertEquals(MemberMatch::AMBIGUOUS_MATCH, $match->getStatus());
$this->memberRepo->delete($member1);

// multiple matches
/**
* multiple matches
*/
$member->mobilePhone->setValue('');
$member1 = $this->saveMember($member);
$member2 = $this->saveMember($member);
$match = MemberMatch::match($member, [$this->group], $this->memberRepo);
Expand Down

0 comments on commit de50eca

Please sign in to comment.