Page Menu
Home
DevCentral
Search
Configure Global Search
Log In
Files
F27327249
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
2 KB
Referenced Files
None
Subscribers
None
View Options
diff --git a/src/Strings/SorensenDiceCoefficient.php b/src/Strings/SorensenDiceCoefficient.php
index 040f352..12913b4 100644
--- a/src/Strings/SorensenDiceCoefficient.php
+++ b/src/Strings/SorensenDiceCoefficient.php
@@ -1,55 +1,74 @@
<?php
declare(strict_types=1);
namespace Keruald\OmniTools\Strings;
use Keruald\OmniTools\Strings\Multibyte\OmniString;
+/**
+ * Computes the Sørensen–Dice coefficient, a statistic used to evaluate
+ * the similarity between two strings.
+ */
class SorensenDiceCoefficient {
/**
* @var string[]
*/
- private $x;
+ private array $x;
/**
* @var string[]
*/
- private $y;
+ private array $y;
///
/// Constructors
///
+ /**
+ * @param string $left The first string to compare
+ * @param string $right The second string to compare
+ */
public function __construct (string $left, string $right) {
    // Wrap each input in an OmniString and keep only its bigrams;
    // the original strings are not stored.
    $leftString = new OmniString($left);
    $this->x = $leftString->getBigrams();

    $rightString = new OmniString($right);
    $this->y = $rightString->getBigrams();
}
+ /**
+ * Directly computes the coefficient between two strings.
+ *
+ * @param string $left The first string to compare
+ * @param string $right The second string to compare
+ *
+ * @return float The Sørensen–Dice coefficient for the two specified strings.
+ */
public static function computeFor(string $left, string $right) : float {
    // One-shot shortcut: build a throwaway instance and compute at once.
    return (new self($left, $right))->compute();
}
///
/// Sørensen formula
///
+ /**
+ * @return float The Sørensen–Dice coefficient.
+ */
public function compute() : float {
    // Denominator of the formula: entries in $x plus entries in $y.
    $total = $this->countCharacters();

    // When neither string produced any bigram, there is nothing to
    // compare and the division below would be a division by zero.
    // Report "no similarity" instead of raising an error.
    if ($total === 0) {
        return 0.0;
    }

    // Twice the shared bigrams, divided by the total number of bigrams.
    return 2 * $this->countIntersect() / $total;
}
private function countIntersect () : int {
    // Number of entries of $x whose value also appears in $y.
    // NOTE(review): array_intersect() keeps every matching entry of $x,
    // so a bigram repeated in $x is counted once per repetition;
    // confirm this is the intended behaviour for getBigrams() output.
    return count(array_intersect($this->x, $this->y));
}
private function countCharacters () : int {
    // Despite the method name, this counts the entries of $x and $y,
    // which the constructor fills with bigrams, not single characters.
    $leftCount = count($this->x);
    $rightCount = count($this->y);

    return $leftCount + $rightCount;
}
}
diff --git a/tests/Strings/SorensenDiceCoefficientTest.php b/tests/Strings/SorensenDiceCoefficientTest.php
index 2e7d17c..c69aafa 100644
--- a/tests/Strings/SorensenDiceCoefficientTest.php
+++ b/tests/Strings/SorensenDiceCoefficientTest.php
@@ -1,17 +1,24 @@
<?php
declare(strict_types=1);
namespace Keruald\OmniTools\Tests\Strings;
use Keruald\OmniTools\Strings\SorensenDiceCoefficient;
use PHPUnit\Framework\TestCase;
class SorensenDiceCoefficientTest extends TestCase {
public function testCoefficient () : void {
    // Go through the instance API: construct, then compute.
    $coefficient = (new SorensenDiceCoefficient('night', 'nacht'))->compute();

    $this->assertEquals(0.25, $coefficient);
}
+ public function testComputeFor () : void { // Covers the static computeFor() shortcut.
+ $score = SorensenDiceCoefficient::computeFor('night', 'nacht');
+ // Loose sanity check: only requires the score to lie strictly between 0 and 1.
+ $this->assertGreaterThan(0, $score);
+ $this->assertLessThan(1, $score);
+ }
+
}
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Tue, May 5, 11:49 (15 h, 43 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3684471
Default Alt Text
(2 KB)
Attached To
Mode
rKOT Keruald OmniTools
Attached
Detach File
Event Timeline
Log In to Comment