Page Menu · Home · DevCentral

D2052.id5265.diff
No One · Temporary

D2052.id5265.diff

diff --git a/src/Strings/Multibyte/OmniString.php b/src/Strings/Multibyte/OmniString.php
--- a/src/Strings/Multibyte/OmniString.php
+++ b/src/Strings/Multibyte/OmniString.php
@@ -63,6 +63,28 @@
return mb_strlen($this->value, $this->encoding);
}
+    /**
+     * Splits the string into its individual characters.
+     *
+     * Each element is the one-character slice mb_substr() returns for the
+     * string's encoding, in their original order.
+     *
+     * @return string[]
+     */
+    public function getChars () : array {
+        $count = $this->len();
+        $characters = [];
+
+        $position = 0;
+        while ($position < $count) {
+            $characters[] = mb_substr($this->value, $position, 1, $this->encoding);
+            $position++;
+        }
+
+        return $characters;
+    }
+
+    /**
+     * Lists every pair of adjacent characters (bigram) of the string.
+     *
+     * Bigrams overlap: "night" gives ['ni', 'ig', 'gh', 'ht']. A string
+     * shorter than two characters has no bigram and gives [].
+     *
+     * @return string[]
+     */
+    public function getBigrams () : array {
+        $bigrams = [];
+
+        // The last bigram starts at the second-to-last character.
+        $lastStart = $this->len() - 2;
+        for ($i = 0 ; $i <= $lastStart ; $i++) {
+            $bigrams[] = mb_substr($this->value, $i, 2, $this->encoding);
+        }
+
+        return $bigrams;
+    }
+
/**
* @return string
*/
diff --git a/src/Strings/SorensenDiceCoefficient.php b/src/Strings/SorensenDiceCoefficient.php
new file mode 100644
--- /dev/null
+++ b/src/Strings/SorensenDiceCoefficient.php
@@ -0,0 +1,55 @@
+<?php
+declare(strict_types=1);
+
+namespace Keruald\OmniTools\Strings;
+
+use Keruald\OmniTools\Strings\Multibyte\OmniString;
+
+/**
+ * Sørensen–Dice coefficient of two strings, computed on their bigrams.
+ *
+ * The coefficient is twice the number of bigrams both strings share,
+ * divided by the total number of bigrams of the two strings. It ranges
+ * from 0.0 (no shared bigram) to 1.0 (same bigrams).
+ */
+class SorensenDiceCoefficient {
+
+    /**
+     * @var string[] Bigrams of the left string
+     */
+    private $x;
+
+    /**
+     * @var string[] Bigrams of the right string
+     */
+    private $y;
+
+    ///
+    /// Constructors
+    ///
+
+    /**
+     * @param string $left  First string to compare
+     * @param string $right Second string to compare
+     */
+    public function __construct (string $left, string $right) {
+        $this->x = (new OmniString($left))->getBigrams();
+        $this->y = (new OmniString($right))->getBigrams();
+    }
+
+    /**
+     * Computes the coefficient of two strings in a single call.
+     *
+     * @param string $left  First string to compare
+     * @param string $right Second string to compare
+     * @return float The coefficient, as returned by compute()
+     */
+    public static function computeFor(string $left, string $right) : float {
+        $instance = new self($left, $right);
+
+        return $instance->compute();
+    }
+
+    ///
+    /// Sørensen formula
+    ///
+
+    /**
+     * Applies the formula 2 × |shared bigrams| / (|left| + |right|).
+     *
+     * @return float The coefficient; 0.0 when neither string has a bigram
+     */
+    public function compute() : float {
+        $total = $this->countCharacters();
+
+        if ($total === 0) {
+            // Both strings are shorter than two characters: there is no
+            // bigram to compare, and the formula would divide by zero.
+            return 0.0;
+        }
+
+        return 2.0 * $this->countIntersect() / $total;
+    }
+
+    /**
+     * Counts the bigrams shared by both strings, as a multiset.
+     *
+     * A bigram present m times on the left and n times on the right
+     * counts for min(m, n). Counting every left-hand occurrence, as
+     * array_intersect() does, makes the result depend on argument order
+     * and lets the coefficient exceed 1 when a bigram is repeated.
+     *
+     * @return int Number of shared bigrams
+     */
+    private function countIntersect () : int {
+        $leftCounts = array_count_values($this->x);
+        $rightCounts = array_count_values($this->y);
+
+        $shared = 0;
+        foreach ($leftCounts as $bigram => $occurrences) {
+            $shared += min($occurrences, $rightCounts[$bigram] ?? 0);
+        }
+
+        return $shared;
+    }
+
+    /**
+     * Counts the bigrams of both strings, duplicates included.
+     *
+     * @return int Denominator of the formula
+     */
+    private function countCharacters () : int {
+        return count($this->x) + count($this->y);
+    }
+
+}
diff --git a/tests/Strings/Multibyte/OmniStringTest.php b/tests/Strings/Multibyte/OmniStringTest.php
--- a/tests/Strings/Multibyte/OmniStringTest.php
+++ b/tests/Strings/Multibyte/OmniStringTest.php
@@ -49,4 +49,49 @@
$this->assertEquals(3, $this->string->len());
}
+    /**
+     * Checks getChars() against each dataset of the provider.
+     *
+     * @dataProvider provideCharactersArrays
+     */
+    public function testGetChars (string $string, array $expectedCharacters) : void {
+        $subject = new OmniString($string);
+
+        $this->assertEquals($expectedCharacters, $subject->getChars());
+    }
+
+    /**
+     * Checks getBigrams() against each dataset of the provider.
+     *
+     * @dataProvider provideCharactersBigrams
+     */
+    public function testBigrams (string $string, array $expectedBigrams) : void {
+        $subject = new OmniString($string);
+
+        $this->assertEquals($expectedBigrams, $subject->getBigrams());
+    }
+
+ ///
+ /// Data providers
+ ///
+
+    /**
+     * Datasets for testGetChars: [string, expected characters].
+     */
+    public function provideCharactersArrays () : iterable {
+        yield from [
+            ["foo", ['f', 'o', 'o']],
+            [
+                'àèòàFOOàèòà',
+                ['à', 'è', 'ò', 'à', 'F', 'O', 'O', 'à', 'è', 'ò', 'à']
+            ],
+            // The flag is expected to split into its two symbols.
+            ["🇩🇪", ["🇩", "🇪"]],
+            ["", []],
+        ];
+    }
+
+    /**
+     * Datasets for testBigrams: [string, expected bigrams].
+     */
+    public function provideCharactersBigrams () : iterable {
+        yield ["foo", ['fo', 'oo']];
+
+        yield ["night", ['ni', 'ig', 'gh', 'ht']];
+
+        // Multibyte characters must be paired by character, not by byte.
+        yield ["àèò", ['àè', 'èò']];
+
+        yield ["🇩🇪", ["🇩🇪"]];
+
+        // Exactly two characters: the string is its own single bigram.
+        yield ["ab", ['ab']];
+
+        // A single character is too short to form a bigram.
+        yield ["a", []];
+
+        yield ["", []];
+    }
+
}
diff --git a/tests/Strings/SorensenDiceCoefficientTest.php b/tests/Strings/SorensenDiceCoefficientTest.php
new file mode 100644
--- /dev/null
+++ b/tests/Strings/SorensenDiceCoefficientTest.php
@@ -0,0 +1,17 @@
+<?php
+declare(strict_types=1);
+
+namespace Keruald\OmniTools\Tests\Strings;
+
+use Keruald\OmniTools\Strings\SorensenDiceCoefficient;
+use PHPUnit\Framework\TestCase;
+
+/**
+ * Tests the Sørensen–Dice coefficient on known pairs of strings.
+ */
+class SorensenDiceCoefficientTest extends TestCase {
+
+    public function testCoefficient () : void {
+        $coefficient = new SorensenDiceCoefficient('night', 'nacht');
+
+        // Only "ht" is shared among the 4 + 4 bigrams: 2 * 1 / 8.
+        $this->assertEquals(0.25, $coefficient->compute());
+    }
+
+    public function testComputeFor () : void {
+        // The static shortcut must agree with the instance method.
+        $this->assertEquals(
+            0.25,
+            SorensenDiceCoefficient::computeFor('night', 'nacht')
+        );
+    }
+
+    public function testCoefficientOfIdenticalStrings () : void {
+        $this->assertEquals(
+            1.0,
+            SorensenDiceCoefficient::computeFor('night', 'night')
+        );
+    }
+
+    public function testCoefficientOfStringsWithoutSharedBigram () : void {
+        $this->assertEquals(
+            0.0,
+            SorensenDiceCoefficient::computeFor('night', 'day')
+        );
+    }
+
+}

File Metadata

Mime Type
text/plain
Expires
Sun, Dec 1, 04:21 (21 h, 50 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2274062
Default Alt Text
D2052.id5265.diff (4 KB)

Event Timeline