Page MenuHomeDevCentral

No OneTemporary

diff --git a/src/Strings/Multibyte/OmniString.php b/src/Strings/Multibyte/OmniString.php
index 28559e6..c62550b 100644
--- a/src/Strings/Multibyte/OmniString.php
+++ b/src/Strings/Multibyte/OmniString.php
@@ -1,80 +1,102 @@
<?php
declare(strict_types=1);
namespace Keruald\OmniTools\Strings\Multibyte;
class OmniString {
use WithEncoding;
///
/// Private members
///
/**
* @var string
*/
private $value;
///
/// Constructor
///
public function __construct (string $value = '', string $encoding = '') {
$this->value = $value;
$this->setEncoding($encoding ?: "UTF-8");
}
///
/// Magic methods
///
public function __toString() : string {
return $this->value;
}
///
/// Helper methods
///
public function pad(
int $padLength = 0,
string $padString = ' ',
int $padType = STR_PAD_RIGHT
) : string {
return (new StringPad)
->setInput($this->value)
->setEncoding($this->encoding)
->setPadLength($padLength)
->setPadString($padString)
->setPadType($padType)
->pad();
}
public function startsWith (string $start) : bool {
return StringUtilities::startsWith($this->value, $start);
}
public function endsWith (string $end) : bool {
return StringUtilities::endsWith($this->value, $end);
}
public function len () : int {
return mb_strlen($this->value, $this->encoding);
}
+    /**
+     * Splits the string into its individual characters.
+     *
+     * Characters are extracted with mb_substr() for the current encoding,
+     * so a multibyte character is returned as one array item.
+     *
+     * @return string[] The characters, in order; an empty array for an empty string
+     */
+    public function getChars () : array {
+        $count = $this->len();
+
+        // range(0, -1) would count down instead of being empty.
+        if ($count === 0) {
+            return [];
+        }
+
+        return array_map(function (int $offset) {
+            return mb_substr($this->value, $offset, 1, $this->encoding);
+        }, range(0, $count - 1));
+    }
+
+    /**
+     * Gets every pair of consecutive characters of the string.
+     *
+     * Pairs overlap: "night" gives "ni", "ig", "gh" and "ht".
+     *
+     * @return string[] The bigrams, in order; an empty array when the string has fewer than two characters
+     */
+    public function getBigrams () : array {
+        $bigrams = [];
+
+        // A string of n characters contains n - 1 bigrams.
+        $bigramsCount = $this->len() - 1;
+        for ($i = 0 ; $i < $bigramsCount ; $i++) {
+            $bigrams[] = mb_substr($this->value, $i, 2, $this->encoding);
+        }
+
+        return $bigrams;
+    }
+
/**
* @return string
*/
public function getValue () : string {
return $this->value;
}
/**
* @param string $value
*/
public function setValue (string $value) {
$this->value = $value;
}
}
diff --git a/src/Strings/SorensenDiceCoefficient.php b/src/Strings/SorensenDiceCoefficient.php
new file mode 100644
index 0000000..040f352
--- /dev/null
+++ b/src/Strings/SorensenDiceCoefficient.php
@@ -0,0 +1,55 @@
+<?php
+declare(strict_types=1);
+
+namespace Keruald\OmniTools\Strings;
+
+use Keruald\OmniTools\Strings\Multibyte\OmniString;
+
+/**
+ * Computes the Sørensen–Dice coefficient of two strings: a similarity score
+ * based on the bigrams (pairs of consecutive characters) the strings share.
+ */
+class SorensenDiceCoefficient {
+
+    /**
+     * @var string[] Bigrams of the left string
+     */
+    private $x;
+
+    /**
+     * @var string[] Bigrams of the right string
+     */
+    private $y;
+
+    /**
+     * @var bool Whether the two strings are strictly equal
+     */
+    private $identical;
+
+    ///
+    /// Constructors
+    ///
+
+    /**
+     * @param string $left  The first string to compare
+     * @param string $right The second string to compare
+     */
+    public function __construct (string $left, string $right) {
+        $this->x = (new OmniString($left))->getBigrams();
+        $this->y = (new OmniString($right))->getBigrams();
+        $this->identical = $left === $right;
+    }
+
+    /**
+     * Computes the coefficient without keeping an instance around.
+     *
+     * @param string $left  The first string to compare
+     * @param string $right The second string to compare
+     * @return float The coefficient, as returned by compute()
+     */
+    public static function computeFor(string $left, string $right) : float {
+        $instance = new self($left, $right);
+
+        return $instance->compute();
+    }
+
+    ///
+    /// Sørensen formula
+    ///
+
+    /**
+     * Applies the formula 2 |X ∩ Y| / (|X| + |Y|), where X and Y are the
+     * bigrams of each string.
+     *
+     * @return float A value from 0.0 (no shared bigram) to 1.0 (same bigrams)
+     */
+    public function compute() : float {
+        $total = $this->countCharacters();
+
+        if ($total === 0) {
+            // Neither string is long enough to contain a bigram, so the
+            // formula would divide by zero. By convention, identical
+            // strings are fully similar and any other pair is not.
+            return $this->identical ? 1.0 : 0.0;
+        }
+
+        return 2 * $this->countIntersect() / $total;
+    }
+
+    /**
+     * Counts the bigrams shared by both strings.
+     *
+     * A bigram repeated in a string is matched at most as many times as it
+     * appears in the other one; this keeps the coefficient symmetric and
+     * never greater than 1.
+     *
+     * @return int The size of the intersection of the two bigram lists
+     */
+    private function countIntersect () : int {
+        $leftOccurrences = array_count_values($this->x);
+        $rightOccurrences = array_count_values($this->y);
+
+        $shared = 0;
+        foreach ($leftOccurrences as $bigram => $count) {
+            if (isset($rightOccurrences[$bigram])) {
+                $shared += min($count, $rightOccurrences[$bigram]);
+            }
+        }
+
+        return $shared;
+    }
+
+    /**
+     * Counts the bigrams of both strings, i.e. the formula denominator.
+     *
+     * @return int The total number of bigrams, duplicates included
+     */
+    private function countCharacters () : int {
+        return count($this->x) + count($this->y);
+    }
+
+}
diff --git a/tests/Strings/Multibyte/OmniStringTest.php b/tests/Strings/Multibyte/OmniStringTest.php
index d9b30da..3e9389b 100644
--- a/tests/Strings/Multibyte/OmniStringTest.php
+++ b/tests/Strings/Multibyte/OmniStringTest.php
@@ -1,52 +1,97 @@
<?php
declare(strict_types=1);
namespace Keruald\OmniTools\Tests\Strings\Multibyte;
use Keruald\OmniTools\Strings\Multibyte\OmniString;
use PHPUnit\Framework\TestCase;
class OmniStringTest extends TestCase {
/**
* @var OmniString
*/
private $string;
protected function setUp () {
$this->string = new OmniString("foo");
}
public function testToString () : void {
$this->assertEquals("foo", (string)$this->string);
$this->assertEquals("foo", $this->string->__toString());
}
public function testPad () : void {
$paddedString = $this->string->pad(9, '-=-', STR_PAD_BOTH);
$this->assertEquals("-=-foo-=-", $paddedString);
}
public function testStartsWith () : void {
$this->assertTrue($this->string->startsWith("fo"));
$this->assertTrue($this->string->startsWith(""));
$this->assertTrue($this->string->startsWith("foo"));
$this->assertFalse($this->string->startsWith("Fo"));
$this->assertFalse($this->string->startsWith("bar"));
}
public function testEndsWith () : void {
$this->assertTrue($this->string->endsWith("oo"));
$this->assertTrue($this->string->endsWith(""));
$this->assertTrue($this->string->endsWith("foo"));
$this->assertFalse($this->string->endsWith("oO"));
$this->assertFalse($this->string->endsWith("bar"));
}
public function testLen () : void {
$this->assertEquals(3, $this->string->len());
}
+    /**
+     * Checks getChars() against each dataset of the provider.
+     *
+     * @dataProvider provideCharactersArrays
+     */
+    public function testGetChars (string $string, array $expectedCharacters) : void {
+        $omniString = new OmniString($string);
+
+        $this->assertEquals($expectedCharacters, $omniString->getChars());
+    }
+
+    /**
+     * Checks getBigrams() against each dataset of the provider.
+     *
+     * @dataProvider provideCharactersBigrams
+     */
+    public function testBigrams (string $string, array $expectedBigrams) : void {
+        $omniString = new OmniString($string);
+
+        $this->assertEquals($expectedBigrams, $omniString->getBigrams());
+    }
+
+ ///
+ /// Data providers
+ ///
+
+    /**
+     * Provides strings along with the characters getChars() should return.
+     *
+     * @return iterable Datasets of [string, expected characters]
+     */
+    public function provideCharactersArrays () : iterable {
+        yield from [
+            ["foo", ['f', 'o', 'o']],
+            [
+                'àèòàFOOàèòà',
+                ['à', 'è', 'ò', 'à', 'F', 'O', 'O', 'à', 'è', 'ò', 'à'],
+            ],
+            // The flag is expected to split into its two symbols.
+            ["🇩🇪", ["🇩", "🇪"]],
+            ["", []],
+        ];
+    }
+
+    /**
+     * Provides strings along with the bigrams getBigrams() should return.
+     *
+     * @return iterable Datasets of [string, expected bigrams]
+     */
+    public function provideCharactersBigrams () : iterable {
+        yield from [
+            ["foo", ['fo', 'oo']],
+            ["night", ['ni', 'ig', 'gh', 'ht']],
+            // The flag is two characters long, hence a single bigram.
+            ["🇩🇪", ["🇩🇪"]],
+            ["", []],
+        ];
+    }
+
}
diff --git a/tests/Strings/SorensenDiceCoefficientTest.php b/tests/Strings/SorensenDiceCoefficientTest.php
new file mode 100644
index 0000000..2e7d17c
--- /dev/null
+++ b/tests/Strings/SorensenDiceCoefficientTest.php
@@ -0,0 +1,17 @@
+<?php
+declare(strict_types=1);
+
+namespace Keruald\OmniTools\Tests\Strings;
+
+use Keruald\OmniTools\Strings\SorensenDiceCoefficient;
+use PHPUnit\Framework\TestCase;
+
+/**
+ * Tests the Sørensen–Dice coefficient through both the instance method
+ * and the static helper.
+ */
+class SorensenDiceCoefficientTest extends TestCase {
+
+    public function testCoefficient () : void {
+        $actual = new SorensenDiceCoefficient('night', 'nacht');
+
+        // Only "ht" is shared among the 4 + 4 bigrams: 2 * 1 / 8.
+        $this->assertEquals(0.25, $actual->compute());
+    }
+
+    public function testCoefficientForIdenticalStrings () : void {
+        $actual = new SorensenDiceCoefficient('night', 'night');
+
+        // All 4 bigrams are shared: 2 * 4 / 8.
+        $this->assertEquals(1.0, $actual->compute());
+    }
+
+    public function testComputeFor () : void {
+        $this->assertEquals(
+            0.25,
+            SorensenDiceCoefficient::computeFor('night', 'nacht')
+        );
+    }
+
+}

File Metadata

Mime Type
text/x-diff
Expires
Mon, Nov 25, 07:44 (1 d, 18 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2256192
Default Alt Text
(7 KB)

Event Timeline