Page MenuHomeDevCentral

No OneTemporary

diff --git a/omnitools/src/Strings/Multibyte/OmniString.php b/omnitools/src/Strings/Multibyte/OmniString.php
index 5e94aa3..7b8b853 100644
--- a/omnitools/src/Strings/Multibyte/OmniString.php
+++ b/omnitools/src/Strings/Multibyte/OmniString.php
@@ -1,125 +1,183 @@
<?php
declare(strict_types=1);
namespace Keruald\OmniTools\Strings\Multibyte;
use Keruald\OmniTools\Collections\Vector;
+/**
+ * Represents a multibyte string and performs operations on it, using the
+ * grapheme library for UTF-8 encoding and mbstring for other encodings.
+ */
class OmniString {
use WithEncoding;
///
/// Private members
///
- /**
- * @var string
- */
- private $value;
+ private string $value;
///
/// Constructor
///
public function __construct (string $value = '', string $encoding = '') {
$this->value = $value;
$this->setEncoding($encoding ?: "UTF-8");
}
///
/// Magic methods
///
public function __toString() : string {
return $this->value;
}
///
/// Helper methods
///
public function pad(
int $padLength = 0,
string $padString = ' ',
int $padType = STR_PAD_RIGHT
) : string {
return (new StringPad)
->setInput($this->value)
->setEncoding($this->encoding)
->setPadLength($padLength)
->setPadString($padString)
->setPadType($padType)
->pad();
}
public function startsWith (string $start) : bool {
return str_starts_with($this->value, $start);
}
public function endsWith (string $end) : bool {
return str_ends_with($this->value, $end);
}
+ /**
+ * @deprecated Use more specific method to express your intent:
+ * countBytes, countCodePoints or countGraphemes
+ */
public function len () : int {
+ return $this->countGraphemes();
+ }
+
+ public function countBytes () : int {
+ return strlen($this->value);
+ }
+
+ public function countCodePoints () : int {
return mb_strlen($this->value, $this->encoding);
}
- public function getChars () : array {
+ /**
+  * Counts the graphemes (user-perceived characters) of the string.
+  *
+  * For UTF-8, the count is delegated to grapheme_strlen. For any other
+  * encoding, it falls back to the number of code points.
+  *
+  * NOTE(review): grapheme_strlen is documented to return false or null
+  * on failure; with the int return type that would raise a TypeError.
+  * Confirm values are always valid UTF-8 when the encoding is UTF-8.
+  *
+  * @return int The number of graphemes, or of code points when the
+  *             encoding is not UTF-8
+  */
+ public function countGraphemes () : int {
+     return match ($this->encoding) {
+         "UTF-8" => grapheme_strlen($this->value),
+         // Spelled exactly like the declaration (countCodePoints).
+         default => $this->countCodePoints(),
+     };
+ }
+
+ /**
+  * Splits the string into its individual bytes, whatever the encoding.
+  *
+  * @return string[] One single-byte string per byte of the value;
+  *                  an empty array when the value is empty
+  */
+ public function getBytes() : array {
+     // Before PHP 8.2, str_split("") returns [""] instead of [];
+     // guard explicitly so the result is the same on every version.
+     if ($this->value === "") {
+         return [];
+     }
+
+     return str_split($this->value, 1);
+ }
+
+ public function getCodePoints () : array {
+ return mb_str_split($this->value, 1, $this->encoding);
+ }
+
+ public function getGraphemes () : array {
+ if ($this->encoding !== "UTF-8") {
+ return $this->getCodePoints();
+ }
+
$chars = [];
- $len = $this->len();
+ $len = grapheme_strlen($this->value);
for ($i = 0 ; $i < $len ; $i++) {
- $chars[] = mb_substr($this->value, $i, 1, $this->encoding);
+ $chars[] = grapheme_substr($this->value, $i, 1);
}
return $chars;
}
+ /**
+ * @deprecated Use more specific method to express your intent:
+ * getBytes, getCodePoints or getGraphemes
+ */
+ public function getChars () : array {
+ return $this->getGraphemes();
+ }
+
public function getBigrams () : array {
+ return match ($this->encoding) {
+ "UTF-8" => $this->getBigramsFromGraphemes(),
+ default => $this->getBigramsFromCodePoints(),
+ };
+ }
+
+ private function getBigramsFromGraphemes() : array {
+ $bigrams = [];
+
+ $len = grapheme_strlen($this->value);
+ for ($i = 0 ; $i < $len - 1 ; $i++) {
+ $bigrams[] = grapheme_substr($this->value, $i, 2);
+ }
+
+ return $bigrams;
+ }
+
+ private function getBigramsFromCodePoints() : array {
$bigrams = [];
- $len = $this->len();
+ $len = mb_strlen($this->value, $this->encoding);
for ($i = 0 ; $i < $len - 1 ; $i++) {
$bigrams[] = mb_substr($this->value, $i, 2, $this->encoding);
}
return $bigrams;
}
///
/// Transformation methods
///
public function explode (string $delimiter,
int $limit = PHP_INT_MAX) : Vector {
if ($delimiter === "") {
if ($limit < 0) {
return new Vector;
}
return new Vector([$this->value]);
}
return new Vector(explode($delimiter, $this->value, $limit));
}
///
/// Getters and setters
///
/**
* @return string
*/
public function getValue () : string {
return $this->value;
}
/**
* @param string $value
*/
public function setValue (string $value) : void {
$this->value = $value;
}
}
diff --git a/omnitools/src/Strings/Multibyte/StringUtilities.php b/omnitools/src/Strings/Multibyte/StringUtilities.php
index 7702467..7172642 100644
--- a/omnitools/src/Strings/Multibyte/StringUtilities.php
+++ b/omnitools/src/Strings/Multibyte/StringUtilities.php
@@ -1,97 +1,94 @@
<?php
declare(strict_types=1);
namespace Keruald\OmniTools\Strings\Multibyte;
class StringUtilities {
/**
* Pads a multibyte string to a certain length with another string
*
* @param string $input the input string
* @param int $padLength the target string size
* @param string $padString the padding characters (optional, default is space)
* @param int $padType STR_PAD_RIGHT, STR_PAD_LEFT, or STR_PAD_BOTH (optional, default is STR_PAD_RIGHT)
* @param string $encoding the character encoding (optional)
*
* @return string the padded string
*
*/
public static function pad (
string $input,
int $padLength,
string $padString = ' ',
int $padType = STR_PAD_RIGHT,
string $encoding = ''
) : string {
return (new StringPad)
->setInput($input)
->setPadLength($padLength)
->setPadString($padString)
->setPadType($padType)
- ->setEncoding($encoding ?: mb_internal_encoding())
+ ->setEncoding($encoding ?: "UTF-8")
->pad();
}
+ /**
+ * Determines if an encoding name can be used by this library.
+ *
+ * "UTF-8" is accepted directly; any other name must exactly match,
+ * case included, one of the names returned by mb_list_encodings().
+ *
+ * @param string $encoding The encoding name to check
+ * @return bool true if the encoding is supported, false otherwise
+ */
public static function isSupportedEncoding (string $encoding) : bool {
- foreach (mb_list_encodings() as $supportedEncoding) {
- if ($encoding === $supportedEncoding) {
- return true;
- }
- }
-
- return false;
+ return match ($encoding) {
+ "UTF-8" => true,
+ // Strict comparison, as the replaced loop did with ===
+ default => in_array($encoding, mb_list_encodings(), true),
+ };
}
/**
* @deprecated Since PHP 8.0, we can replace by \str_starts_with
*/
public static function startsWith (string $string, string $start) : bool {
$length = mb_strlen($start);
return mb_substr($string, 0, $length) === $start;
}
/**
* @deprecated Since PHP 8.0, we can replace by \str_ends_with
*/
public static function endsWith (string $string, string $end) : bool {
$length = mb_strlen($end);
return $length === 0 || mb_substr($string, -$length) === $end;
}
/**
* @deprecated Since PHP 8.0, we can replace by \str_contains
*/
public static function contains (string $string, string $needle) : bool {
return str_contains($string, $needle);
}
/**
* Encode a string using a variant of the MIME base64 compatible with URLs.
*
* The '+' and '/' characters used in base64 are replaced by '-' and '_'.
* The '=' padding is removed.
*
* @param string $string The string to encode
* @return string The encoded string
*/
public static function encodeInBase64 (string $string) : string {
return str_replace(
['+', '/', '='],
['-', '_', ''],
base64_encode($string)
);
}
/**
* Decode a string encoded with StringUtilities::encodeInBase64
*
* @param string $string The string to decode
* @return string The decoded string
*/
public static function decodeFromBase64 (string $string) : string {
$toDecode = str_replace(['-', '_'], ['+', '/'], $string);
return base64_decode($toDecode);
}
}
diff --git a/omnitools/tests/Strings/Multibyte/OmniStringTest.php b/omnitools/tests/Strings/Multibyte/OmniStringTest.php
index 06fadf1..911069a 100644
--- a/omnitools/tests/Strings/Multibyte/OmniStringTest.php
+++ b/omnitools/tests/Strings/Multibyte/OmniStringTest.php
@@ -1,116 +1,214 @@
<?php
declare(strict_types=1);
namespace Keruald\OmniTools\Tests\Strings\Multibyte;
+use Keruald\OmniTools\Collections\Vector;
use Keruald\OmniTools\Strings\Multibyte\OmniString;
use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\TestCase;
class OmniStringTest extends TestCase {
private OmniString $string;
protected function setUp () : void {
$this->string = new OmniString("foo");
}
public function testToString () : void {
$this->assertEquals("foo", (string)$this->string);
$this->assertEquals("foo", $this->string->__toString());
}
public function testPad () : void {
$paddedString = $this->string->pad(9, '-=-', STR_PAD_BOTH);
$this->assertEquals("-=-foo-=-", $paddedString);
}
public function testStartsWith () : void {
$this->assertTrue($this->string->startsWith("fo"));
$this->assertTrue($this->string->startsWith(""));
$this->assertTrue($this->string->startsWith("foo"));
$this->assertFalse($this->string->startsWith("Fo"));
$this->assertFalse($this->string->startsWith("bar"));
}
public function testEndsWith () : void {
$this->assertTrue($this->string->endsWith("oo"));
$this->assertTrue($this->string->endsWith(""));
$this->assertTrue($this->string->endsWith("foo"));
$this->assertFalse($this->string->endsWith("oO"));
$this->assertFalse($this->string->endsWith("bar"));
}
public function testLen () : void {
$this->assertEquals(3, $this->string->len());
}
+ /**
+  * Checks countBytes against the bytes column of the provider.
+  *
+  * The provider rows are shared with the other count tests, so the
+  * $codePoints and $graphemes columns are received but not used here.
+  */
+ #[DataProvider("provideLengthCounts")]
+ public function testCountBytes (string $string, int $bytes, int $codePoints, int $graphemes) : void {
+     $count = (new OmniString($string))->countBytes();
+     $this->assertSame($bytes, $count);
+ }
+
+ /**
+  * Checks countCodePoints against the code points column of the provider.
+  *
+  * The provider rows are shared with the other count tests, so the
+  * $bytes and $graphemes columns are received but not used here.
+  */
+ #[DataProvider("provideLengthCounts")]
+ public function testCountCodePoints (string $string, int $bytes, int $codePoints, int $graphemes) : void {
+     $count = (new OmniString($string))->countCodePoints();
+     $this->assertSame($codePoints, $count);
+ }
+
+ /**
+  * Checks countGraphemes against the graphemes column of the provider.
+  *
+  * The provider rows are shared with the other count tests, so the
+  * $bytes and $codePoints columns are received but not used here.
+  */
+ #[DataProvider("provideLengthCounts")]
+ public function testCountGraphemes (string $string, int $bytes, int $codePoints, int $graphemes) : void {
+     $count = (new OmniString($string))->countGraphemes();
+     $this->assertSame($graphemes, $count);
+ }
+
#[DataProvider('provideCharactersArrays')]
public function testGetChars (string $string, array $expectedCharacters) : void {
$actualCharacters = (new OmniString($string))->getChars();
$this->assertEquals($expectedCharacters, $actualCharacters);
}
+ #[DataProvider("provideBytes")]
+ public function testGetBytes (string $string, array $expectedBytes) : void {
+ $actual = (new OmniString($string))->getBytes();
+
+ $this->assertEquals($expectedBytes, $actual);
+ }
+
+ #[DataProvider("provideCodePoints")]
+ public function testGetCodePoints (string $string, array $expectedCodePoints) : void {
+ $actual = (new OmniString($string))->getCodePoints();
+
+ $this->assertEquals($expectedCodePoints, $actual);
+ }
+
+ #[DataProvider("provideGraphemes")]
+ public function testGetGraphemes (string $string, array $expectedGraphemes) : void {
+ $actual = (new OmniString($string))->getGraphemes();
+
+ $this->assertEquals($expectedGraphemes, $actual);
+ }
+
#[DataProvider('provideCharactersBigrams')]
public function testBigrams (string $string, array $expectedBigrams) : void {
$actualBigrams = (new OmniString($string))->getBigrams();
$this->assertEquals($expectedBigrams, $actualBigrams);
}
#[DataProvider('provideExplosions')]
public function testExplode (string $delimiter, string $imploded, array $exploded) : void {
$actual = (new OmniString($imploded))
->explode($delimiter)
->toArray();
$this->assertEquals($exploded, $actual);
}
public function testExplodeWithEmptyOmniArray () : void {
$array = (new OmniString("foo"))
->explode("", -1);
$this->assertEquals(0, count($array->toArray()));
}
///
/// Data providers
///
public static function provideCharactersArrays () : iterable {
yield ["foo", ['f', 'o', 'o']];
yield [
'àèòàFOOàèòà',
['à', 'è', 'ò', 'à', 'F', 'O', 'O', 'à', 'è', 'ò', 'à']
];
- yield ["🇩🇪", ["🇩", "🇪"]];
+ yield ["🇩🇪", ["🇩🇪"]];
yield ["", []];
}
public static function provideCharactersBigrams () : iterable {
yield ["foo", ['fo', 'oo']];
yield ["night", ['ni', 'ig', 'gh', 'ht']];
- yield ["🇩🇪", ["🇩🇪"]];
+ yield ["x", []]; // Only one character -> no bigram
+
+ yield ["🇩🇪", []]; // Only one character -> no bigram
yield ["", []];
}
+
public static function provideExplosions () : iterable {
yield ["/", "a/b/c", ['a', 'b', 'c']];
yield ["/", "abc", ['abc']];
yield ["/", "/b/c", ['', 'b', 'c']];
yield ["/", "a/b/", ['a', 'b', '']];
yield ["", "a/b/c", ['a/b/c']];
yield ["x", "a/b/c", ['a/b/c']];
}
+ public static function provideLengthCounts () : iterable {
+ // Character, bytes, code points, graphemes
+ yield ["🏴󠁧󠁢󠁥󠁮󠁧󠁿", 28, 7, 1];
+
+ yield ["", 0, 0, 0];
+ yield ["a", 1, 1, 1];
+ yield ["foo", 3, 3, 3];
+ yield ["é", 2, 1, 1];
+
+ yield ["\0", 1, 1, 1]; // PHP strings are NOT null-terminated
+ }
+
+ public static function provideBytes () : iterable {
+ yield ["🏴󠁧󠁢󠁥󠁮󠁧󠁿", [
+ "\xF0", "\x9F", "\x8F", "\xB4",
+ "\xF3", "\xA0", "\x81", "\xA7",
+ "\xF3", "\xA0", "\x81", "\xA2",
+ "\xF3", "\xA0", "\x81", "\xA5",
+ "\xF3", "\xA0", "\x81", "\xAE",
+ "\xF3", "\xA0", "\x81", "\xA7",
+ "\xF3", "\xA0", "\x81", "\xBF",
+ ]];
+
+ yield ["", []];
+ yield ["a", ["a"]];
+ yield ["foo", ["f", "o", "o"]];
+ yield ["é", ["\xC3", "\xA9"]];
+ }
+
+ public static function provideCodePoints () : iterable {
+ yield ["🏴󠁧󠁢󠁥󠁮󠁧󠁿", [
+ "\xF0\x9F\x8F\xB4",
+ "\xF3\xA0\x81\xA7",
+ "\xF3\xA0\x81\xA2",
+ "\xF3\xA0\x81\xA5",
+ "\xF3\xA0\x81\xAE",
+ "\xF3\xA0\x81\xA7",
+ "\xF3\xA0\x81\xBF",
+ ]];
+
+ yield ["", []];
+ yield ["a", ["a"]];
+ yield ["foo", ["f", "o", "o"]];
+ yield ["é", ["é"]];
+ }
+
+ public static function provideGraphemes () : iterable {
+ yield ["🏴󠁧󠁢󠁥󠁮󠁧󠁿", ["🏴󠁧󠁢󠁥󠁮󠁧󠁿"]];
+
+ yield ["", []];
+ yield ["a", ["a"]];
+ yield ["foo", ["f", "o", "o"]];
+ yield ["é", ["é"]];
+ }
+
}

File Metadata

Mime Type
text/x-diff
Expires
Mon, Sep 15, 07:22 (1 d, 7 h)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2973885
Default Alt Text
(15 KB)

Event Timeline