Page MenuHomeDevCentral

D2550.id6432.diff
No OneTemporary

D2550.id6432.diff

diff --git a/omnitools/src/Strings/Multibyte/OmniString.php b/omnitools/src/Strings/Multibyte/OmniString.php
--- a/omnitools/src/Strings/Multibyte/OmniString.php
+++ b/omnitools/src/Strings/Multibyte/OmniString.php
@@ -5,6 +5,10 @@
use Keruald\OmniTools\Collections\Vector;
+/**
+ * Represents a multibyte string and performs operations with the grapheme
+ * library for UTF-8 encoding, and with mbstring for other encodings.
+ */
class OmniString {
use WithEncoding;
@@ -13,10 +17,7 @@
/// Private members
///
- /**
- * @var string
- */
- private $value;
+ private string $value;
///
/// Constructor
@@ -61,27 +62,84 @@
return str_ends_with($this->value, $end);
}
+ /**
+ * @deprecated Use a more specific method to express your intent:
+ * countBytes, countCodePoints or countGraphemes
+ */
public function len () : int {
+ return $this->countGraphemes();
+ }
+
+ public function countBytes () : int {
+ return strlen($this->value);
+ }
+
+ public function countCodePoints () : int {
return mb_strlen($this->value, $this->encoding);
}
- public function getChars () : array {
+ public function countGraphemes () : int {
+ return match ($this->encoding) {
+ "UTF-8" => grapheme_strlen($this->value),
+ default => $this->countCodepoints(),
+ };
+ }
+
+ public function getBytes() : array {
+ return str_split($this->value, 1);
+ }
+
+ public function getCodePoints () : array {
+ return mb_str_split($this->value, 1, $this->encoding);
+ }
+
+ public function getGraphemes () : array {
+ if ($this->encoding !== "UTF-8") {
+ return $this->getCodePoints();
+ }
+
$chars = [];
- $len = $this->len();
+ $len = grapheme_strlen($this->value);
for ($i = 0 ; $i < $len ; $i++) {
- $chars[] = mb_substr($this->value, $i, 1, $this->encoding);
+ $chars[] = grapheme_substr($this->value, $i, 1);
}
return $chars;
}
+ /**
+ * @deprecated Use a more specific method to express your intent:
+ * getBytes, getCodePoints or getGraphemes
+ */
+ public function getChars () : array {
+ return $this->getGraphemes();
+ }
+
public function getBigrams () : array {
+ return match ($this->encoding) {
+ "UTF-8" => $this->getBigramsFromGraphemes(),
+ default => $this->getBigramsFromCodePoints(),
+ };
+ }
+
+ private function getBigramsFromGraphemes() : array {
+ $bigrams = [];
+
+ $len = grapheme_strlen($this->value);
+ for ($i = 0 ; $i < $len - 1 ; $i++) {
+ $bigrams[] = grapheme_substr($this->value, $i, 2);
+ }
+
+ return $bigrams;
+ }
+
+ private function getBigramsFromCodePoints() : array {
$bigrams = [];
- $len = $this->len();
+ $len = mb_strlen($this->value, $this->encoding);
for ($i = 0 ; $i < $len - 1 ; $i++) {
- $bigrams[] = mb_substr($this->value, $i, 2, $this->encoding);
+ $bigrams[] = mb_substr($this->value, $i, 2,$this->encoding);
}
return $bigrams;
@@ -122,4 +180,5 @@
$this->value = $value;
}
+
}
diff --git a/omnitools/src/Strings/Multibyte/StringUtilities.php b/omnitools/src/Strings/Multibyte/StringUtilities.php
--- a/omnitools/src/Strings/Multibyte/StringUtilities.php
+++ b/omnitools/src/Strings/Multibyte/StringUtilities.php
@@ -29,18 +29,15 @@
->setPadLength($padLength)
->setPadString($padString)
->setPadType($padType)
- ->setEncoding($encoding ?: mb_internal_encoding())
+ ->setEncoding($encoding ?: "UTF-8")
->pad();
}
public static function isSupportedEncoding (string $encoding) : bool {
- foreach (mb_list_encodings() as $supportedEncoding) {
- if ($encoding === $supportedEncoding) {
- return true;
- }
- }
-
- return false;
+ return match ($encoding) {
+ "UTF-8" => true,
+ default => in_array($encoding, mb_list_encodings()),
+ };
}
/**

File Metadata

Mime Type
text/plain
Expires
Thu, Jan 16, 12:26 (19 h, 26 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
2352956
Default Alt Text
D2550.id6432.diff (4 KB)

Event Timeline