If for some reason you need a base10 (pure-number) encoding instead, i.e. output consisting only of the digits 0123456789:
<?php
/**
 * Encode a string as decimal digits, three digits per input byte.
 *
 * Every byte is converted to its ord() value (0-255) and written as a
 * zero-padded, 3-character decimal group, so the output is exactly three
 * times as long as the input and contains only the characters 0-9.
 *
 * @param string $str Bytes to encode; processed byte-wise, not per character.
 * @return string The concatenated 3-digit groups (empty string for empty input).
 */
function base10_encode(string $str): string
{
    $digits = '';
    $length = strlen($str);
    $offset = 0;
    while ($offset < $length) {
        // %03d left-pads with zeros so each byte always occupies three digits.
        $digits .= sprintf('%03d', ord($str[$offset]));
        $offset++;
    }
    return $digits;
}
/**
 * Decode a string of 3-digit decimal groups back into raw bytes.
 *
 * This is the inverse of an encoding that writes each byte as a zero-padded
 * 3-digit decimal number ("000".."255"). Input is validated up front so
 * that malformed data is reported instead of being silently turned into
 * wrong bytes.
 *
 * @param string $str Encoded digits; length must be a multiple of 3.
 * @return string The decoded bytes (empty string for empty input).
 * @throws \InvalidArgumentException If the length is not a multiple of 3,
 *         if a non-digit character is present, or if any 3-digit group
 *         is greater than 255.
 */
function base10_decode(string $str): string
{
    $length = strlen($str);
    if ($length % 3 !== 0) {
        // A trailing 1- or 2-digit group would otherwise decode as a bogus byte.
        throw new \InvalidArgumentException(
            'base10 input length must be a multiple of 3, got ' . $length
        );
    }
    if (strspn($str, '0123456789') !== $length) {
        // The (int) cast would quietly coerce things like "12a" or " 65".
        throw new \InvalidArgumentException(
            'base10 input may only contain the digits 0-9'
        );
    }

    $ret = '';
    for ($i = 0; $i < $length; $i += 3) {
        $byte = (int) substr($str, $i, 3);
        if ($byte > 255) {
            // chr() wraps values modulo 256, which would hide the corruption.
            throw new \InvalidArgumentException(
                'base10 group at offset ' . $i . ' is out of byte range: ' . $byte
            );
        }
        $ret .= chr($byte);
    }
    return $ret;
}
?>
It is Unicode-safe and binary-safe. Testing:
<?php
// Round-trip demo: encode each sample with base10_encode(), decode it again
// with base10_decode(), and dump the raw/encoded/decoded values together with
// a "corrupted" flag that is true when the decoded bytes differ from the input.

// A string containing every byte value from 0x00 through 0xFF.
$everything = implode("", array_map("chr", range(0, 0xFF)));

$samples = [
    // Plain ASCII.
    "test",
    // Non-ASCII text: IPA, math symbols, Greek, Georgian, Ethiopic, runes, chess glyphs.
    "ˈmaʳkʊs kuːn ℕ ⊆ ℕ₀ ⊂ ℤ ⊂ ℚ ⊂ ℝ ⊂ ℂ, ⊥ < a ≠ b ≡ c ≤ d ≪ ⊤ ⇒ (A ⇔ B), Σὲ γνωρίζω ἀπὸ τὴν κόψη Οὐχὶ ταὐτὰ παρίσταταί გთხოვთ ሰማይ አይታረስ ንጉሥ አይከሰስ ᚻᛖ ᚳᚹᚫᚦ ᚦᚫᛏ ᚻᛖ ᛒᚢᛞᛖ ᚩᚾ ᚦᚫᛗ ᛚᚪᚾᛞᛖ ᚾᚩᚱᚦᚹᛖᚪᚱᛞᚢᛗ ᚹᛁᚦ ᚦᚪ ᚹᛖᛥᚫ "
        . "♔♕♖♗♘♙♚♛♜♝♞🙾🙿",
    // Arbitrary binary data.
    $everything,
];

foreach ($samples as $sample) {
    $encoded = base10_encode($sample);
    $decoded = base10_decode($encoded);
    $d = [
        "raw" => $sample,
        "b10" => $encoded,
        "decoded" => $decoded,
        "corrupted" => $sample !== $decoded,
    ];
    var_dump($d);
}
?>