Using mb_strlen in PHP 5+ to inspect multibyte strings and their hex values
Posted: Thu Apr 07, 2011 2:03 pm
Code: Select all
<html>
<head>
<!-- The page displays UTF-8 data (the PHP below decodes its input as UTF-8), so the
     charset is declared explicitly; otherwise the browser may guess a different encoding
     and render multibyte characters as garbage. -->
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<!-- Monospace font so the {char} and {hex} columns are easy to compare by eye. -->
<style>
body, textarea, h3
{
font-family:courier;
}
</style>
</head>
<body>
hex chars example<br>
<?php
//-----
/**
 * Break a string down twice: once character by character (decoded as UTF-8)
 * and once byte by byte, recording each unit and its hexadecimal value.
 *
 * Every unit is wrapped in braces, e.g. "{h}{i}" / "{68}{69}", so the two
 * views can be compared side by side.
 *
 * @param string $data The string to inspect; the multibyte pass decodes it as UTF-8.
 *
 * @return array Associative array with the keys:
 *               'data'     - the input, unchanged,
 *               'chars'    - each byte wrapped in braces,
 *               'hex'      - hex value of each byte (via hex_format()),
 *               'mb_chars' - each UTF-8 character wrapped in braces,
 *               'mb_hex'   - hex of each character's 32-bit UCS-4BE value (via hex_format()).
 */
function hex_chars($data)
{
    // Pass 1: walk the string one UTF-8 character at a time.
    $mbCharList = '';
    $mbHexList = '';
    $charCount = mb_strlen($data, 'UTF-8');
    $charPos = 0;
    while ($charPos < $charCount) {
        $char = mb_substr($data, $charPos, 1, 'UTF-8');
        $mbCharList .= '{' . $char . '}';
        // UCS-4BE is a fixed-width, 4-byte big-endian encoding, so unpack('N')
        // can read the converted character as a single unsigned 32-bit integer.
        $ucs4 = mb_convert_encoding($char, 'UCS-4BE', 'UTF-8');
        $unpacked = unpack('N', $ucs4);
        $mbHexList .= '{' . hex_format($unpacked[1]) . '}';
        $charPos++;
    }

    // Pass 2: walk the same string one raw byte at a time.
    $byteList = '';
    $byteHexList = '';
    $byteCount = strlen($data);
    $bytePos = 0;
    while ($bytePos < $byteCount) {
        $byte = substr($data, $bytePos, 1);
        $byteList .= '{' . $byte . '}';
        $byteHexList .= '{' . hex_format(ord($byte)) . '}';
        $bytePos++;
    }

    return array(
        'data' => $data,
        'chars' => $byteList,
        'hex' => $byteHexList,
        'mb_chars' => $mbCharList,
        'mb_hex' => $mbHexList,
    );
}
//-----
/**
 * Format an integer as upper-case hexadecimal with an even number of digits.
 *
 * A single '0' is prepended when dechex() yields an odd digit count, so the
 * result always reads as whole bytes (e.g. 10 -> "0A", 256 -> "0100").
 *
 * @param int $o The value to format.
 *
 * @return string Upper-case hex string whose length is a multiple of two.
 */
function hex_format($o)
{
    $digits = strtoupper(dechex($o));

    // Odd length means the leading byte is missing its high nibble; pad it.
    return (strlen($digits) % 2 === 1) ? '0' . $digits : $digits;
}
//-----
// Sample input for the demo; the byte count below needs no extension.
$test="hello there my friends, this is a simple standard test of hex chars";
echo "strlen[test]".strlen($test)."<br>";
//
// Check for the mbstring functions BEFORE calling any of them. Calling mb_strlen()
// (directly, or indirectly through hex_chars()) when the extension is missing is a
// fatal error, which would stop the script before the help message could be shown.
if(function_exists('mb_strlen'))
{
    echo "mb_strlen(\$test,'utf-8')".mb_strlen($test,'utf-8')."<br>";// backslash is used (\) to escape the ($) and echo it out
    $testH=hex_chars($test);
    echo "<p>mb_strlen exists</p>";
}
else
{
    // hex_chars() cannot run without mbstring; provide the same keys the page
    // template reads so it renders empty boxes instead of raising notices.
    $testH=array
    (
        'data'=>$test,
        'chars'=>'',
        'hex'=>'',
        'mb_chars'=>'',
        'mb_hex'=>'',
    );
    echo "<p>mb_strlen does not exist, you should enable this by reinstalling with your php5+ installer and enabling mbstrings, or manually by searching for the process online.</p>";
}
?>
<?php
// Each value is passed through htmlspecialchars() so that '&', '<', '>' and quotes in the
// inspected string are displayed literally inside the <textarea> rather than being parsed
// as markup (a literal "</textarea>" in the data would otherwise break the page).
// A single-byte charset is passed on purpose: 'chars' holds multibyte sequences split into
// individual bytes, which a UTF-8-validating call would reject and replace with an empty
// string. The characters being escaped are plain ASCII, so the remaining bytes pass through
// unchanged.
?>
<h3>HEX_CHARS</h3>
DATA<br>
<textarea cols="80" rows="4"><?php echo htmlspecialchars($testH['data'],ENT_QUOTES,'ISO-8859-1');?></textarea><hr />
CHARS<br>
<textarea cols="80" rows="4"><?php echo htmlspecialchars($testH['chars'],ENT_QUOTES,'ISO-8859-1');?></textarea><hr />
HEX<br>
<textarea cols="80" rows="4"><?php echo htmlspecialchars($testH['hex'],ENT_QUOTES,'ISO-8859-1');?></textarea><hr />
MB_CHARS<br>
<textarea cols="80" rows="4"><?php echo htmlspecialchars($testH['mb_chars'],ENT_QUOTES,'ISO-8859-1');?></textarea><hr />
MB_HEX<br>
<textarea cols="80" rows="4"><?php echo htmlspecialchars($testH['mb_hex'],ENT_QUOTES,'ISO-8859-1');?></textarea><hr />
</body>
</html>