<?php
/*
:: Latin characters issue using rawurldecode() ::
------------------------------------------
What happens if you need to convert %C3%B1 into 'ñ' using rawurldecode()? Well, it doesn't work as we'd wish. rawurldecode() returns the two raw UTF-8 bytes 0xC3 0xB1, which show up as "Ã±" when the output is treated as ISO-8859-1 (Latin-1). To fix this issue, I've made the following function:
*/
/**
 * Decode a raw-URL-encoded string, turning every "%C3%XX" pair into one byte.
 *
 * "%C3%XX" (XX in 80..BF) is the percent-encoded two-byte UTF-8 form of the
 * code points U+00C0..U+00FF. Each such pair is collapsed into the single byte
 * with that value (the ISO-8859-1 representation), e.g. "%C3%B1" -> 0xF1 'ñ'.
 * The hex digits are matched case-insensitively. Every other escape, including
 * "%C3" followed by something that is not a valid continuation byte, is left
 * for rawurldecode() to decode as-is.
 *
 * @param string $raw_url_encoded Percent-encoded input.
 * @return string The decoded string, with "%C3%XX" pairs emitted as single bytes.
 */
function urlRawDecode($raw_url_encoded)
{
    # Only a continuation byte (0x80-0xBF, i.e. first hex digit 8, 9, A or B)
    # may follow the 0xC3 lead byte in well-formed UTF-8.
    if (preg_match_all('/%C3%([89AB][0-9A-F])/i', $raw_url_encoded, $found)) {
        $byte_for = array();
        foreach ($found[0] as $i => $encoded_pair) {
            # UTF-8 two-byte layout is 110xxxxx 10yyyyyy. With lead byte 0xC3 the
            # x bits are 00011, so the code point is 0xC0 | (continuation & 0x3F).
            $byte_for[$encoded_pair] = chr(0xc0 | (hexdec($found[1][$i]) & 0x3f));
        }
        # Keys are the exact matched text, so lower- and mixed-case escapes are
        # replaced too; duplicate matches simply overwrite the same map entry.
        $raw_url_encoded = strtr($raw_url_encoded, $byte_for);
    }

    # Decode whatever percent-escapes remain.
    return rawurldecode($raw_url_encoded);
}
# Demo: decode the percent-encoded form of 'ñ' and show the result.
echo "Decoded character -> " . urlRawDecode("%C3%B1");
// Expected output (when viewed as ISO-8859-1):
// Decoded character -> ñ
/*
:: A little explanation of what this function does ::
-----------------------------------------------------
The function collapses the two encoded bytes C3 and B1 into a single byte. For this example the result can be derived nibble by nibble: to get the single-byte (ISO-8859-1, not ASCII) representation of this kind of raw-URL-encoded character, we take a logical OR between the HIGH nibble of 0xC3 (0xC) and the HIGH nibble of 0xB1 (0xB) -> (0xC0 | 0xB0), then a logical AND between both LOW nibbles (0x03 & 0x01), and finally a logical OR between these two results -> [hex] ((0xC0 | 0xB0) | (0x03 & 0x01)) = [binary] ((1100 0000 | 1011 0000) | (0000 0011 & 0000 0001)) = [hex] 0xF1 = [binary] 1111 0001 = the "ñ" character.
NOTE(review): the low-nibble AND only yields the right byte when the second byte's low nibble is 0-3. The general UTF-8 rule for a %C3 lead byte is 0xC0 | (second byte & 0x3F), e.g. %C3%A9 -> 0xC0 | 0x29 = 0xE9 = "é".
I hope this is helpful; if you have an issue like this, try using this function.
Bye,
Javi =)
*/
?>