1.20.x transliteration.inc transliteration_remove_diacritics($string)

File

includes/transliteration.inc, line 292
Transliteration processing functions.

Code

/**
 * Replaces accented Latin letters with single-byte equivalents.
 *
 * The string is split into UTF-8 characters. A character is a candidate for
 * replacement when its code point lies in one of two ranges (U+00C0..U+017E
 * or U+01CD..U+024F) and is not on that range's exclusion list. Candidates
 * are passed to transliteration_get(); the result is used only when it is
 * exactly one byte long. Every other character is copied through unchanged.
 *
 * @param string $string
 *   The UTF-8 string to process.
 *
 * @return string
 *   The processed string. If the input cannot be split into characters (for
 *   example because it is not valid UTF-8), it is returned unchanged instead
 *   of being discarded.
 */
function transliteration_remove_diacritics($string) {
  // With the /u modifier preg_split() returns FALSE on malformed UTF-8.
  // Iterating over FALSE would raise a warning and silently turn the whole
  // input into an empty string, so hand the input back untouched instead.
  $characters = preg_split('//u', $string, 0, PREG_SPLIT_NO_EMPTY);
  if ($characters === FALSE) {
    return $string;
  }

  // These two Unicode ranges hold the accented variants of the basic Latin
  // letters, with a few characters that aren't accented letters mixed in.
  // The exclusion lists name those characters; they do not depend on the
  // character being processed, so they are built once, outside the loop.
  $exclusions_range1 = array(0x00d0, 0x00d7, 0x00f0, 0x00f7, 0x0138, 0x014a, 0x014b);
  $exclusions_range2 = array(0x01DD, 0x01f7, 0x021c, 0x021d, 0x0220, 0x0221, 0x0241, 0x0242, 0x0245);

  $result = '';

  foreach ($characters as $character) {
    $code = _transliteration_ord_utf8($character);

    $range1 = $code > 0x00bf && $code < 0x017f;
    $range2 = $code > 0x01cc && $code < 0x0250;

    $replacement = $character;
    if (($range1 && !in_array($code, $exclusions_range1)) || ($range2 && !in_array($code, $exclusions_range2))) {
      // NOTE(review): the second argument is presumably the fallback returned
      // for code points without a mapping; 'xyz' is three bytes long, so such
      // a fallback can never pass the length check below -- confirm against
      // transliteration_get().
      $to_add = transliteration_get($code, 'xyz');
      // Only accept single-byte results; anything else keeps the original
      // character.
      if (strlen($to_add) === 1) {
        $replacement = $to_add;
      }
    }

    $result .= $replacement;
  }

  return $result;
}