projects.mako.cc - scuttle/blob - includes/utf8/mbstring/core.php

   1 <?php
   2 /**
   3 * @version $Id: core.php,v 1.5 2006/02/28 22:12:25 harryf Exp $
   4 * @package utf8
   5 * @subpackage strings
   6 */
   7
   8 /**
   9 * Define UTF8_CORE as required
  10 */
  11 if ( !defined('UTF8_CORE') ) {
  12     define('UTF8_CORE',TRUE);
  13 }
  14
  15 //--------------------------------------------------------------------
  16 /**
  17 * Wrapper round mb_strlen
  18 * Assumes you have mb_internal_encoding to UTF-8 already
  19 * Note: this function does not count bad bytes in the string - these
  20 * are simply ignored
  21 * @param string UTF-8 string
  22 * @return int number of UTF-8 characters in string
  23 * @package utf8
  24 * @subpackage strings
  25 */
  26 function utf8_strlen($str){
  27     return mb_strlen($str);
  28 }
  29
  30
  31 //--------------------------------------------------------------------
  32 /**
  33 * Assumes mbstring internal encoding is set to UTF-8
  34 * Wrapper around mb_strpos
  35 * Find position of first occurrence of a string
  36 * @param string haystack
  37 * @param string needle (you should validate this with utf8_is_valid)
  38 * @param integer offset in characters (from left)
  39 * @return mixed integer position or FALSE on failure
  40 * @package utf8
  41 * @subpackage strings
  42 */
  43 function utf8_strpos($str, $search, $offset = FALSE){
  44     if ( $offset === FALSE ) {
  45         return mb_strpos($str, $search);
  46     } else {
  47         return mb_strpos($str, $search, $offset);
  48     }
  49 }
  50
  51 //--------------------------------------------------------------------
  52 /**
  53 * Assumes mbstring internal encoding is set to UTF-8
  54 * Wrapper around mb_strrpos
  55 * Find position of last occurrence of a char in a string
  56 * @param string haystack
  57 * @param string needle (you should validate this with utf8_is_valid)
  58 * @param integer (optional) offset (from left)
  59 * @return mixed integer position or FALSE on failure
  60 * @package utf8
  61 * @subpackage strings
  62 */
  63 function utf8_strrpos($str, $search, $offset = FALSE){
  64     if ( $offset === FALSE ) {
  65         # Emulate behaviour of strrpos rather than raising warning
  66         if ( empty($str) ) {
  67             return FALSE;
  68         }
  69         return mb_strrpos($str, $search);
  70     } else {
  71         if ( !is_int($offset) ) {
  72             trigger_error('utf8_strrpos expects parameter 3 to be long',E_USER_WARNING);
  73             return FALSE;
  74         }
  75
  76         $str = mb_substr($str, $offset);
  77
  78         if ( FALSE !== ( $pos = mb_strrpos($str, $search) ) ) {
  79             return $pos + $offset;
  80         }
  81
  82         return FALSE;
  83     }
  84 }
  85
  86 //--------------------------------------------------------------------
  87 /**
  88 * Assumes mbstring internal encoding is set to UTF-8
  89 * Wrapper around mb_substr
  90 * Return part of a string given character offset (and optionally length)
  91 * @param string
  92 * @param integer number of UTF-8 characters offset (from left)
  93 * @param integer (optional) length in UTF-8 characters from offset
  94 * @return mixed string or FALSE if failure
  95 * @package utf8
  96 * @subpackage strings
  97 */
  98 function utf8_substr($str, $offset, $length = FALSE){
  99     if ( $length === FALSE ) {
 100         return mb_substr($str, $offset);
 101     } else {
 102         return mb_substr($str, $offset, $length);
 103     }
 104 }
 105
 106 //--------------------------------------------------------------------
 107 /**
 108 * Assumes mbstring internal encoding is set to UTF-8
 109 * Wrapper around mb_strtolower
 110 * Make a string lowercase
 111 * Note: The concept of a characters "case" only exists is some alphabets
 112 * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
 113 * not exist in the Chinese alphabet, for example. See Unicode Standard
 114 * Annex #21: Case Mappings
 115 * @param string
 116 * @return mixed either string in lowercase or FALSE is UTF-8 invalid
 117 * @package utf8
 118 * @subpackage strings
 119 */
 120 function utf8_strtolower($str){
 121     return mb_strtolower($str);
 122 }
 123
 124 //--------------------------------------------------------------------
 125 /**
 126 * Assumes mbstring internal encoding is set to UTF-8
 127 * Wrapper around mb_strtoupper
 128 * Make a string uppercase
 129 * Note: The concept of a characters "case" only exists is some alphabets
 130 * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
 131 * not exist in the Chinese alphabet, for example. See Unicode Standard
 132 * Annex #21: Case Mappings
 133 * @param string
 134 * @return mixed either string in lowercase or FALSE is UTF-8 invalid
 135 * @package utf8
 136 * @subpackage strings
 137 */
 138 function utf8_strtoupper($str){
 139     return mb_strtoupper($str);
 140 }