From: Marcus Campbell Date: Sun, 28 Mar 2010 05:17:09 +0000 (+0000) Subject: Tagged 0.7.5 X-Git-Url: https://projects.mako.cc/source/scuttle/commitdiff_plain/c7f63c8b9b12efd7b3c10b9f80cda06eaf32068f Tagged 0.7.5 --- diff --git a/.htaccess b/.htaccess index 89f6dfd..de83899 100644 --- a/.htaccess +++ b/.htaccess @@ -1,13 +1,12 @@ # Rewrite clean URLs onto real files -Options +FollowSymlinks - + AcceptPathInfo On - -RewriteEngine On -RewriteBase / -RewriteCond %{REQUEST_FILENAME} !-f -RewriteCond %{REQUEST_FILENAME} !-d -RewriteCond %{REQUEST_FILENAME}.php -f -RewriteRule ^([^/]+)/?(.*) $1.php/$2 [L] + + RewriteEngine On + RewriteBase / + RewriteCond %{REQUEST_FILENAME} !-f + RewriteCond %{REQUEST_FILENAME} !-d + RewriteCond %{REQUEST_FILENAME}.php -f + RewriteRule ^([^/]+)/?(.*) $1.php/$2 [L] diff --git a/AUTHORS b/AUTHORS index a45e1e7..998ffe8 100644 --- a/AUTHORS +++ b/AUTHORS @@ -1,8 +1,7 @@ -Scuttle contains code from the following applications: +Scuttle contains code from the following open-source projects: ------------- -GPL Licenced ------------- +jQuery +http://www.jquery.com/ phpBB2 (database abstraction layer) http://www.phpbb.com/ @@ -11,8 +10,8 @@ php-gettext Danilo Segan http://savannah.nongnu.org/projects/php-gettext/ -UTF8 Helper Functions -Andreas Gohr +PHP UTF-8 +https://sourceforge.net/projects/phputf8/ XSPF Web Music Player (Flash) http://musicplayer.sourceforge.net/ \ No newline at end of file diff --git a/about.php b/about.php index 020a351..c089cb9 100644 --- a/about.php +++ b/about.php @@ -1,6 +1,6 @@ loadTemplate('about.tpl', $tplVars); -?> \ No newline at end of file diff --git a/ajaxDelete.php b/ajaxDelete.php index ad3efc8..1972a20 100644 --- a/ajaxDelete.php +++ b/ajaxDelete.php @@ -1,6 +1,6 @@ editAllowed($bookmark)) { - $result = T_('You are not allowed to delete this bookmark'); + echo T_('You are not allowed to delete this bookmark'); } elseif ($bookmarkservice->deleteBookmark($bookmark)) { - $result = 'true'; + echo true; } else { - 
$result = T_('Failed to delete bookmark'); + echo T_('Failed to delete bookmark'); } -?> - - deleteConfirmed - - \ No newline at end of file diff --git a/ajaxGetTitle.php b/ajaxGetTitle.php index 473fa59..14b1f46 100644 --- a/ajaxGetTitle.php +++ b/ajaxGetTitle.php @@ -1,6 +1,6 @@ '; -?> - - getTitle - - \ No newline at end of file +echo getTitle($_GET['url']); diff --git a/ajaxIsAvailable.php b/ajaxIsAvailable.php index 80883c6..8cd10c0 100644 --- a/ajaxIsAvailable.php +++ b/ajaxIsAvailable.php @@ -1,6 +1,6 @@ isReserved($_GET['username'])) { - $result = 'false'; -} else { - $result = $userservice->getUserByUsername($_GET['username']) ? 'false' : 'true'; -} -?> - - isAvailable - - \ No newline at end of file +$userservice =& ServiceFactory::getServiceInstance('UserService'); +echo !($userservice->isReserved($_GET['username']) || $userservice->getUserByUsername($_GET['username'])); diff --git a/alltags.php b/alltags.php index b784414..1b0476f 100644 --- a/alltags.php +++ b/alltags.php @@ -1,6 +1,6 @@ +include_once 'debug.inc.php'; diff --git a/edit.php b/edit.php index 028e8ba..cca8e47 100644 --- a/edit.php +++ b/edit.php @@ -1,6 +1,6 @@ \n\n". $msg_title ."\n

\n". $msg_text ."\n"; exit; } -?> diff --git a/header.inc.php b/header.inc.php index de56c84..b8971c3 100644 --- a/header.inc.php +++ b/header.inc.php @@ -7,15 +7,15 @@ error_reporting(E_ALL ^ E_NOTICE); define('DEBUG', true); session_start(); -require_once(dirname(__FILE__) .'/services/servicefactory.php'); -require_once(dirname(__FILE__) .'/config.inc.php'); -require_once(dirname(__FILE__) .'/functions.inc.php'); +require_once dirname(__FILE__) .'/services/servicefactory.php'; +require_once dirname(__FILE__) .'/config.inc.php'; +require_once dirname(__FILE__) .'/functions.inc.php'; // Determine the base URL if (!isset($root)) { $pieces = explode('/', $_SERVER['SCRIPT_NAME']); - $root = '/'; - foreach($pieces as $piece) { + $root = '/'; + foreach ($pieces as $piece) { if ($piece != '' && !strstr($piece, '.php')) { $root .= $piece .'/'; } @@ -24,12 +24,13 @@ if (!isset($root)) { $root .= '/'; } $path = $root; - $root = 'http://'. $_SERVER['HTTP_HOST'] . $root; + + $protocol = (isset($_SERVER['HTTPS']) && $_SERVER['HTTPS'] == 'on') ? 'https' : 'http'; + $root = $protocol .'://'. $_SERVER['HTTP_HOST'] . 
$root; } // Error codes -define('GENERAL_MESSAGE', 200); -define('GENERAL_ERROR', 202); +define('GENERAL_MESSAGE', 200); +define('GENERAL_ERROR', 202); define('CRITICAL_MESSAGE', 203); -define('CRITICAL_ERROR', 204); -?> \ No newline at end of file +define('CRITICAL_ERROR', 204); diff --git a/history.php b/history.php index 568a8b6..f27cf09 100644 --- a/history.php +++ b/history.php @@ -1,6 +1,6 @@ )[^>]*$|^#([\w-]+)$/,Ua=/^.[^:#\[\.,]*$/,Va=/\S/, +Wa=/^(\s|\u00A0)+|(\s|\u00A0)+$/g,Xa=/^<(\w+)\s*\/?>(?:<\/\1>)?$/,P=navigator.userAgent,xa=false,Q=[],L,$=Object.prototype.toString,aa=Object.prototype.hasOwnProperty,ba=Array.prototype.push,R=Array.prototype.slice,ya=Array.prototype.indexOf;c.fn=c.prototype={init:function(a,b){var d,f;if(!a)return this;if(a.nodeType){this.context=this[0]=a;this.length=1;return this}if(a==="body"&&!b){this.context=s;this[0]=s.body;this.selector="body";this.length=1;return this}if(typeof a==="string")if((d=Ta.exec(a))&& +(d[1]||!b))if(d[1]){f=b?b.ownerDocument||b:s;if(a=Xa.exec(a))if(c.isPlainObject(b)){a=[s.createElement(a[1])];c.fn.attr.call(a,b,true)}else a=[f.createElement(a[1])];else{a=sa([d[1]],[f]);a=(a.cacheable?a.fragment.cloneNode(true):a.fragment).childNodes}return c.merge(this,a)}else{if(b=s.getElementById(d[2])){if(b.id!==d[2])return T.find(a);this.length=1;this[0]=b}this.context=s;this.selector=a;return this}else if(!b&&/^\w+$/.test(a)){this.selector=a;this.context=s;a=s.getElementsByTagName(a);return c.merge(this, +a)}else return!b||b.jquery?(b||T).find(a):c(b).find(a);else if(c.isFunction(a))return T.ready(a);if(a.selector!==w){this.selector=a.selector;this.context=a.context}return c.makeArray(a,this)},selector:"",jquery:"1.4.2",length:0,size:function(){return this.length},toArray:function(){return R.call(this,0)},get:function(a){return a==null?this.toArray():a<0?this.slice(a)[0]:this[a]},pushStack:function(a,b,d){var f=c();c.isArray(a)?ba.apply(f,a):c.merge(f,a);f.prevObject=this;f.context=this.context;if(b=== 
+"find")f.selector=this.selector+(this.selector?" ":"")+d;else if(b)f.selector=this.selector+"."+b+"("+d+")";return f},each:function(a,b){return c.each(this,a,b)},ready:function(a){c.bindReady();if(c.isReady)a.call(s,c);else Q&&Q.push(a);return this},eq:function(a){return a===-1?this.slice(a):this.slice(a,+a+1)},first:function(){return this.eq(0)},last:function(){return this.eq(-1)},slice:function(){return this.pushStack(R.apply(this,arguments),"slice",R.call(arguments).join(","))},map:function(a){return this.pushStack(c.map(this, +function(b,d){return a.call(b,d,b)}))},end:function(){return this.prevObject||c(null)},push:ba,sort:[].sort,splice:[].splice};c.fn.init.prototype=c.fn;c.extend=c.fn.extend=function(){var a=arguments[0]||{},b=1,d=arguments.length,f=false,e,j,i,o;if(typeof a==="boolean"){f=a;a=arguments[1]||{};b=2}if(typeof a!=="object"&&!c.isFunction(a))a={};if(d===b){a=this;--b}for(;b
a"; +var e=d.getElementsByTagName("*"),j=d.getElementsByTagName("a")[0];if(!(!e||!e.length||!j)){c.support={leadingWhitespace:d.firstChild.nodeType===3,tbody:!d.getElementsByTagName("tbody").length,htmlSerialize:!!d.getElementsByTagName("link").length,style:/red/.test(j.getAttribute("style")),hrefNormalized:j.getAttribute("href")==="/a",opacity:/^0.55$/.test(j.style.opacity),cssFloat:!!j.style.cssFloat,checkOn:d.getElementsByTagName("input")[0].value==="on",optSelected:s.createElement("select").appendChild(s.createElement("option")).selected, +parentNode:d.removeChild(d.appendChild(s.createElement("div"))).parentNode===null,deleteExpando:true,checkClone:false,scriptEval:false,noCloneEvent:true,boxModel:null};b.type="text/javascript";try{b.appendChild(s.createTextNode("window."+f+"=1;"))}catch(i){}a.insertBefore(b,a.firstChild);if(A[f]){c.support.scriptEval=true;delete A[f]}try{delete b.test}catch(o){c.support.deleteExpando=false}a.removeChild(b);if(d.attachEvent&&d.fireEvent){d.attachEvent("onclick",function k(){c.support.noCloneEvent= +false;d.detachEvent("onclick",k)});d.cloneNode(true).fireEvent("onclick")}d=s.createElement("div");d.innerHTML="";a=s.createDocumentFragment();a.appendChild(d.firstChild);c.support.checkClone=a.cloneNode(true).cloneNode(true).lastChild.checked;c(function(){var k=s.createElement("div");k.style.width=k.style.paddingLeft="1px";s.body.appendChild(k);c.boxModel=c.support.boxModel=k.offsetWidth===2;s.body.removeChild(k).style.display="none"});a=function(k){var n= +s.createElement("div");k="on"+k;var r=k in n;if(!r){n.setAttribute(k,"return;");r=typeof n[k]==="function"}return r};c.support.submitBubbles=a("submit");c.support.changeBubbles=a("change");a=b=d=e=j=null}})();c.props={"for":"htmlFor","class":"className",readonly:"readOnly",maxlength:"maxLength",cellspacing:"cellSpacing",rowspan:"rowSpan",colspan:"colSpan",tabindex:"tabIndex",usemap:"useMap",frameborder:"frameBorder"};var 
G="jQuery"+J(),Ya=0,za={};c.extend({cache:{},expando:G,noData:{embed:true,object:true, +applet:true},data:function(a,b,d){if(!(a.nodeName&&c.noData[a.nodeName.toLowerCase()])){a=a==A?za:a;var f=a[G],e=c.cache;if(!f&&typeof b==="string"&&d===w)return null;f||(f=++Ya);if(typeof b==="object"){a[G]=f;e[f]=c.extend(true,{},b)}else if(!e[f]){a[G]=f;e[f]={}}a=e[f];if(d!==w)a[b]=d;return typeof b==="string"?a[b]:a}},removeData:function(a,b){if(!(a.nodeName&&c.noData[a.nodeName.toLowerCase()])){a=a==A?za:a;var d=a[G],f=c.cache,e=f[d];if(b){if(e){delete e[b];c.isEmptyObject(e)&&c.removeData(a)}}else{if(c.support.deleteExpando)delete a[c.expando]; +else a.removeAttribute&&a.removeAttribute(c.expando);delete f[d]}}}});c.fn.extend({data:function(a,b){if(typeof a==="undefined"&&this.length)return c.data(this[0]);else if(typeof a==="object")return this.each(function(){c.data(this,a)});var d=a.split(".");d[1]=d[1]?"."+d[1]:"";if(b===w){var f=this.triggerHandler("getData"+d[1]+"!",[d[0]]);if(f===w&&this.length)f=c.data(this[0],a);return f===w&&d[1]?this.data(d[0]):f}else return this.trigger("setData"+d[1]+"!",[d[0],b]).each(function(){c.data(this, +a,b)})},removeData:function(a){return this.each(function(){c.removeData(this,a)})}});c.extend({queue:function(a,b,d){if(a){b=(b||"fx")+"queue";var f=c.data(a,b);if(!d)return f||[];if(!f||c.isArray(d))f=c.data(a,b,c.makeArray(d));else f.push(d);return f}},dequeue:function(a,b){b=b||"fx";var d=c.queue(a,b),f=d.shift();if(f==="inprogress")f=d.shift();if(f){b==="fx"&&d.unshift("inprogress");f.call(a,function(){c.dequeue(a,b)})}}});c.fn.extend({queue:function(a,b){if(typeof a!=="string"){b=a;a="fx"}if(b=== +w)return c.queue(this[0],a);return this.each(function(){var d=c.queue(this,a,b);a==="fx"&&d[0]!=="inprogress"&&c.dequeue(this,a)})},dequeue:function(a){return this.each(function(){c.dequeue(this,a)})},delay:function(a,b){a=c.fx?c.fx.speeds[a]||a:a;b=b||"fx";return this.queue(b,function(){var 
d=this;setTimeout(function(){c.dequeue(d,b)},a)})},clearQueue:function(a){return this.queue(a||"fx",[])}});var Aa=/[\n\t]/g,ca=/\s+/,Za=/\r/g,$a=/href|src|style/,ab=/(button|input)/i,bb=/(button|input|object|select|textarea)/i, +cb=/^(a|area)$/i,Ba=/radio|checkbox/;c.fn.extend({attr:function(a,b){return X(this,a,b,true,c.attr)},removeAttr:function(a){return this.each(function(){c.attr(this,a,"");this.nodeType===1&&this.removeAttribute(a)})},addClass:function(a){if(c.isFunction(a))return this.each(function(n){var r=c(this);r.addClass(a.call(this,n,r.attr("class")))});if(a&&typeof a==="string")for(var b=(a||"").split(ca),d=0,f=this.length;d-1)return true;return false},val:function(a){if(a===w){var b=this[0];if(b){if(c.nodeName(b,"option"))return(b.attributes.value||{}).specified?b.value:b.text;if(c.nodeName(b,"select")){var d=b.selectedIndex,f=[],e=b.options;b=b.type==="select-one";if(d<0)return null;var j=b?d:0;for(d=b?d+1:e.length;j=0;else if(c.nodeName(this,"select")){var u=c.makeArray(r);c("option",this).each(function(){this.selected= +c.inArray(c(this).val(),u)>=0});if(!u.length)this.selectedIndex=-1}else this.value=r}})}});c.extend({attrFn:{val:true,css:true,html:true,text:true,data:true,width:true,height:true,offset:true},attr:function(a,b,d,f){if(!a||a.nodeType===3||a.nodeType===8)return w;if(f&&b in c.attrFn)return c(a)[b](d);f=a.nodeType!==1||!c.isXMLDoc(a);var e=d!==w;b=f&&c.props[b]||b;if(a.nodeType===1){var j=$a.test(b);if(b in a&&f&&!j){if(e){b==="type"&&ab.test(a.nodeName)&&a.parentNode&&c.error("type property can't be changed"); +a[b]=d}if(c.nodeName(a,"form")&&a.getAttributeNode(b))return a.getAttributeNode(b).nodeValue;if(b==="tabIndex")return(b=a.getAttributeNode("tabIndex"))&&b.specified?b.value:bb.test(a.nodeName)||cb.test(a.nodeName)&&a.href?0:w;return a[b]}if(!c.support.style&&f&&b==="style"){if(e)a.style.cssText=""+d;return a.style.cssText}e&&a.setAttribute(b,""+d);a=!c.support.hrefNormalized&&f&&j?a.getAttribute(b,2):a.getAttribute(b);return 
a===null?w:a}return c.style(a,b,d)}});var O=/\.(.*)$/,db=function(a){return a.replace(/[^\w\s\.\|`]/g, +function(b){return"\\"+b})};c.event={add:function(a,b,d,f){if(!(a.nodeType===3||a.nodeType===8)){if(a.setInterval&&a!==A&&!a.frameElement)a=A;var e,j;if(d.handler){e=d;d=e.handler}if(!d.guid)d.guid=c.guid++;if(j=c.data(a)){var i=j.events=j.events||{},o=j.handle;if(!o)j.handle=o=function(){return typeof c!=="undefined"&&!c.event.triggered?c.event.handle.apply(o.elem,arguments):w};o.elem=a;b=b.split(" ");for(var k,n=0,r;k=b[n++];){j=e?c.extend({},e):{handler:d,data:f};if(k.indexOf(".")>-1){r=k.split("."); +k=r.shift();j.namespace=r.slice(0).sort().join(".")}else{r=[];j.namespace=""}j.type=k;j.guid=d.guid;var u=i[k],z=c.event.special[k]||{};if(!u){u=i[k]=[];if(!z.setup||z.setup.call(a,f,r,o)===false)if(a.addEventListener)a.addEventListener(k,o,false);else a.attachEvent&&a.attachEvent("on"+k,o)}if(z.add){z.add.call(a,j);if(!j.handler.guid)j.handler.guid=d.guid}u.push(j);c.event.global[k]=true}a=null}}},global:{},remove:function(a,b,d,f){if(!(a.nodeType===3||a.nodeType===8)){var e,j=0,i,o,k,n,r,u,z=c.data(a), +C=z&&z.events;if(z&&C){if(b&&b.type){d=b.handler;b=b.type}if(!b||typeof b==="string"&&b.charAt(0)==="."){b=b||"";for(e in C)c.event.remove(a,e+b)}else{for(b=b.split(" ");e=b[j++];){n=e;i=e.indexOf(".")<0;o=[];if(!i){o=e.split(".");e=o.shift();k=new RegExp("(^|\\.)"+c.map(o.slice(0).sort(),db).join("\\.(?:.*\\.)?")+"(\\.|$)")}if(r=C[e])if(d){n=c.event.special[e]||{};for(B=f||0;B=0){a.type= +e=e.slice(0,-1);a.exclusive=true}if(!d){a.stopPropagation();c.event.global[e]&&c.each(c.cache,function(){this.events&&this.events[e]&&c.event.trigger(a,b,this.handle.elem)})}if(!d||d.nodeType===3||d.nodeType===8)return 
w;a.result=w;a.target=d;b=c.makeArray(b);b.unshift(a)}a.currentTarget=d;(f=c.data(d,"handle"))&&f.apply(d,b);f=d.parentNode||d.ownerDocument;try{if(!(d&&d.nodeName&&c.noData[d.nodeName.toLowerCase()]))if(d["on"+e]&&d["on"+e].apply(d,b)===false)a.result=false}catch(j){}if(!a.isPropagationStopped()&& +f)c.event.trigger(a,b,f,true);else if(!a.isDefaultPrevented()){f=a.target;var i,o=c.nodeName(f,"a")&&e==="click",k=c.event.special[e]||{};if((!k._default||k._default.call(d,a)===false)&&!o&&!(f&&f.nodeName&&c.noData[f.nodeName.toLowerCase()])){try{if(f[e]){if(i=f["on"+e])f["on"+e]=null;c.event.triggered=true;f[e]()}}catch(n){}if(i)f["on"+e]=i;c.event.triggered=false}}},handle:function(a){var b,d,f,e;a=arguments[0]=c.event.fix(a||A.event);a.currentTarget=this;b=a.type.indexOf(".")<0&&!a.exclusive; +if(!b){d=a.type.split(".");a.type=d.shift();f=new RegExp("(^|\\.)"+d.slice(0).sort().join("\\.(?:.*\\.)?")+"(\\.|$)")}e=c.data(this,"events");d=e[a.type];if(e&&d){d=d.slice(0);e=0;for(var j=d.length;e-1?c.map(a.options,function(f){return f.selected}).join("-"):"";else if(a.nodeName.toLowerCase()==="select")d=a.selectedIndex;return d},fa=function(a,b){var d=a.target,f,e;if(!(!da.test(d.nodeName)||d.readOnly)){f=c.data(d,"_change_data");e=Fa(d);if(a.type!=="focusout"||d.type!=="radio")c.data(d,"_change_data", +e);if(!(f===w||e===f))if(f!=null||e){a.type="change";return c.event.trigger(a,b,d)}}};c.event.special.change={filters:{focusout:fa,click:function(a){var b=a.target,d=b.type;if(d==="radio"||d==="checkbox"||b.nodeName.toLowerCase()==="select")return fa.call(this,a)},keydown:function(a){var b=a.target,d=b.type;if(a.keyCode===13&&b.nodeName.toLowerCase()!=="textarea"||a.keyCode===32&&(d==="checkbox"||d==="radio")||d==="select-multiple")return fa.call(this,a)},beforeactivate:function(a){a=a.target;c.data(a, +"_change_data",Fa(a))}},setup:function(){if(this.type==="file")return false;for(var a in ea)c.event.add(this,a+".specialChange",ea[a]);return 
da.test(this.nodeName)},teardown:function(){c.event.remove(this,".specialChange");return da.test(this.nodeName)}};ea=c.event.special.change.filters}s.addEventListener&&c.each({focus:"focusin",blur:"focusout"},function(a,b){function d(f){f=c.event.fix(f);f.type=b;return c.event.handle.call(this,f)}c.event.special[b]={setup:function(){this.addEventListener(a, +d,true)},teardown:function(){this.removeEventListener(a,d,true)}}});c.each(["bind","one"],function(a,b){c.fn[b]=function(d,f,e){if(typeof d==="object"){for(var j in d)this[b](j,f,d[j],e);return this}if(c.isFunction(f)){e=f;f=w}var i=b==="one"?c.proxy(e,function(k){c(this).unbind(k,i);return e.apply(this,arguments)}):e;if(d==="unload"&&b!=="one")this.one(d,f,e);else{j=0;for(var o=this.length;j0){y=t;break}}t=t[g]}m[q]=y}}}var f=/((?:\((?:\([^()]+\)|[^()]+)+\)|\[(?:\[[^[\]]*\]|['"][^'"]*['"]|[^[\]'"]+)+\]|\\.|[^ >+~,(\[\\]+)+|[>+~])(\s*,\s*)?((?:.|\r|\n)*)/g, +e=0,j=Object.prototype.toString,i=false,o=true;[0,0].sort(function(){o=false;return 0});var k=function(g,h,l,m){l=l||[];var q=h=h||s;if(h.nodeType!==1&&h.nodeType!==9)return[];if(!g||typeof g!=="string")return l;for(var p=[],v,t,y,S,H=true,M=x(h),I=g;(f.exec(""),v=f.exec(I))!==null;){I=v[3];p.push(v[1]);if(v[2]){S=v[3];break}}if(p.length>1&&r.exec(g))if(p.length===2&&n.relative[p[0]])t=ga(p[0]+p[1],h);else for(t=n.relative[p[0]]?[h]:k(p.shift(),h);p.length;){g=p.shift();if(n.relative[g])g+=p.shift(); +t=ga(g,t)}else{if(!m&&p.length>1&&h.nodeType===9&&!M&&n.match.ID.test(p[0])&&!n.match.ID.test(p[p.length-1])){v=k.find(p.shift(),h,M);h=v.expr?k.filter(v.expr,v.set)[0]:v.set[0]}if(h){v=m?{expr:p.pop(),set:z(m)}:k.find(p.pop(),p.length===1&&(p[0]==="~"||p[0]==="+")&&h.parentNode?h.parentNode:h,M);t=v.expr?k.filter(v.expr,v.set):v.set;if(p.length>0)y=z(t);else H=false;for(;p.length;){var D=p.pop();v=D;if(n.relative[D])v=p.pop();else D="";if(v==null)v=h;n.relative[D](y,v,M)}}else y=[]}y||(y=t);y||k.error(D|| +g);if(j.call(y)==="[object 
Array]")if(H)if(h&&h.nodeType===1)for(g=0;y[g]!=null;g++){if(y[g]&&(y[g]===true||y[g].nodeType===1&&E(h,y[g])))l.push(t[g])}else for(g=0;y[g]!=null;g++)y[g]&&y[g].nodeType===1&&l.push(t[g]);else l.push.apply(l,y);else z(y,l);if(S){k(S,q,l,m);k.uniqueSort(l)}return l};k.uniqueSort=function(g){if(B){i=o;g.sort(B);if(i)for(var h=1;h":function(g,h){var l=typeof h==="string";if(l&&!/\W/.test(h)){h=h.toLowerCase();for(var m=0,q=g.length;m=0))l||m.push(v);else if(l)h[p]=false;return false},ID:function(g){return g[1].replace(/\\/g,"")},TAG:function(g){return g[1].toLowerCase()}, +CHILD:function(g){if(g[1]==="nth"){var h=/(-?)(\d*)n((?:\+|-)?\d*)/.exec(g[2]==="even"&&"2n"||g[2]==="odd"&&"2n+1"||!/\D/.test(g[2])&&"0n+"+g[2]||g[2]);g[2]=h[1]+(h[2]||1)-0;g[3]=h[3]-0}g[0]=e++;return g},ATTR:function(g,h,l,m,q,p){h=g[1].replace(/\\/g,"");if(!p&&n.attrMap[h])g[1]=n.attrMap[h];if(g[2]==="~=")g[4]=" "+g[4]+" ";return g},PSEUDO:function(g,h,l,m,q){if(g[1]==="not")if((f.exec(g[3])||"").length>1||/^\w/.test(g[3]))g[3]=k(g[3],null,null,h);else{g=k.filter(g[3],h,l,true^q);l||m.push.apply(m, +g);return false}else if(n.match.POS.test(g[0])||n.match.CHILD.test(g[0]))return true;return g},POS:function(g){g.unshift(true);return g}},filters:{enabled:function(g){return g.disabled===false&&g.type!=="hidden"},disabled:function(g){return g.disabled===true},checked:function(g){return g.checked===true},selected:function(g){return g.selected===true},parent:function(g){return!!g.firstChild},empty:function(g){return!g.firstChild},has:function(g,h,l){return!!k(l[3],g).length},header:function(g){return/h\d/i.test(g.nodeName)}, 
+text:function(g){return"text"===g.type},radio:function(g){return"radio"===g.type},checkbox:function(g){return"checkbox"===g.type},file:function(g){return"file"===g.type},password:function(g){return"password"===g.type},submit:function(g){return"submit"===g.type},image:function(g){return"image"===g.type},reset:function(g){return"reset"===g.type},button:function(g){return"button"===g.type||g.nodeName.toLowerCase()==="button"},input:function(g){return/input|select|textarea|button/i.test(g.nodeName)}}, +setFilters:{first:function(g,h){return h===0},last:function(g,h,l,m){return h===m.length-1},even:function(g,h){return h%2===0},odd:function(g,h){return h%2===1},lt:function(g,h,l){return hl[3]-0},nth:function(g,h,l){return l[3]-0===h},eq:function(g,h,l){return l[3]-0===h}},filter:{PSEUDO:function(g,h,l,m){var q=h[1],p=n.filters[q];if(p)return p(g,l,h,m);else if(q==="contains")return(g.textContent||g.innerText||a([g])||"").indexOf(h[3])>=0;else if(q==="not"){h= +h[3];l=0;for(m=h.length;l=0}},ID:function(g,h){return g.nodeType===1&&g.getAttribute("id")===h},TAG:function(g,h){return h==="*"&&g.nodeType===1||g.nodeName.toLowerCase()===h},CLASS:function(g,h){return(" "+(g.className||g.getAttribute("class"))+" ").indexOf(h)>-1},ATTR:function(g,h){var l=h[1];g=n.attrHandle[l]?n.attrHandle[l](g):g[l]!=null?g[l]:g.getAttribute(l);l=g+"";var m=h[2];h=h[4];return g==null?m==="!=":m=== +"="?l===h:m==="*="?l.indexOf(h)>=0:m==="~="?(" "+l+" ").indexOf(h)>=0:!h?l&&g!==false:m==="!="?l!==h:m==="^="?l.indexOf(h)===0:m==="$="?l.substr(l.length-h.length)===h:m==="|="?l===h||l.substr(0,h.length+1)===h+"-":false},POS:function(g,h,l,m){var q=n.setFilters[h[2]];if(q)return q(g,l,h,m)}}},r=n.match.POS;for(var u in n.match){n.match[u]=new RegExp(n.match[u].source+/(?![^\[]*\])(?![^\(]*\))/.source);n.leftMatch[u]=new RegExp(/(^(?:.|\r|\n)*?)/.source+n.match[u].source.replace(/\\(\d+)/g,function(g, +h){return"\\"+(h-0+1)}))}var 
z=function(g,h){g=Array.prototype.slice.call(g,0);if(h){h.push.apply(h,g);return h}return g};try{Array.prototype.slice.call(s.documentElement.childNodes,0)}catch(C){z=function(g,h){h=h||[];if(j.call(g)==="[object Array]")Array.prototype.push.apply(h,g);else if(typeof g.length==="number")for(var l=0,m=g.length;l";var l=s.documentElement;l.insertBefore(g,l.firstChild);if(s.getElementById(h)){n.find.ID=function(m,q,p){if(typeof q.getElementById!=="undefined"&&!p)return(q=q.getElementById(m[1]))?q.id===m[1]||typeof q.getAttributeNode!=="undefined"&& +q.getAttributeNode("id").nodeValue===m[1]?[q]:w:[]};n.filter.ID=function(m,q){var p=typeof m.getAttributeNode!=="undefined"&&m.getAttributeNode("id");return m.nodeType===1&&p&&p.nodeValue===q}}l.removeChild(g);l=g=null})();(function(){var g=s.createElement("div");g.appendChild(s.createComment(""));if(g.getElementsByTagName("*").length>0)n.find.TAG=function(h,l){l=l.getElementsByTagName(h[1]);if(h[1]==="*"){h=[];for(var m=0;l[m];m++)l[m].nodeType===1&&h.push(l[m]);l=h}return l};g.innerHTML=""; +if(g.firstChild&&typeof g.firstChild.getAttribute!=="undefined"&&g.firstChild.getAttribute("href")!=="#")n.attrHandle.href=function(h){return h.getAttribute("href",2)};g=null})();s.querySelectorAll&&function(){var g=k,h=s.createElement("div");h.innerHTML="

";if(!(h.querySelectorAll&&h.querySelectorAll(".TEST").length===0)){k=function(m,q,p,v){q=q||s;if(!v&&q.nodeType===9&&!x(q))try{return z(q.querySelectorAll(m),p)}catch(t){}return g(m,q,p,v)};for(var l in g)k[l]=g[l];h=null}}(); +(function(){var g=s.createElement("div");g.innerHTML="
";if(!(!g.getElementsByClassName||g.getElementsByClassName("e").length===0)){g.lastChild.className="e";if(g.getElementsByClassName("e").length!==1){n.order.splice(1,0,"CLASS");n.find.CLASS=function(h,l,m){if(typeof l.getElementsByClassName!=="undefined"&&!m)return l.getElementsByClassName(h[1])};g=null}}})();var E=s.compareDocumentPosition?function(g,h){return!!(g.compareDocumentPosition(h)&16)}: +function(g,h){return g!==h&&(g.contains?g.contains(h):true)},x=function(g){return(g=(g?g.ownerDocument||g:0).documentElement)?g.nodeName!=="HTML":false},ga=function(g,h){var l=[],m="",q;for(h=h.nodeType?[h]:h;q=n.match.PSEUDO.exec(g);){m+=q[0];g=g.replace(n.match.PSEUDO,"")}g=n.relative[g]?g+"*":g;q=0;for(var p=h.length;q=0===d})};c.fn.extend({find:function(a){for(var b=this.pushStack("","find",a),d=0,f=0,e=this.length;f0)for(var j=d;j0},closest:function(a,b){if(c.isArray(a)){var d=[],f=this[0],e,j= +{},i;if(f&&a.length){e=0;for(var o=a.length;e-1:c(f).is(e)){d.push({selector:i,elem:f});delete j[i]}}f=f.parentNode}}return d}var k=c.expr.match.POS.test(a)?c(a,b||this.context):null;return this.map(function(n,r){for(;r&&r.ownerDocument&&r!==b;){if(k?k.index(r)>-1:c(r).is(a))return r;r=r.parentNode}return null})},index:function(a){if(!a||typeof a=== +"string")return c.inArray(this[0],a?c(a):this.parent().children());return c.inArray(a.jquery?a[0]:a,this)},add:function(a,b){a=typeof a==="string"?c(a,b||this.context):c.makeArray(a);b=c.merge(this.get(),a);return this.pushStack(qa(a[0])||qa(b[0])?b:c.unique(b))},andSelf:function(){return this.add(this.prevObject)}});c.each({parent:function(a){return(a=a.parentNode)&&a.nodeType!==11?a:null},parents:function(a){return c.dir(a,"parentNode")},parentsUntil:function(a,b,d){return c.dir(a,"parentNode", +d)},next:function(a){return c.nth(a,2,"nextSibling")},prev:function(a){return c.nth(a,2,"previousSibling")},nextAll:function(a){return c.dir(a,"nextSibling")},prevAll:function(a){return 
c.dir(a,"previousSibling")},nextUntil:function(a,b,d){return c.dir(a,"nextSibling",d)},prevUntil:function(a,b,d){return c.dir(a,"previousSibling",d)},siblings:function(a){return c.sibling(a.parentNode.firstChild,a)},children:function(a){return c.sibling(a.firstChild)},contents:function(a){return c.nodeName(a,"iframe")? +a.contentDocument||a.contentWindow.document:c.makeArray(a.childNodes)}},function(a,b){c.fn[a]=function(d,f){var e=c.map(this,b,d);eb.test(a)||(f=d);if(f&&typeof f==="string")e=c.filter(f,e);e=this.length>1?c.unique(e):e;if((this.length>1||gb.test(f))&&fb.test(a))e=e.reverse();return this.pushStack(e,a,R.call(arguments).join(","))}});c.extend({filter:function(a,b,d){if(d)a=":not("+a+")";return c.find.matches(a,b)},dir:function(a,b,d){var f=[];for(a=a[b];a&&a.nodeType!==9&&(d===w||a.nodeType!==1||!c(a).is(d));){a.nodeType=== +1&&f.push(a);a=a[b]}return f},nth:function(a,b,d){b=b||1;for(var f=0;a;a=a[d])if(a.nodeType===1&&++f===b)break;return a},sibling:function(a,b){for(var d=[];a;a=a.nextSibling)a.nodeType===1&&a!==b&&d.push(a);return d}});var Ja=/ jQuery\d+="(?:\d+|null)"/g,V=/^\s+/,Ka=/(<([\w:]+)[^>]*?)\/>/g,hb=/^(?:area|br|col|embed|hr|img|input|link|meta|param)$/i,La=/<([\w:]+)/,ib=/"},F={option:[1,""],legend:[1,"
","
"],thead:[1,"","
"],tr:[2,"","
"],td:[3,"","
"],col:[2,"","
"],area:[1,"",""],_default:[0,"",""]};F.optgroup=F.option;F.tbody=F.tfoot=F.colgroup=F.caption=F.thead;F.th=F.td;if(!c.support.htmlSerialize)F._default=[1,"div
","
"];c.fn.extend({text:function(a){if(c.isFunction(a))return this.each(function(b){var d= +c(this);d.text(a.call(this,b,d.text()))});if(typeof a!=="object"&&a!==w)return this.empty().append((this[0]&&this[0].ownerDocument||s).createTextNode(a));return c.text(this)},wrapAll:function(a){if(c.isFunction(a))return this.each(function(d){c(this).wrapAll(a.call(this,d))});if(this[0]){var b=c(a,this[0].ownerDocument).eq(0).clone(true);this[0].parentNode&&b.insertBefore(this[0]);b.map(function(){for(var d=this;d.firstChild&&d.firstChild.nodeType===1;)d=d.firstChild;return d}).append(this)}return this}, +wrapInner:function(a){if(c.isFunction(a))return this.each(function(b){c(this).wrapInner(a.call(this,b))});return this.each(function(){var b=c(this),d=b.contents();d.length?d.wrapAll(a):b.append(a)})},wrap:function(a){return this.each(function(){c(this).wrapAll(a)})},unwrap:function(){return this.parent().each(function(){c.nodeName(this,"body")||c(this).replaceWith(this.childNodes)}).end()},append:function(){return this.domManip(arguments,true,function(a){this.nodeType===1&&this.appendChild(a)})}, +prepend:function(){return this.domManip(arguments,true,function(a){this.nodeType===1&&this.insertBefore(a,this.firstChild)})},before:function(){if(this[0]&&this[0].parentNode)return this.domManip(arguments,false,function(b){this.parentNode.insertBefore(b,this)});else if(arguments.length){var a=c(arguments[0]);a.push.apply(a,this.toArray());return this.pushStack(a,"before",arguments)}},after:function(){if(this[0]&&this[0].parentNode)return this.domManip(arguments,false,function(b){this.parentNode.insertBefore(b, +this.nextSibling)});else if(arguments.length){var a=this.pushStack(this,"after",arguments);a.push.apply(a,c(arguments[0]).toArray());return a}},remove:function(a,b){for(var d=0,f;(f=this[d])!=null;d++)if(!a||c.filter(a,[f]).length){if(!b&&f.nodeType===1){c.cleanData(f.getElementsByTagName("*"));c.cleanData([f])}f.parentNode&&f.parentNode.removeChild(f)}return 
this},empty:function(){for(var a=0,b;(b=this[a])!=null;a++)for(b.nodeType===1&&c.cleanData(b.getElementsByTagName("*"));b.firstChild;)b.removeChild(b.firstChild); +return this},clone:function(a){var b=this.map(function(){if(!c.support.noCloneEvent&&!c.isXMLDoc(this)){var d=this.outerHTML,f=this.ownerDocument;if(!d){d=f.createElement("div");d.appendChild(this.cloneNode(true));d=d.innerHTML}return c.clean([d.replace(Ja,"").replace(/=([^="'>\s]+\/)>/g,'="$1">').replace(V,"")],f)[0]}else return this.cloneNode(true)});if(a===true){ra(this,b);ra(this.find("*"),b.find("*"))}return b},html:function(a){if(a===w)return this[0]&&this[0].nodeType===1?this[0].innerHTML.replace(Ja, +""):null;else if(typeof a==="string"&&!ta.test(a)&&(c.support.leadingWhitespace||!V.test(a))&&!F[(La.exec(a)||["",""])[1].toLowerCase()]){a=a.replace(Ka,Ma);try{for(var b=0,d=this.length;b0||e.cacheable||this.length>1?k.cloneNode(true):k)}o.length&&c.each(o,Qa)}return this}});c.fragments={};c.each({appendTo:"append",prependTo:"prepend",insertBefore:"before",insertAfter:"after",replaceAll:"replaceWith"},function(a,b){c.fn[a]=function(d){var f=[];d=c(d);var e=this.length===1&&this[0].parentNode;if(e&&e.nodeType===11&&e.childNodes.length===1&&d.length===1){d[b](this[0]); +return this}else{e=0;for(var j=d.length;e0?this.clone(true):this).get();c.fn[b].apply(c(d[e]),i);f=f.concat(i)}return this.pushStack(f,a,d.selector)}}});c.extend({clean:function(a,b,d,f){b=b||s;if(typeof b.createElement==="undefined")b=b.ownerDocument||b[0]&&b[0].ownerDocument||s;for(var e=[],j=0,i;(i=a[j])!=null;j++){if(typeof i==="number")i+="";if(i){if(typeof i==="string"&&!jb.test(i))i=b.createTextNode(i);else if(typeof i==="string"){i=i.replace(Ka,Ma);var o=(La.exec(i)||["", 
+""])[1].toLowerCase(),k=F[o]||F._default,n=k[0],r=b.createElement("div");for(r.innerHTML=k[1]+i+k[2];n--;)r=r.lastChild;if(!c.support.tbody){n=ib.test(i);o=o==="table"&&!n?r.firstChild&&r.firstChild.childNodes:k[1]===""&&!n?r.childNodes:[];for(k=o.length-1;k>=0;--k)c.nodeName(o[k],"tbody")&&!o[k].childNodes.length&&o[k].parentNode.removeChild(o[k])}!c.support.leadingWhitespace&&V.test(i)&&r.insertBefore(b.createTextNode(V.exec(i)[0]),r.firstChild);i=r.childNodes}if(i.nodeType)e.push(i);else e= +c.merge(e,i)}}if(d)for(j=0;e[j];j++)if(f&&c.nodeName(e[j],"script")&&(!e[j].type||e[j].type.toLowerCase()==="text/javascript"))f.push(e[j].parentNode?e[j].parentNode.removeChild(e[j]):e[j]);else{e[j].nodeType===1&&e.splice.apply(e,[j+1,0].concat(c.makeArray(e[j].getElementsByTagName("script"))));d.appendChild(e[j])}return e},cleanData:function(a){for(var b,d,f=c.cache,e=c.event.special,j=c.support.deleteExpando,i=0,o;(o=a[i])!=null;i++)if(d=o[c.expando]){b=f[d];if(b.events)for(var k in b.events)e[k]? 
+c.event.remove(o,k):Ca(o,k,b.handle);if(j)delete o[c.expando];else o.removeAttribute&&o.removeAttribute(c.expando);delete f[d]}}});var kb=/z-?index|font-?weight|opacity|zoom|line-?height/i,Na=/alpha\([^)]*\)/,Oa=/opacity=([^)]*)/,ha=/float/i,ia=/-([a-z])/ig,lb=/([A-Z])/g,mb=/^-?\d+(?:px)?$/i,nb=/^-?\d/,ob={position:"absolute",visibility:"hidden",display:"block"},pb=["Left","Right"],qb=["Top","Bottom"],rb=s.defaultView&&s.defaultView.getComputedStyle,Pa=c.support.cssFloat?"cssFloat":"styleFloat",ja= +function(a,b){return b.toUpperCase()};c.fn.css=function(a,b){return X(this,a,b,true,function(d,f,e){if(e===w)return c.curCSS(d,f);if(typeof e==="number"&&!kb.test(f))e+="px";c.style(d,f,e)})};c.extend({style:function(a,b,d){if(!a||a.nodeType===3||a.nodeType===8)return w;if((b==="width"||b==="height")&&parseFloat(d)<0)d=w;var f=a.style||a,e=d!==w;if(!c.support.opacity&&b==="opacity"){if(e){f.zoom=1;b=parseInt(d,10)+""==="NaN"?"":"alpha(opacity="+d*100+")";a=f.filter||c.curCSS(a,"filter")||"";f.filter= +Na.test(a)?a.replace(Na,b):b}return f.filter&&f.filter.indexOf("opacity=")>=0?parseFloat(Oa.exec(f.filter)[1])/100+"":""}if(ha.test(b))b=Pa;b=b.replace(ia,ja);if(e)f[b]=d;return f[b]},css:function(a,b,d,f){if(b==="width"||b==="height"){var e,j=b==="width"?pb:qb;function i(){e=b==="width"?a.offsetWidth:a.offsetHeight;f!=="border"&&c.each(j,function(){f||(e-=parseFloat(c.curCSS(a,"padding"+this,true))||0);if(f==="margin")e+=parseFloat(c.curCSS(a,"margin"+this,true))||0;else e-=parseFloat(c.curCSS(a, +"border"+this+"Width",true))||0})}a.offsetWidth!==0?i():c.swap(a,ob,i);return Math.max(0,Math.round(e))}return c.curCSS(a,b,d)},curCSS:function(a,b,d){var f,e=a.style;if(!c.support.opacity&&b==="opacity"&&a.currentStyle){f=Oa.test(a.currentStyle.filter||"")?parseFloat(RegExp.$1)/100+"":"";return f===""?"1":f}if(ha.test(b))b=Pa;if(!d&&e&&e[b])f=e[b];else if(rb){if(ha.test(b))b="float";b=b.replace(lb,"-$1").toLowerCase();e=a.ownerDocument.defaultView;if(!e)return 
null;if(a=e.getComputedStyle(a,null))f= +a.getPropertyValue(b);if(b==="opacity"&&f==="")f="1"}else if(a.currentStyle){d=b.replace(ia,ja);f=a.currentStyle[b]||a.currentStyle[d];if(!mb.test(f)&&nb.test(f)){b=e.left;var j=a.runtimeStyle.left;a.runtimeStyle.left=a.currentStyle.left;e.left=d==="fontSize"?"1em":f||0;f=e.pixelLeft+"px";e.left=b;a.runtimeStyle.left=j}}return f},swap:function(a,b,d){var f={};for(var e in b){f[e]=a.style[e];a.style[e]=b[e]}d.call(a);for(e in b)a.style[e]=f[e]}});if(c.expr&&c.expr.filters){c.expr.filters.hidden=function(a){var b= +a.offsetWidth,d=a.offsetHeight,f=a.nodeName.toLowerCase()==="tr";return b===0&&d===0&&!f?true:b>0&&d>0&&!f?false:c.curCSS(a,"display")==="none"};c.expr.filters.visible=function(a){return!c.expr.filters.hidden(a)}}var sb=J(),tb=//gi,ub=/select|textarea/i,vb=/color|date|datetime|email|hidden|month|number|password|range|search|tel|text|time|url|week/i,N=/=\?(&|$)/,ka=/\?/,wb=/(\?|&)_=.*?(&|$)/,xb=/^(\w+:)?\/\/([^\/?#]+)/,yb=/%20/g,zb=c.fn.load;c.fn.extend({load:function(a,b,d){if(typeof a!== +"string")return zb.call(this,a);else if(!this.length)return this;var f=a.indexOf(" ");if(f>=0){var e=a.slice(f,a.length);a=a.slice(0,f)}f="GET";if(b)if(c.isFunction(b)){d=b;b=null}else if(typeof b==="object"){b=c.param(b,c.ajaxSettings.traditional);f="POST"}var j=this;c.ajax({url:a,type:f,dataType:"html",data:b,complete:function(i,o){if(o==="success"||o==="notmodified")j.html(e?c("
").append(i.responseText.replace(tb,"")).find(e):i.responseText);d&&j.each(d,[i.responseText,o,i])}});return this}, +serialize:function(){return c.param(this.serializeArray())},serializeArray:function(){return this.map(function(){return this.elements?c.makeArray(this.elements):this}).filter(function(){return this.name&&!this.disabled&&(this.checked||ub.test(this.nodeName)||vb.test(this.type))}).map(function(a,b){a=c(this).val();return a==null?null:c.isArray(a)?c.map(a,function(d){return{name:b.name,value:d}}):{name:b.name,value:a}}).get()}});c.each("ajaxStart ajaxStop ajaxComplete ajaxError ajaxSuccess ajaxSend".split(" "), +function(a,b){c.fn[b]=function(d){return this.bind(b,d)}});c.extend({get:function(a,b,d,f){if(c.isFunction(b)){f=f||d;d=b;b=null}return c.ajax({type:"GET",url:a,data:b,success:d,dataType:f})},getScript:function(a,b){return c.get(a,null,b,"script")},getJSON:function(a,b,d){return c.get(a,b,d,"json")},post:function(a,b,d,f){if(c.isFunction(b)){f=f||d;d=b;b={}}return c.ajax({type:"POST",url:a,data:b,success:d,dataType:f})},ajaxSetup:function(a){c.extend(c.ajaxSettings,a)},ajaxSettings:{url:location.href, +global:true,type:"GET",contentType:"application/x-www-form-urlencoded",processData:true,async:true,xhr:A.XMLHttpRequest&&(A.location.protocol!=="file:"||!A.ActiveXObject)?function(){return new A.XMLHttpRequest}:function(){try{return new A.ActiveXObject("Microsoft.XMLHTTP")}catch(a){}},accepts:{xml:"application/xml, text/xml",html:"text/html",script:"text/javascript, application/javascript",json:"application/json, text/javascript",text:"text/plain",_default:"*/*"}},lastModified:{},etag:{},ajax:function(a){function b(){e.success&& +e.success.call(k,o,i,x);e.global&&f("ajaxSuccess",[x,e])}function d(){e.complete&&e.complete.call(k,x,i);e.global&&f("ajaxComplete",[x,e]);e.global&&!--c.active&&c.event.trigger("ajaxStop")}function f(q,p){(e.context?c(e.context):c.event).trigger(q,p)}var 
e=c.extend(true,{},c.ajaxSettings,a),j,i,o,k=a&&a.context||e,n=e.type.toUpperCase();if(e.data&&e.processData&&typeof e.data!=="string")e.data=c.param(e.data,e.traditional);if(e.dataType==="jsonp"){if(n==="GET")N.test(e.url)||(e.url+=(ka.test(e.url)? +"&":"?")+(e.jsonp||"callback")+"=?");else if(!e.data||!N.test(e.data))e.data=(e.data?e.data+"&":"")+(e.jsonp||"callback")+"=?";e.dataType="json"}if(e.dataType==="json"&&(e.data&&N.test(e.data)||N.test(e.url))){j=e.jsonpCallback||"jsonp"+sb++;if(e.data)e.data=(e.data+"").replace(N,"="+j+"$1");e.url=e.url.replace(N,"="+j+"$1");e.dataType="script";A[j]=A[j]||function(q){o=q;b();d();A[j]=w;try{delete A[j]}catch(p){}z&&z.removeChild(C)}}if(e.dataType==="script"&&e.cache===null)e.cache=false;if(e.cache=== +false&&n==="GET"){var r=J(),u=e.url.replace(wb,"$1_="+r+"$2");e.url=u+(u===e.url?(ka.test(e.url)?"&":"?")+"_="+r:"")}if(e.data&&n==="GET")e.url+=(ka.test(e.url)?"&":"?")+e.data;e.global&&!c.active++&&c.event.trigger("ajaxStart");r=(r=xb.exec(e.url))&&(r[1]&&r[1]!==location.protocol||r[2]!==location.host);if(e.dataType==="script"&&n==="GET"&&r){var z=s.getElementsByTagName("head")[0]||s.documentElement,C=s.createElement("script");C.src=e.url;if(e.scriptCharset)C.charset=e.scriptCharset;if(!j){var B= +false;C.onload=C.onreadystatechange=function(){if(!B&&(!this.readyState||this.readyState==="loaded"||this.readyState==="complete")){B=true;b();d();C.onload=C.onreadystatechange=null;z&&C.parentNode&&z.removeChild(C)}}}z.insertBefore(C,z.firstChild);return w}var E=false,x=e.xhr();if(x){e.username?x.open(n,e.url,e.async,e.username,e.password):x.open(n,e.url,e.async);try{if(e.data||a&&a.contentType)x.setRequestHeader("Content-Type",e.contentType);if(e.ifModified){c.lastModified[e.url]&&x.setRequestHeader("If-Modified-Since", 
+c.lastModified[e.url]);c.etag[e.url]&&x.setRequestHeader("If-None-Match",c.etag[e.url])}r||x.setRequestHeader("X-Requested-With","XMLHttpRequest");x.setRequestHeader("Accept",e.dataType&&e.accepts[e.dataType]?e.accepts[e.dataType]+", */*":e.accepts._default)}catch(ga){}if(e.beforeSend&&e.beforeSend.call(k,x,e)===false){e.global&&!--c.active&&c.event.trigger("ajaxStop");x.abort();return false}e.global&&f("ajaxSend",[x,e]);var g=x.onreadystatechange=function(q){if(!x||x.readyState===0||q==="abort"){E|| +d();E=true;if(x)x.onreadystatechange=c.noop}else if(!E&&x&&(x.readyState===4||q==="timeout")){E=true;x.onreadystatechange=c.noop;i=q==="timeout"?"timeout":!c.httpSuccess(x)?"error":e.ifModified&&c.httpNotModified(x,e.url)?"notmodified":"success";var p;if(i==="success")try{o=c.httpData(x,e.dataType,e)}catch(v){i="parsererror";p=v}if(i==="success"||i==="notmodified")j||b();else c.handleError(e,x,i,p);d();q==="timeout"&&x.abort();if(e.async)x=null}};try{var h=x.abort;x.abort=function(){x&&h.call(x); +g("abort")}}catch(l){}e.async&&e.timeout>0&&setTimeout(function(){x&&!E&&g("timeout")},e.timeout);try{x.send(n==="POST"||n==="PUT"||n==="DELETE"?e.data:null)}catch(m){c.handleError(e,x,null,m);d()}e.async||g();return x}},handleError:function(a,b,d,f){if(a.error)a.error.call(a.context||a,b,d,f);if(a.global)(a.context?c(a.context):c.event).trigger("ajaxError",[b,a,f])},active:0,httpSuccess:function(a){try{return!a.status&&location.protocol==="file:"||a.status>=200&&a.status<300||a.status===304||a.status=== +1223||a.status===0}catch(b){}return false},httpNotModified:function(a,b){var d=a.getResponseHeader("Last-Modified"),f=a.getResponseHeader("Etag");if(d)c.lastModified[b]=d;if(f)c.etag[b]=f;return a.status===304||a.status===0},httpData:function(a,b,d){var 
f=a.getResponseHeader("content-type")||"",e=b==="xml"||!b&&f.indexOf("xml")>=0;a=e?a.responseXML:a.responseText;e&&a.documentElement.nodeName==="parsererror"&&c.error("parsererror");if(d&&d.dataFilter)a=d.dataFilter(a,b);if(typeof a==="string")if(b=== +"json"||!b&&f.indexOf("json")>=0)a=c.parseJSON(a);else if(b==="script"||!b&&f.indexOf("javascript")>=0)c.globalEval(a);return a},param:function(a,b){function d(i,o){if(c.isArray(o))c.each(o,function(k,n){b||/\[\]$/.test(i)?f(i,n):d(i+"["+(typeof n==="object"||c.isArray(n)?k:"")+"]",n)});else!b&&o!=null&&typeof o==="object"?c.each(o,function(k,n){d(i+"["+k+"]",n)}):f(i,o)}function f(i,o){o=c.isFunction(o)?o():o;e[e.length]=encodeURIComponent(i)+"="+encodeURIComponent(o)}var e=[];if(b===w)b=c.ajaxSettings.traditional; +if(c.isArray(a)||a.jquery)c.each(a,function(){f(this.name,this.value)});else for(var j in a)d(j,a[j]);return e.join("&").replace(yb,"+")}});var la={},Ab=/toggle|show|hide/,Bb=/^([+-]=)?([\d+-.]+)(.*)$/,W,va=[["height","marginTop","marginBottom","paddingTop","paddingBottom"],["width","marginLeft","marginRight","paddingLeft","paddingRight"],["opacity"]];c.fn.extend({show:function(a,b){if(a||a===0)return this.animate(K("show",3),a,b);else{a=0;for(b=this.length;a").appendTo("body");f=e.css("display");if(f==="none")f="block";e.remove();la[d]=f}c.data(this[a],"olddisplay",f)}}a=0;for(b=this.length;a=0;f--)if(d[f].elem===this){b&&d[f](true);d.splice(f,1)}});b||this.dequeue();return this}});c.each({slideDown:K("show",1),slideUp:K("hide",1),slideToggle:K("toggle",1),fadeIn:{opacity:"show"},fadeOut:{opacity:"hide"}},function(a,b){c.fn[a]=function(d,f){return this.animate(b,d,f)}});c.extend({speed:function(a,b,d){var f=a&&typeof a==="object"?a:{complete:d||!d&&b||c.isFunction(a)&&a,duration:a,easing:d&&b||b&&!c.isFunction(b)&&b};f.duration=c.fx.off?0:typeof f.duration=== 
+"number"?f.duration:c.fx.speeds[f.duration]||c.fx.speeds._default;f.old=f.complete;f.complete=function(){f.queue!==false&&c(this).dequeue();c.isFunction(f.old)&&f.old.call(this)};return f},easing:{linear:function(a,b,d,f){return d+f*a},swing:function(a,b,d,f){return(-Math.cos(a*Math.PI)/2+0.5)*f+d}},timers:[],fx:function(a,b,d){this.options=b;this.elem=a;this.prop=d;if(!b.orig)b.orig={}}});c.fx.prototype={update:function(){this.options.step&&this.options.step.call(this.elem,this.now,this);(c.fx.step[this.prop]|| +c.fx.step._default)(this);if((this.prop==="height"||this.prop==="width")&&this.elem.style)this.elem.style.display="block"},cur:function(a){if(this.elem[this.prop]!=null&&(!this.elem.style||this.elem.style[this.prop]==null))return this.elem[this.prop];return(a=parseFloat(c.css(this.elem,this.prop,a)))&&a>-10000?a:parseFloat(c.curCSS(this.elem,this.prop))||0},custom:function(a,b,d){function f(j){return e.step(j)}this.startTime=J();this.start=a;this.end=b;this.unit=d||this.unit||"px";this.now=this.start; +this.pos=this.state=0;var e=this;f.elem=this.elem;if(f()&&c.timers.push(f)&&!W)W=setInterval(c.fx.tick,13)},show:function(){this.options.orig[this.prop]=c.style(this.elem,this.prop);this.options.show=true;this.custom(this.prop==="width"||this.prop==="height"?1:0,this.cur());c(this.elem).show()},hide:function(){this.options.orig[this.prop]=c.style(this.elem,this.prop);this.options.hide=true;this.custom(this.cur(),0)},step:function(a){var b=J(),d=true;if(a||b>=this.options.duration+this.startTime){this.now= +this.end;this.pos=this.state=1;this.update();this.options.curAnim[this.prop]=true;for(var f in 
this.options.curAnim)if(this.options.curAnim[f]!==true)d=false;if(d){if(this.options.display!=null){this.elem.style.overflow=this.options.overflow;a=c.data(this.elem,"olddisplay");this.elem.style.display=a?a:this.options.display;if(c.css(this.elem,"display")==="none")this.elem.style.display="block"}this.options.hide&&c(this.elem).hide();if(this.options.hide||this.options.show)for(var e in this.options.curAnim)c.style(this.elem, +e,this.options.orig[e]);this.options.complete.call(this.elem)}return false}else{e=b-this.startTime;this.state=e/this.options.duration;a=this.options.easing||(c.easing.swing?"swing":"linear");this.pos=c.easing[this.options.specialEasing&&this.options.specialEasing[this.prop]||a](this.state,e,0,1,this.options.duration);this.now=this.start+(this.end-this.start)*this.pos;this.update()}return true}};c.extend(c.fx,{tick:function(){for(var a=c.timers,b=0;b
"; +a.insertBefore(b,a.firstChild);d=b.firstChild;f=d.firstChild;e=d.nextSibling.firstChild.firstChild;this.doesNotAddBorder=f.offsetTop!==5;this.doesAddBorderForTableAndCells=e.offsetTop===5;f.style.position="fixed";f.style.top="20px";this.supportsFixedPosition=f.offsetTop===20||f.offsetTop===15;f.style.position=f.style.top="";d.style.overflow="hidden";d.style.position="relative";this.subtractsBorderForOverflowNotVisible=f.offsetTop===-5;this.doesNotIncludeMarginInBodyOffset=a.offsetTop!==j;a.removeChild(b); +c.offset.initialize=c.noop},bodyOffset:function(a){var b=a.offsetTop,d=a.offsetLeft;c.offset.initialize();if(c.offset.doesNotIncludeMarginInBodyOffset){b+=parseFloat(c.curCSS(a,"marginTop",true))||0;d+=parseFloat(c.curCSS(a,"marginLeft",true))||0}return{top:b,left:d}},setOffset:function(a,b,d){if(/static/.test(c.curCSS(a,"position")))a.style.position="relative";var f=c(a),e=f.offset(),j=parseInt(c.curCSS(a,"top",true),10)||0,i=parseInt(c.curCSS(a,"left",true),10)||0;if(c.isFunction(b))b=b.call(a, +d,e);d={top:b.top-e.top+j,left:b.left-e.left+i};"using"in b?b.using.call(a,d):f.css(d)}};c.fn.extend({position:function(){if(!this[0])return null;var a=this[0],b=this.offsetParent(),d=this.offset(),f=/^body|html$/i.test(b[0].nodeName)?{top:0,left:0}:b.offset();d.top-=parseFloat(c.curCSS(a,"marginTop",true))||0;d.left-=parseFloat(c.curCSS(a,"marginLeft",true))||0;f.top+=parseFloat(c.curCSS(b[0],"borderTopWidth",true))||0;f.left+=parseFloat(c.curCSS(b[0],"borderLeftWidth",true))||0;return{top:d.top- +f.top,left:d.left-f.left}},offsetParent:function(){return this.map(function(){for(var a=this.offsetParent||s.body;a&&!/^body|html$/i.test(a.nodeName)&&c.css(a,"position")==="static";)a=a.offsetParent;return a})}});c.each(["Left","Top"],function(a,b){var d="scroll"+b;c.fn[d]=function(f){var e=this[0],j;if(!e)return null;if(f!==w)return this.each(function(){if(j=wa(this))j.scrollTo(!a?f:c(j).scrollLeft(),a?f:c(j).scrollTop());else this[d]=f});else 
return(j=wa(e))?"pageXOffset"in j?j[a?"pageYOffset": +"pageXOffset"]:c.support.boxModel&&j.document.documentElement[d]||j.document.body[d]:e[d]}});c.each(["Height","Width"],function(a,b){var d=b.toLowerCase();c.fn["inner"+b]=function(){return this[0]?c.css(this[0],d,false,"padding"):null};c.fn["outer"+b]=function(f){return this[0]?c.css(this[0],d,false,f?"margin":"border"):null};c.fn[d]=function(f){var e=this[0];if(!e)return f==null?null:this;if(c.isFunction(f))return this.each(function(j){var i=c(this);i[d](f.call(this,j,i[d]()))});return"scrollTo"in +e&&e.document?e.document.compatMode==="CSS1Compat"&&e.document.documentElement["client"+b]||e.document.body["client"+b]:e.nodeType===9?Math.max(e.documentElement["client"+b],e.body["scroll"+b],e.documentElement["scroll"+b],e.body["offset"+b],e.documentElement["offset"+b]):f===w?c.css(e,d):this.css(d,typeof f==="string"?f:f+"px")}});A.jQuery=A.$=c})(window); diff --git a/includes/php-gettext/ChangeLog b/includes/php-gettext/ChangeLog deleted file mode 100644 index 5e0949d..0000000 --- a/includes/php-gettext/ChangeLog +++ /dev/null @@ -1,144 +0,0 @@ -2006-02-07 Danilo Šegan - - * examples/pigs_dropin.php: comment-out bind_textdomain_codeset - - * gettext.inc (T_bind_textdomain_codeset): bind_textdomain_codeset - is available only in PHP 4.2.0+ (thanks to Jens A. Tkotz). - - * Makefile: Include gettext.inc in DIST_FILES, VERSION up to - 1.0.7. - -2006-02-03 Danilo Šegan - - Added setlocale() emulation as well. - - * examples/pigs_dropin.php: Use T_setlocale() and locale_emulation(). - * examples/pigs_fallback.php: Use T_setlocale() and locale_emulation(). - - * gettext.inc: Added globals $EMULATEGETTEXT and $CURRENTLOCALE. - (locale_emulation): Whether emulation is active. - (_check_locale): Rewrite. - (_setlocale): Added emulated setlocale function. - (T_setlocale): Wrapper around _setlocale. - (_get_reader): Use variables and _setlocale. - -2006-02-02 Danilo Šegan - - Fix bug #12192. 
- - * examples/locale/sr_CS/LC_MESSAGES/messages.po: Correct grammar. - * examples/locale/sr_CS/LC_MESSAGES/messages.mo: Rebuild. - -2006-02-02 Danilo Šegan - - Fix bug #15419. - - * streams.php: Support for PHP 5.1.1 fread() which reads most 8kb. - (Fix by Piotr Szotkowski ) - -2006-02-02 Danilo Šegan - - Merge Steven Armstrong's changes, supporting standard gettext - interfaces: - - * examples/*: Restructured examples. - * gettext.inc: Added. - * AUTHORS: Added Steven. - * Makefile (VERSION): Up to 1.0.6. - -2006-01-28 Nico Kaiser - - * gettext.php (select_string): Fix "true" <-> 1 difference of PHP - -2005-07-29 Danilo Šegan - - * Makefile (VERSION): Up to 1.0.5. - -2005-07-29 Danilo Šegan - - Fixes bug #13850. - - * gettext.php (gettext_reader): check $Reader->error as well. - -2005-07-29 Danilo Šegan - - * Makefile (VERSION): Up to 1.0.4. - -2005-07-29 Danilo Šegan - - Fixes bug #13771. - - * gettext.php (gettext_reader->get_plural_forms): Plural forms - header extraction regex change. Reported by Edgar Gonzales. - -2005-02-28 Danilo Šegan - - * AUTHORS: Added Nico to the list. - - * Makefile (VERSION): Up to 1.0.3. - - * README: Updated. - -2005-02-28 Danilo Šegan - - * gettext.php: Added pre-loading, code documentation, and many - code clean-ups by Nico Kaiser . - -2005-02-28 Danilo Šegan - - * streams.php (FileReader.read): Handle read($bytes = 0). - - * examples/pigs.php: Prefix gettext function names with T or T_. - - * examples/update: Use the same keywords T_ and T_ngettext. - - * streams.php: Added CachedFileReader. - -2003-11-11 Danilo Šegan - - * gettext.php: Added hashing to find_string. - -2003-11-01 Danilo Šegan - - * Makefile (DIST_FILES): Replaced LICENSE with COPYING. - (VERSION): Up to 1.0.2. - - * AUTHORS: Minor edits. - - * README: Minor edits. - - * COPYING: Removed LICENSE, added this file. - - * gettext.php: Added copyright notice and disclaimer. - * streams.php: Same. - * examples/pigs.php: Same. 
- -2003-10-23 Danilo Šegan - - * Makefile: Upped version to 1.0.1. - - * gettext.php (gettext_reader): Remove a call to set_total_plurals. - (set_total_plurals): Removed unused function for some better days. - -2003-10-23 Danilo Šegan - - * Makefile: Added, version 1.0.0. - - * examples/*: Added an example of usage. - - * README: Described all the crap. - -2003-10-22 Danilo Šegan - - * gettext.php: Plural forms implemented too. - - * streams.php: Added FileReader for direct access to files (no - need to keep file in memory). - - * gettext.php: It works, except for plural forms. - - * streams.php: Created abstract class StreamReader. - Added StringReader class. - - * gettext.php: Started writing gettext_reader. - diff --git a/includes/php-gettext/Makefile b/includes/php-gettext/Makefile index 2dba911..a6cce12 100644 --- a/includes/php-gettext/Makefile +++ b/includes/php-gettext/Makefile @@ -1,12 +1,11 @@ PACKAGE = php-gettext-$(VERSION) -VERSION = 1.0.7 +VERSION = 1.0.10 DIST_FILES = \ gettext.php \ gettext.inc \ streams.php \ AUTHORS \ - ChangeLog \ README \ COPYING \ Makefile \ @@ -17,9 +16,14 @@ DIST_FILES = \ examples/locale/sr_CS/LC_MESSAGES/messages.mo \ examples/locale/de_CH/LC_MESSAGES/messages.po \ examples/locale/de_CH/LC_MESSAGES/messages.mo \ - examples/update + examples/update \ + tests/LocalesTest.php \ + tests/ParsingTest.php -dist: +check: + phpunit --verbose tests + +dist: check if [ -d $(PACKAGE) ]; then \ rm -rf $(PACKAGE); \ fi; \ @@ -30,3 +34,5 @@ dist: rm -rf $(PACKAGE); \ fi; +clean: + rm -f $(PACKAGE).tar.gz diff --git a/includes/php-gettext/README b/includes/php-gettext/README index c7525e2..bca4f91 100644 --- a/includes/php-gettext/README +++ b/includes/php-gettext/README @@ -1,9 +1,9 @@ -PHP-gettext 1.0 +PHP-gettext 1.0 (https://launchpad.net/php-gettext) -Copyright 2003, 2006 -- Danilo "angry with PHP[1]" Segan +Copyright 2003, 2006, 2009 -- Danilo "angry with PHP[1]" Segan Licensed under GPLv2 (or any later version, see COPYING) -[1] PHP 
is actually cyrillic, and translates roughly to +[1] PHP is actually cyrillic, and translates roughly to "works-doesn't-work" (UTF-8: Ради-Не-Ради) @@ -50,36 +50,16 @@ Features file data, I used imaginary abstract class StreamReader to do all the input (check streams.php). For your convenience, I've already provided two classes for reading files: FileReader and - StringReader (CachedFileReader is a combination of the two: it - loads entire file contents into a string, and then works on that). - See example below for usage. You can for instance use StringReader - when you read in data from a database, or you can create your own - derivative of StreamReader for anything you like. - + StringReader (CachedFileReader is a combination of the two: it + loads entire file contents into a string, and then works on that). + See example below for usage. You can for instance use StringReader + when you read in data from a database, or you can create your own + derivative of StreamReader for anything you like. -Bugs - - Plural-forms field in MO header (translation for empty string, - i.e. "") is treated according to PHP syntactic rules (it's - eval()ed). Since these should actually follow C syntax, there are - some problems. - For instance, I'm used to using this: - Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : \ - n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2; - but it fails with PHP (it sets $plural=2 instead of 0 for $n==1). - - The fix is usually simple, but I'm lazy to go into the details of - PHP operator precedence, and maybe try to fix it. In here, I had - to put everything after the first ':' in parenthesis: - Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : \ - (n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2); - That works, and I'm satisfied. +Bugs - Besides this one, there are probably a bunch of other bugs, since - I hate PHP (did I mention it already? no? strange), and don't - know it very well. 
So, feel free to fix any of those and report - them back to me at . + Report them on https://bugs.launchpad.net/php-gettext Usage @@ -94,19 +74,19 @@ Usage Then, use that as a parameter to gettext_reader constructor: $wohoo = new gettext_reader($streamer); - If you want to disable pre-loading of entire message catalog in - memory (if, for example, you have a multi-thousand message catalog - which you'll use only occasionally), use "false" for second + If you want to disable pre-loading of entire message catalog in + memory (if, for example, you have a multi-thousand message catalog + which you'll use only occasionally), use "false" for second parameter to gettext_reader constructor: $wohoo = new gettext_reader($streamer, false); From now on, you have all the benefits of gettext data at your - disposal, so may run: + disposal, so may run: print $wohoo->translate("This is a test"); print $wohoo->ngettext("%d bird", "%d birds", $birds); You might need to pass parameter "-k" to xgettext to make it - extract all the strings. In above example, try with + extract all the strings. In above example, try with xgettext -ktranslate -kngettext:1,2 file.php what should create messages.po which contains two messages for translation. @@ -118,8 +98,8 @@ Usage Usage with gettext.inc (standard gettext interfaces emulation) - Check example in examples/pig_dropin.php, basically you include - gettext.inc and use all the standard gettext interfaces as + Check example in examples/pig_dropin.php, basically you include + gettext.inc and use all the standard gettext interfaces as documented on: http://www.php.net/gettext @@ -137,20 +117,12 @@ Example There is also simple "update" script that can be used to generate POT file and to update the translation using msgmerge. -Interesting TODO: +TODO: - o Try to parse "plural-forms" header field, and to follow C syntax - rules. This won't be easy. + o Improve speed to be even more comparable to the native gettext + implementation. 
-Boring TODO: - - o Learn PHP and fix bugs, slowness and other stuff resulting from - my lack of knowledge (but *maybe*, it's not my knowledge that is - bad, but PHP itself ;-). - - (This is mostly done thanks to Nico Kaiser.) - - o Try to use hash tables in MO files: with pre-loading, would it + o Try to use hash tables in MO files: with pre-loading, would it be useful at all? Never-asked-questions: @@ -160,7 +132,7 @@ Never-asked-questions: Well, it's quite simple. I consider that the first released thing should be labeled "version 1" (first, right?). Zero is there to - indicate that there's zero improvement and/or change compared to + indicate that there's zero improvement and/or change compared to "version 1". I plan to use version numbers 1.0.* for small bugfixes, and to @@ -173,7 +145,7 @@ Never-asked-questions: Mozart's 40th Symphony (there is one like that, right?). o Can I...? - + Yes, you can. This is free software (as in freedom, free speech), and you might do whatever you wish with it, provided you do not limit freedom of others (GPL). 
diff --git a/includes/php-gettext/bin/gettexts.bat b/includes/php-gettext/bin/gettexts.bat deleted file mode 100644 index 212ffce..0000000 --- a/includes/php-gettext/bin/gettexts.bat +++ /dev/null @@ -1,20 +0,0 @@ -@echo off -xgettext -kT_ngettext:1,2 -kT_ -L PHP -o ..\..\..\locales\messages.po ..\..\..\*.php ..\..\..\services\*.php ..\..\..\templates\*.php -if /i "%1" == "-p" goto stats -if exist "..\..\..\locales\%1.po" goto merge -echo "Usage: $0 [-p|]" -goto end - -:stats -msgfmt --statistics ..\..\..\locales\messages.po -goto end - -:merge -msgmerge -o ..\..\..\locales\tmp%1.po ..\..\..\locales\%1.po ..\..\..\locales\messages.po -if exist "..\..\..\locales\%1.po" rename ..\..\..\locales\%1.po %1.po.bak -rename ..\..\..\locales\tmp%1.po %1.po -if exist "..\..\..\locales\%1.po.bak" del ..\..\..\locales\%1.po.bak -msgfmt --statistics "..\..\..\locales\%1.po" - -:end -echo Finished \ No newline at end of file diff --git a/includes/php-gettext/examples/locale/sr_CS/LC_MESSAGES/messages.mo b/includes/php-gettext/examples/locale/sr_CS/LC_MESSAGES/messages.mo index 6ffccfd..497c883 100644 Binary files a/includes/php-gettext/examples/locale/sr_CS/LC_MESSAGES/messages.mo and b/includes/php-gettext/examples/locale/sr_CS/LC_MESSAGES/messages.mo differ diff --git a/includes/php-gettext/examples/locale/sr_CS/LC_MESSAGES/messages.po b/includes/php-gettext/examples/locale/sr_CS/LC_MESSAGES/messages.po index 7e620cc..e5da0e9 100644 --- a/includes/php-gettext/examples/locale/sr_CS/LC_MESSAGES/messages.po +++ b/includes/php-gettext/examples/locale/sr_CS/LC_MESSAGES/messages.po @@ -12,7 +12,8 @@ msgstr "" "MIME-Version: 1.0\n" "Content-Type: text/plain; charset=UTF-8\n" "Content-Transfer-Encoding: 8bit\n" -"Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : (n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2);\n" +"Plural-Forms: nplurals=3; plural=n%10==1 && n%100!=11 ? 0 : n%10>=2 && " +"n%10<=4 && (n%100<10 || n%100>=20) ? 
1 : 2;\n" #: pigs.php:19 msgid "" diff --git a/includes/php-gettext/examples/pigs_dropin.php b/includes/php-gettext/examples/pigs_dropin.php index edd2b0d..94fd850 100644 --- a/includes/php-gettext/examples/pigs_dropin.php +++ b/includes/php-gettext/examples/pigs_dropin.php @@ -1,6 +1,6 @@ . + Copyright (c) 2003,2004,2005,2009 Danilo Segan . Copyright (c) 2005,2006 Steven Armstrong This file is part of PHP-gettext. @@ -21,10 +21,12 @@ */ +error_reporting(E_ALL | E_STRICT); + // define constants -define(PROJECT_DIR, realpath('./')); -define(LOCALE_DIR, PROJECT_DIR .'/locale'); -define(DEFAULT_LOCALE, 'en_US'); +define('PROJECT_DIR', realpath('./')); +define('LOCALE_DIR', PROJECT_DIR .'/locale'); +define('DEFAULT_LOCALE', 'en_US'); require_once('../gettext.inc'); diff --git a/includes/php-gettext/examples/pigs_fallback.php b/includes/php-gettext/examples/pigs_fallback.php index b50f752..353190d 100644 --- a/includes/php-gettext/examples/pigs_fallback.php +++ b/includes/php-gettext/examples/pigs_fallback.php @@ -1,6 +1,6 @@ . + Copyright (c) 2003,2004,2005,2009 Danilo Segan . Copyright (c) 2005,2006 Steven Armstrong This file is part of PHP-gettext. 
@@ -21,10 +21,12 @@ */ +error_reporting(E_ALL | E_STRICT); + // define constants -define(PROJECT_DIR, realpath('./')); -define(LOCALE_DIR, PROJECT_DIR .'/locale'); -define(DEFAULT_LOCALE, 'en_US'); +define('PROJECT_DIR', realpath('./')); +define('LOCALE_DIR', PROJECT_DIR .'/locale'); +define('DEFAULT_LOCALE', 'en_US'); require_once('../gettext.inc'); diff --git a/includes/php-gettext/examples/update b/includes/php-gettext/examples/update old mode 100644 new mode 100755 index c8d8b61..76b4308 --- a/includes/php-gettext/examples/update +++ b/includes/php-gettext/examples/update @@ -1,7 +1,7 @@ #!/bin/sh TEMPLATE=pigs.pot -xgettext -kT_ngettext:1,2 -kT_ -L PHP -o $TEMPLATE pigs.php -if [ x$1 == 'x-p' ]; then +xgettext -kT_ngettext:1,2 -kT_ -L PHP -o $TEMPLATE pigs_dropin.php +if [ "x$1" = "x-p" ]; then msgfmt --statistics $TEMPLATE else if [ -f $1.po ]; then diff --git a/includes/php-gettext/gettext.inc b/includes/php-gettext/gettext.inc index 21a41cc..399a0f2 100644 --- a/includes/php-gettext/gettext.inc +++ b/includes/php-gettext/gettext.inc @@ -1,9 +1,10 @@ - + Copyright (c) 2009 Danilo Segan + Drop in replacement for native gettext. - + This file is part of PHP-gettext. PHP-gettext is free software; you can redistribute it and/or modify @@ -22,17 +23,25 @@ */ /* -LC_CTYPE 0 -LC_NUMERIC 1 -LC_TIME 2 -LC_COLLATE 3 -LC_MONETARY 4 -LC_MESSAGES 5 -LC_ALL 6 +LC_CTYPE 0 +LC_NUMERIC 1 +LC_TIME 2 +LC_COLLATE 3 +LC_MONETARY 4 +LC_MESSAGES 5 +LC_ALL 6 */ -require(dirname(__FILE__) .'/streams.php'); -require(dirname(__FILE__) .'/gettext.php'); + +// LC_MESSAGES is not available if php-gettext is not loaded +// while the other constants are already available from session extension. 
+if (!defined('LC_MESSAGES')) { + define('LC_MESSAGES', 5); +} + +require('streams.php'); +require('gettext.php'); + // Variables @@ -43,29 +52,96 @@ $LC_CATEGORIES = array('LC_CTYPE', 'LC_NUMERIC', 'LC_TIME', 'LC_COLLATE', 'LC_MO $EMULATEGETTEXT = 0; $CURRENTLOCALE = ''; +/* Class to hold a single domain included in $text_domains. */ +class domain { + var $l10n; + var $path; + var $codeset; +} // Utility functions +/** + * Return a list of locales to try for any POSIX-style locale specification. + */ +function get_list_of_locales($locale) { + /* Figure out all possible locale names and start with the most + * specific ones. I.e. for sr_CS.UTF-8@latin, look through all of + * sr_CS.UTF-8@latin, sr_CS@latin, sr@latin, sr_CS.UTF-8, sr_CS, sr. + */ + $locale_names = array(); + $lang = NULL; + $country = NULL; + $charset = NULL; + $modifier = NULL; + if ($locale) { + if (preg_match("/^(?P[a-z]{2,3})" // language code + ."(?:_(?P[A-Z]{2}))?" // country code + ."(?:\.(?P[-A-Za-z0-9_]+))?" // charset + ."(?:@(?P[-A-Za-z0-9_]+))?$/", // @ modifier + $locale, $matches)) { + + if (isset($matches["lang"])) $lang = $matches["lang"]; + if (isset($matches["country"])) $country = $matches["country"]; + if (isset($matches["charset"])) $charset = $matches["charset"]; + if (isset($matches["modifier"])) $modifier = $matches["modifier"]; + + if ($modifier) { + if ($country) { + if ($charset) + array_push($locale_names, "${lang}_$country.$charset@$modifier"); + array_push($locale_names, "${lang}_$country@$modifier"); + } elseif ($charset) + array_push($locale_names, "${lang}.$charset@$modifier"); + array_push($locale_names, "$lang@$modifier"); + } + if ($country) { + if ($charset) + array_push($locale_names, "${lang}_$country.$charset"); + array_push($locale_names, "${lang}_$country"); + } elseif ($charset) + array_push($locale_names, "${lang}.$charset"); + array_push($locale_names, $lang); + } + + // If the locale name doesn't match POSIX style, just include it as-is. 
+ if (!in_array($locale, $locale_names)) + array_push($locale_names, $locale); + } + return $locale_names; +} + /** * Utility function to get a StreamReader for the given text domain. */ function _get_reader($domain=null, $category=5, $enable_cache=true) { - global $text_domains, $default_domain, $LC_CATEGORIES; - if (!isset($domain)) $domain = $default_domain; - if (!isset($text_domains[$domain]->l10n)) { - // get the current locale - $locale = _setlocale(LC_MESSAGES, 0); - $p = isset($text_domains[$domain]->path) ? $text_domains[$domain]->path : './'; - $path = $p . "$locale/". $LC_CATEGORIES[$category] ."/$domain.mo"; - if (file_exists($path)) { - $input = new FileReader($path); - } - else { - $input = null; - } - $text_domains[$domain]->l10n = new gettext_reader($input, $enable_cache); - } - return $text_domains[$domain]->l10n; + global $text_domains, $default_domain, $LC_CATEGORIES; + if (!isset($domain)) $domain = $default_domain; + if (!isset($text_domains[$domain]->l10n)) { + // get the current locale + $locale = _setlocale(LC_MESSAGES, 0); + $bound_path = isset($text_domains[$domain]->path) ? + $text_domains[$domain]->path : './'; + $subpath = $LC_CATEGORIES[$category] ."/$domain.mo"; + + $locale_names = get_list_of_locales($locale); + $input = null; + foreach ($locale_names as $locale) { + $full_path = $bound_path . $locale . "/" . $subpath; + if (file_exists($full_path)) { + $input = new FileReader($full_path); + break; + } + } + + if (!array_key_exists($domain, $text_domains)) { + // Initialize an empty domain object. + $text_domains[$domain] = new domain(); + } + $text_domains[$domain]->l10n = new gettext_reader($input, + $enable_cache); + } + return $text_domains[$domain]->l10n; } /** @@ -79,8 +155,10 @@ function locale_emulation() { /** * Checks if the current locale is supported on this system. 
*/ -function _check_locale() { +function _check_locale_and_function($function=false) { global $EMULATEGETTEXT; + if ($function and !function_exists($function)) + return false; return !$EMULATEGETTEXT; } @@ -88,56 +166,70 @@ function _check_locale() { * Get the codeset for the given domain. */ function _get_codeset($domain=null) { - global $text_domains, $default_domain, $LC_CATEGORIES; - if (!isset($domain)) $domain = $default_domain; - return (isset($text_domains[$domain]->codeset))? $text_domains[$domain]->codeset : ini_get('mbstring.internal_encoding'); + global $text_domains, $default_domain, $LC_CATEGORIES; + if (!isset($domain)) $domain = $default_domain; + return (isset($text_domains[$domain]->codeset))? $text_domains[$domain]->codeset : ini_get('mbstring.internal_encoding'); } /** * Convert the given string to the encoding set by bind_textdomain_codeset. */ function _encode($text) { - $source_encoding = mb_detect_encoding($text); - $target_encoding = _get_codeset(); - if ($source_encoding != $target_encoding) { - return mb_convert_encoding($text, $target_encoding, $source_encoding); - } - else { - return $text; - } + $source_encoding = mb_detect_encoding($text); + $target_encoding = _get_codeset(); + if ($source_encoding != $target_encoding) { + return mb_convert_encoding($text, $target_encoding, $source_encoding); + } + else { + return $text; + } } - - // Custom implementation of the standard gettext related functions +/** + * Returns passed in $locale, or environment variable $LANG if $locale == ''. + */ +function _get_default_locale($locale) { + if ($locale == '') // emulate variable support + return getenv('LANG'); + else + return $locale; +} + /** * Sets a requested locale, if needed emulates it. 
*/ function _setlocale($category, $locale) { global $CURRENTLOCALE, $EMULATEGETTEXT; if ($locale === 0) { // use === to differentiate between string "0" - if ($CURRENTLOCALE != '') + if ($CURRENTLOCALE != '') return $CURRENTLOCALE; - else + else // obey LANG variable, maybe extend to support all of LC_* vars // even if we tried to read locale without setting it first return _setlocale($category, $CURRENTLOCALE); } else { - $ret = 0; - if (function_exists('setlocale')) // I don't know if this ever happens ;) - $ret = setlocale($category, $locale); - if (($ret and $locale == '') or ($ret == $locale)) { - $EMULATEGETTEXT = 0; + if (function_exists('setlocale')) { + $ret = setlocale($category, $locale); + if (($locale == '' and !$ret) or // failed setting it by env + ($locale != '' and $ret != $locale)) { // failed setting it + // Failed setting it according to environment. + $CURRENTLOCALE = _get_default_locale($locale); + $EMULATEGETTEXT = 1; + } else { $CURRENTLOCALE = $ret; + $EMULATEGETTEXT = 0; + } } else { - if ($locale == '') // emulate variable support - $CURRENTLOCALE = getenv('LANG'); - else - $CURRENTLOCALE = $locale; - $EMULATEGETTEXT = 1; + // No function setlocale(), emulate it all. + $CURRENTLOCALE = _get_default_locale($locale); + $EMULATEGETTEXT = 1; } + // Allow locale to be changed on the go for one translation domain. + global $text_domains, $default_domain; + unset($text_domains[$default_domain]->l10n); return $CURRENTLOCALE; } } @@ -146,135 +238,240 @@ function _setlocale($category, $locale) { * Sets the path for a domain. 
*/ function _bindtextdomain($domain, $path) { - global $text_domains; - // ensure $path ends with a slash - if ($path[strlen($path) - 1] != '/') $path .= '/'; - elseif ($path[strlen($path) - 1] != '\\') $path .= '\\'; - $text_domains[$domain]->path = $path; + global $text_domains; + // ensure $path ends with a slash ('/' should work for both, but lets still play nice) + if (substr(php_uname(), 0, 7) == "Windows") { + if ($path[strlen($path)-1] != '\\' and $path[strlen($path)-1] != '/') + $path .= '\\'; + } else { + if ($path[strlen($path)-1] != '/') + $path .= '/'; + } + if (!array_key_exists($domain, $text_domains)) { + // Initialize an empty domain object. + $text_domains[$domain] = new domain(); + } + $text_domains[$domain]->path = $path; } /** * Specify the character encoding in which the messages from the DOMAIN message catalog will be returned. */ function _bind_textdomain_codeset($domain, $codeset) { - global $text_domains; - $text_domains[$domain]->codeset = $codeset; + global $text_domains; + $text_domains[$domain]->codeset = $codeset; } /** * Sets the default domain. */ function _textdomain($domain) { - global $default_domain; - $default_domain = $domain; + global $default_domain; + $default_domain = $domain; } /** * Lookup a message in the current domain. */ function _gettext($msgid) { - $l10n = _get_reader(); - //return $l10n->translate($msgid); - return _encode($l10n->translate($msgid)); + $l10n = _get_reader(); + return _encode($l10n->translate($msgid)); } + /** * Alias for gettext. */ function __($msgid) { - return _gettext($msgid); + return _gettext($msgid); } + /** * Plural version of gettext. */ function _ngettext($single, $plural, $number) { - $l10n = _get_reader(); - //return $l10n->ngettext($single, $plural, $number); - return _encode($l10n->ngettext($single, $plural, $number)); + $l10n = _get_reader(); + return _encode($l10n->ngettext($single, $plural, $number)); } /** * Override the current domain. 
*/ function _dgettext($domain, $msgid) { - $l10n = _get_reader($domain); - //return $l10n->translate($msgid); - return _encode($l10n->translate($msgid)); + $l10n = _get_reader($domain); + return _encode($l10n->translate($msgid)); } + /** * Plural version of dgettext. */ function _dngettext($domain, $single, $plural, $number) { - $l10n = _get_reader($domain); - //return $l10n->ngettext($single, $plural, $number); - return _encode($l10n->ngettext($single, $plural, $number)); + $l10n = _get_reader($domain); + return _encode($l10n->ngettext($single, $plural, $number)); } /** * Overrides the domain and category for a single lookup. */ function _dcgettext($domain, $msgid, $category) { - $l10n = _get_reader($domain, $category); - //return $l10n->translate($msgid); - return _encode($l10n->translate($msgid)); + $l10n = _get_reader($domain, $category); + return _encode($l10n->translate($msgid)); } /** * Plural version of dcgettext. */ function _dcngettext($domain, $single, $plural, $number, $category) { - $l10n = _get_reader($domain, $category); - //return $l10n->ngettext($single, $plural, $number); - return _encode($l10n->ngettext($single, $plural, $number)); + $l10n = _get_reader($domain, $category); + return _encode($l10n->ngettext($single, $plural, $number)); } +/** + * Context version of gettext. + */ +function _pgettext($context, $msgid) { + $l10n = _get_reader(); + return _encode($l10n->pgettext($context, $msgid)); +} +/** + * Override the current domain in a context gettext call. + */ +function _dpgettext($domain, $context, $msgid) { + $l10n = _get_reader($domain); + return _encode($l10n->pgettext($context, $msgid)); +} -// Wrappers to use if the standard gettext functions are available, but the current locale is not supported by the system. -// Use the standard impl if the current locale is supported, use the custom impl otherwise. +/** + * Overrides the domain and category for a single context-based lookup. 
+ */ +function _dcpgettext($domain, $context, $msgid, $category) { + $l10n = _get_reader($domain, $category); + return _encode($l10n->pgettext($context, $msgid)); +} + +/** + * Context version of ngettext. + */ +function _npgettext($context, $singular, $plural) { + $l10n = _get_reader(); + return _encode($l10n->npgettext($context, $singular, $plural)); +} + +/** + * Override the current domain in a context ngettext call. + */ +function _dnpgettext($domain, $context, $singular, $plural) { + $l10n = _get_reader($domain); + return _encode($l10n->npgettext($context, $singular, $plural)); +} + +/** + * Overrides the domain and category for a plural context-based lookup. + */ +function _dcnpgettext($domain, $context, $singular, $plural, $category) { + $l10n = _get_reader($domain, $category); + return _encode($l10n->npgettext($context, $singular, $plural)); +} + + + +// Wrappers to use if the standard gettext functions are available, +// but the current locale is not supported by the system. +// Use the standard impl if the current locale is supported, use the +// custom impl otherwise. 
function T_setlocale($category, $locale) { return _setlocale($category, $locale); } function T_bindtextdomain($domain, $path) { - if (_check_locale()) return bindtextdomain($domain, $path); - else return _bindtextdomain($domain, $path); + if (_check_locale_and_function()) return bindtextdomain($domain, $path); + else return _bindtextdomain($domain, $path); } function T_bind_textdomain_codeset($domain, $codeset) { // bind_textdomain_codeset is available only in PHP 4.2.0+ - if (_check_locale() and function_exists('bind_textdomain_codeset')) return bind_textdomain_codeset($domain, $codeset); - else return _bind_textdomain_codeset($domain, $codeset); + if (_check_locale_and_function('bind_textdomain_codeset')) + return bind_textdomain_codeset($domain, $codeset); + else return _bind_textdomain_codeset($domain, $codeset); } function T_textdomain($domain) { - if (_check_locale()) return textdomain($domain); - else return _textdomain($domain); + if (_check_locale_and_function()) return textdomain($domain); + else return _textdomain($domain); } function T_gettext($msgid) { - if (_check_locale()) return gettext($msgid); - else return _gettext($msgid); + if (_check_locale_and_function()) return gettext($msgid); + else return _gettext($msgid); } function T_($msgid) { - if (_check_locale()) return _($msgid); - return __($msgid); + if (_check_locale_and_function()) return _($msgid); + return __($msgid); } function T_ngettext($single, $plural, $number) { - if (_check_locale()) return ngettext($single, $plural, $number); - else return _ngettext($single, $plural, $number); + if (_check_locale_and_function()) + return ngettext($single, $plural, $number); + else return _ngettext($single, $plural, $number); } function T_dgettext($domain, $msgid) { - if (_check_locale()) return dgettext($domain, $msgid); - else return _dgettext($domain, $msgid); + if (_check_locale_and_function()) return dgettext($domain, $msgid); + else return _dgettext($domain, $msgid); } function 
T_dngettext($domain, $single, $plural, $number) { - if (_check_locale()) return dngettext($domain, $single, $plural, $number); - else return _dngettext($domain, $single, $plural, $number); + if (_check_locale_and_function()) + return dngettext($domain, $single, $plural, $number); + else return _dngettext($domain, $single, $plural, $number); } function T_dcgettext($domain, $msgid, $category) { - if (_check_locale()) return dcgettext($domain, $msgid, $category); - else return _dcgettext($domain, $msgid, $category); + if (_check_locale_and_function()) + return dcgettext($domain, $msgid, $category); + else return _dcgettext($domain, $msgid, $category); } function T_dcngettext($domain, $single, $plural, $number, $category) { - if (_check_locale()) return dcngettext($domain, $single, $plural, $number, $category); - else return _dcngettext($domain, $single, $plural, $number, $category); + if (_check_locale_and_function()) + return dcngettext($domain, $single, $plural, $number, $category); + else return _dcngettext($domain, $single, $plural, $number, $category); +} + +function T_pgettext($context, $msgid) { + if (_check_locale_and_function('pgettext')) + return pgettext($context, $msgid); + else + return _pgettext($context, $msgid); +} + +function T_dpgettext($domain, $context, $msgid) { + if (_check_locale_and_function('dpgettext')) + return dpgettext($domain, $context, $msgid); + else + return _dpgettext($domain, $context, $msgid); +} + +function T_dcpgettext($domain, $context, $msgid, $category) { + if (_check_locale_and_function('dcpgettext')) + return dcpgettext($domain, $context, $msgid, $category); + else + return _dcpgettext($domain, $context, $msgid, $category); +} + +function T_npgettext($context, $singular, $plural) { + if (_check_locale_and_function('npgettext')) + return npgettext($context, $single, $plural, $number); + else + return _npgettext($context, $single, $plural, $number); +} + +function T_dnpgettext($domain, $context, $singular, $plural) { + if 
(_check_locale_and_function('dnpgettext')) + return dnpgettext($domain, $context, $single, $plural, $number); + else + return _dnpgettext($domain, $context, $single, $plural, $number); +} + +function T_dcnpgettext($domain, $context, $singular, $plural, $category) { + if (_check_locale_and_function('dcnpgettext')) + return dcnpgettext($domain, $context, $single, + $plural, $number, $category); + else + return _dcnpgettext($domain, $context, $single, + $plural, $number, $category); } @@ -282,36 +479,56 @@ function T_dcngettext($domain, $single, $plural, $number, $category) { // Wrappers used as a drop in replacement for the standard gettext functions if (!function_exists('gettext')) { - function bindtextdomain($domain, $path) { - return _bindtextdomain($domain, $path); - } - function bind_textdomain_codeset($domain, $codeset) { - return _bind_textdomain_codeset($domain, $codeset); - } - function textdomain($domain) { - return _textdomain($domain); - } - function gettext($msgid) { - return _gettext($msgid); - } - function _($msgid) { - return __($msgid); - } - function ngettext($single, $plural, $number) { - return _ngettext($single, $plural, $number); - } - function dgettext($domain, $msgid) { - return _dgettext($domain, $msgid); - } - function dngettext($domain, $single, $plural, $number) { - return _dngettext($domain, $single, $plural, $number); - } - function dcgettext($domain, $msgid, $category) { - return _dcgettext($domain, $msgid, $category); - } - function dcngettext($domain, $single, $plural, $number, $category) { - return _dcngettext($domain, $single, $plural, $number, $category); - } -} - -?> \ No newline at end of file + function bindtextdomain($domain, $path) { + return _bindtextdomain($domain, $path); + } + function bind_textdomain_codeset($domain, $codeset) { + return _bind_textdomain_codeset($domain, $codeset); + } + function textdomain($domain) { + return _textdomain($domain); + } + function gettext($msgid) { + return _gettext($msgid); + } + function 
_($msgid) { + return __($msgid); + } + function ngettext($single, $plural, $number) { + return _ngettext($single, $plural, $number); + } + function dgettext($domain, $msgid) { + return _dgettext($domain, $msgid); + } + function dngettext($domain, $single, $plural, $number) { + return _dngettext($domain, $single, $plural, $number); + } + function dcgettext($domain, $msgid, $category) { + return _dcgettext($domain, $msgid, $category); + } + function dcngettext($domain, $single, $plural, $number, $category) { + return _dcngettext($domain, $single, $plural, $number, $category); + } + function pgettext($context, $msgid) { + return _pgettext($context, $msgid); + } + function npgettext($context, $single, $plural, $number) { + return _npgettext($context, $single, $plural, $number); + } + function dpgettext($domain, $context, $msgid) { + return _dpgettext($domain, $context, $msgid); + } + function dnpgettext($domain, $context, $single, $plural, $number) { + return _dnpgettext($domain, $context, $single, $plural, $number); + } + function dcpgettext($domain, $context, $msgid, $category) { + return _dcpgettext($domain, $context, $msgid, $category); + } + function dcnpgettext($domain, $context, $single, $plural, + $number, $category) { + return _dcnpgettext($domain, $context, $single, $plural, + $number, $category); + } +} + +?> diff --git a/includes/php-gettext/gettext.php b/includes/php-gettext/gettext.php old mode 100644 new mode 100755 index ad94a98..a121f9c --- a/includes/php-gettext/gettext.php +++ b/includes/php-gettext/gettext.php @@ -1,8 +1,8 @@ . + Copyright (c) 2003, 2009 Danilo Segan . Copyright (c) 2005 Nico Kaiser - + This file is part of PHP-gettext. PHP-gettext is free software; you can redistribute it and/or modify @@ -20,13 +20,13 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - + /** * Provides a simple gettext replacement that works independently from * the system's gettext abilities. 
* It can read MO files and use them for translating strings. * The files are passed to gettext_reader as a Stream (see streams.php) - * + * * This version has the ability to cache all strings and translations to * speed up the string lookup. * While the cache is enabled by default, it can be switched off with the @@ -36,7 +36,7 @@ class gettext_reader { //public: var $error = 0; // public variable that holds error code (0 if no error) - + //private: var $BYTEORDER = 0; // 0: low endian, 1: big endian var $STREAM = NULL; @@ -52,27 +52,33 @@ class gettext_reader { /* Methods */ - - + + /** * Reads a 32bit Integer from the Stream - * + * * @access private * @return Integer from the Stream */ function readint() { if ($this->BYTEORDER == 0) { // low endian - return array_shift(unpack('V', $this->STREAM->read(4))); + $input=unpack('V', $this->STREAM->read(4)); + return array_shift($input); } else { // big endian - return array_shift(unpack('N', $this->STREAM->read(4))); + $input=unpack('N', $this->STREAM->read(4)); + return array_shift($input); } } + function read($bytes) { + return $this->STREAM->read($bytes); + } + /** * Reads an array of Integers from the Stream - * + * * @param int count How many elements should be read * @return Array of Integers */ @@ -85,10 +91,10 @@ class gettext_reader { return unpack('N'.$count, $this->STREAM->read(4 * $count)); } } - + /** * Constructor - * + * * @param object Reader the StreamReader object * @param boolean enable_cache Enable or disable caching of strings (default on) */ @@ -98,39 +104,37 @@ class gettext_reader { $this->short_circuit = true; return; } - + // Caching can be turned off $this->enable_cache = $enable_cache; - // $MAGIC1 = (int)0x950412de; //bug in PHP 5 - $MAGIC1 = (int) - 1794895138; - // $MAGIC2 = (int)0xde120495; //bug - $MAGIC2 = (int) - 569244523; + $MAGIC1 = "\x95\x04\x12\xde"; + $MAGIC2 = "\xde\x12\x04\x95"; $this->STREAM = $Reader; - $magic = $this->readint(); + $magic = $this->read(4); if ($magic == 
$MAGIC1) { - $this->BYTEORDER = 0; - } elseif ($magic == $MAGIC2) { $this->BYTEORDER = 1; + } elseif ($magic == $MAGIC2) { + $this->BYTEORDER = 0; } else { $this->error = 1; // not MO file return false; } - + // FIXME: Do we care about revision? We should. $revision = $this->readint(); - + $this->total = $this->readint(); $this->originals = $this->readint(); $this->translations = $this->readint(); } - + /** * Loads the translation tables from the MO file into the cache * If caching is enabled, also loads all strings into a cache * to speed up translation lookups - * + * * @access private */ function load_tables() { @@ -138,13 +142,17 @@ class gettext_reader { is_array($this->table_originals) && is_array($this->table_translations)) return; - + /* get original and translations tables */ - $this->STREAM->seekto($this->originals); - $this->table_originals = $this->readintarray($this->total * 2); - $this->STREAM->seekto($this->translations); - $this->table_translations = $this->readintarray($this->total * 2); - + if (!is_array($this->table_originals)) { + $this->STREAM->seekto($this->originals); + $this->table_originals = $this->readintarray($this->total * 2); + } + if (!is_array($this->table_translations)) { + $this->STREAM->seekto($this->translations); + $this->table_translations = $this->readintarray($this->total * 2); + } + if ($this->enable_cache) { $this->cache_translations = array (); /* read all strings in the cache */ @@ -157,10 +165,10 @@ class gettext_reader { } } } - + /** * Returns a string from the "originals" table - * + * * @access private * @param int num Offset number of original string * @return string Requested string if found, otherwise '' @@ -174,10 +182,10 @@ class gettext_reader { $data = $this->STREAM->read($length); return (string)$data; } - + /** * Returns a string from the "translations" table - * + * * @access private * @param int num Offset number of original string * @return string Requested string if found, otherwise '' @@ -191,10 +199,10 
@@ class gettext_reader { $data = $this->STREAM->read($length); return (string)$data; } - + /** * Binary search for string - * + * * @access private * @param string string * @param int start (internally used in recursive function) @@ -232,10 +240,10 @@ class gettext_reader { return $this->find_string($string, $half, $end); } } - + /** * Translates a string - * + * * @access public * @param string string to be translated * @return string translated string (or original, if not found) @@ -243,8 +251,8 @@ class gettext_reader { function translate($string) { if ($this->short_circuit) return $string; - $this->load_tables(); - + $this->load_tables(); + if ($this->enable_cache) { // Caching enabled, get translated string from cache if (array_key_exists($string, $this->cache_translations)) @@ -261,17 +269,66 @@ class gettext_reader { } } + /** + * Sanitize plural form expression for use in PHP eval call. + * + * @access private + * @return string sanitized plural form expression + */ + function sanitize_plural_expression($expr) { + // Get rid of disallowed characters. + $expr = preg_replace('@[^a-zA-Z0-9_:;\(\)\?\|\&=!<>+*/\%-]@', '', $expr); + + // Add parenthesis for tertiary '?' operator. + $expr .= ';'; + $res = ''; + $p = 0; + for ($i = 0; $i < strlen($expr); $i++) { + $ch = $expr[$i]; + switch ($ch) { + case '?': + $res .= ' ? ('; + $p++; + break; + case ':': + $res .= ') : ('; + break; + case ';': + $res .= str_repeat( ')', $p) . ';'; + $p = 0; + break; + default: + $res .= $ch; + } + } + return $res; + } + + /** + * Parse full PO header and extract only plural forms line. + * + * @access private + * @return string verbatim plural form header field + */ + function extract_plural_forms_header_from_po_header($header) { + if (preg_match("/(^|\n)plural-forms: ([^\n]*)\n/i", $header, $regs)) + $expr = $regs[2]; + else + $expr = "nplurals=2; plural=n == 1 ? 
0 : 1;"; + return $expr; + } + /** * Get possible plural forms from MO header - * + * * @access private * @return string plural form header */ function get_plural_forms() { - // lets assume message number 0 is header + // lets assume message number 0 is header // this is true, right? $this->load_tables(); - + // cache header field for plural forms if (! is_string($this->pluralheader)) { if ($this->enable_cache) { @@ -279,18 +336,15 @@ class gettext_reader { } else { $header = $this->get_translation_string(0); } - if (eregi("plural-forms: ([^\n]*)\n", $header, $regs)) - $expr = $regs[1]; - else - $expr = "nplurals=2; plural=n == 1 ? 0 : 1;"; - $this->pluralheader = $expr; + $expr = $this->extract_plural_forms_header_from_po_header($header); + $this->pluralheader = $this->sanitize_plural_expression($expr); } return $this->pluralheader; } /** * Detects which plural form to take - * + * * @access private * @param n count * @return int array index of the right plural form @@ -300,7 +354,7 @@ class gettext_reader { $string = str_replace('nplurals',"\$total",$string); $string = str_replace("n",$n,$string); $string = str_replace('plural',"\$plural",$string); - + $total = 0; $plural = 0; @@ -311,7 +365,7 @@ class gettext_reader { /** * Plural version of gettext - * + * * @access public * @param string single * @param string plural @@ -327,12 +381,12 @@ class gettext_reader { } // find out the appropriate form - $select = $this->select_string($number); - + $select = $this->select_string($number); + // this should contains all strings separated by NULLs - $key = $single.chr(0).$plural; - - + $key = $single . chr(0) . $plural; + + if ($this->enable_cache) { if (! array_key_exists($key, $this->cache_translations)) { return ($number != 1) ? $plural : $single; @@ -353,6 +407,15 @@ class gettext_reader { } } + function pgettext($context, $msgid) { + $key = $context . chr(4) . 
$msgid; + return $this->translate($key); + } + + function npgettext($context, $singular, $plural, $number) { + $singular = $context . chr(4) . $singular; + return $this->ngettext($singular, $plural, $number); + } } ?> diff --git a/includes/php-gettext/streams.php b/includes/php-gettext/streams.php index d57aac6..3cdc158 100644 --- a/includes/php-gettext/streams.php +++ b/includes/php-gettext/streams.php @@ -1,6 +1,6 @@ . + Copyright (c) 2003, 2005, 2006, 2009 Danilo Segan . This file is part of PHP-gettext. @@ -21,29 +21,29 @@ */ -// Simple class to wrap file streams, string streams, etc. -// seek is essential, and it should be byte stream + // Simple class to wrap file streams, string streams, etc. + // seek is essential, and it should be byte stream class StreamReader { // should return a string [FIXME: perhaps return array of bytes?] function read($bytes) { return false; } - + // should return new position function seekto($position) { return false; } - + // returns current position function currentpos() { return false; } - + // returns length of entire stream (limit for seekto()s) function length() { return false; } -} +}; class StringReader { var $_pos; @@ -78,7 +78,7 @@ class StringReader { return strlen($this->_str); } -} +}; class FileReader { @@ -93,8 +93,8 @@ class FileReader { $this->_pos = 0; $this->_fd = fopen($filename,'rb'); if (!$this->_fd) { - $this->error = 3; // Cannot read file, probably permissions - return false; + $this->error = 3; // Cannot read file, probably permissions + return false; } } else { $this->error = 2; // File doesn't exist @@ -108,13 +108,14 @@ class FileReader { // PHP 5.1.1 does not read more than 8192 bytes in one fread() // the discussions at PHP Bugs suggest it's the intended behaviour + $data = ''; while ($bytes > 0) { $chunk = fread($this->_fd, $bytes); $data .= $chunk; $bytes -= strlen($chunk); } $this->_pos = ftell($this->_fd); - + return $data; } else return ''; } @@ -137,9 +138,9 @@ class FileReader { 
fclose($this->_fd); } -} +}; -// Preloads entire file in memory first, then creates a StringReader +// Preloads entire file in memory first, then creates a StringReader // over it (it assumes knowledge of StringReader internals) class CachedFileReader extends StringReader { function CachedFileReader($filename) { @@ -149,8 +150,8 @@ class CachedFileReader extends StringReader { $fd = fopen($filename,'rb'); if (!$fd) { - $this->error = 3; // Cannot read file, probably permissions - return false; + $this->error = 3; // Cannot read file, probably permissions + return false; } $this->_str = fread($fd, $length); fclose($fd); @@ -160,7 +161,7 @@ class CachedFileReader extends StringReader { return false; } } -} +}; -?> \ No newline at end of file +?> diff --git a/includes/php-gettext/tests/LocalesTest.php b/includes/php-gettext/tests/LocalesTest.php new file mode 100644 index 0000000..3000286 --- /dev/null +++ b/includes/php-gettext/tests/LocalesTest.php @@ -0,0 +1,66 @@ +assertEquals('sr_RS', _setlocale(LC_MESSAGES, 0)); + + // For an existing locale, it never needs emulation. + putenv("LANG=C"); + _setlocale(LC_MESSAGES, ""); + $this->assertEquals(0, locale_emulation()); + + // If we set it to a non-existent locale, it still works, but uses + // emulation. + _setlocale(LC_MESSAGES, "xxx_XXX"); + $this->assertEquals('xxx_XXX', _setlocale(LC_MESSAGES, 0)); + $this->assertEquals(1, locale_emulation()); + } + + public function test_get_list_of_locales() + { + // For a locale containing country code, we prefer + // full locale name, but if that's not found, fall back + // to the language only locale name. + $this->assertEquals(array("sr_RS", "sr"), + get_list_of_locales("sr_RS")); + + // If language code is used, it's the only thing returned. + $this->assertEquals(array("sr"), + get_list_of_locales("sr")); + + // There is support for language and charset only. 
+ $this->assertEquals(array("sr.UTF-8", "sr"), + get_list_of_locales("sr.UTF-8")); + + // It can also split out character set from the full locale name. + $this->assertEquals(array("sr_RS.UTF-8", "sr_RS", "sr"), + get_list_of_locales("sr_RS.UTF-8")); + + // There is support for @modifier in locale names as well. + $this->assertEquals(array("sr_RS.UTF-8@latin", "sr_RS@latin", "sr@latin", + "sr_RS.UTF-8", "sr_RS", "sr"), + get_list_of_locales("sr_RS.UTF-8@latin")); + + // We can pass in only language and modifier. + $this->assertEquals(array("sr@latin", "sr"), + get_list_of_locales("sr@latin")); + + + // If locale name is not following the regular POSIX pattern, + // it's used verbatim. + $this->assertEquals(array("something"), + get_list_of_locales("something")); + + // Passing in an empty string returns an empty array. + $this->assertEquals(array(), + get_list_of_locales("")); + } +} + +?> diff --git a/includes/php-gettext/tests/ParsingTest.php b/includes/php-gettext/tests/ParsingTest.php new file mode 100644 index 0000000..9b350b2 --- /dev/null +++ b/includes/php-gettext/tests/ParsingTest.php @@ -0,0 +1,43 @@ +assertEquals( + 'nplurals=2; plural=n == 1 ? 0 : 1;', + $parser->extract_plural_forms_header_from_po_header("")); + + // Extracting it from the middle of the header works. + $this->assertEquals( + 'nplurals=1; plural=0;', + $parser->extract_plural_forms_header_from_po_header( + "Content-type: text/html; charset=UTF-8\n" + ."Plural-Forms: nplurals=1; plural=0;\n" + ."Last-Translator: nobody\n" + )); + + // It's also case-insensitive. + $this->assertEquals( + 'nplurals=1; plural=0;', + $parser->extract_plural_forms_header_from_po_header( + "PLURAL-forms: nplurals=1; plural=0;\n" + )); + + // It falls back to default if it's not on a separate line. + $this->assertEquals( + 'nplurals=2; plural=n == 1 ? 
0 : 1;', + $parser->extract_plural_forms_header_from_po_header( + "Content-type: text/html; charset=UTF-8" // note the missing \n here + ."Plural-Forms: nplurals=1; plural=0;\n" + ."Last-Translator: nobody\n" + )); + + } + +} +?> diff --git a/includes/utf8.php b/includes/utf8.php deleted file mode 100644 index 9ef8113..0000000 --- a/includes/utf8.php +++ /dev/null @@ -1,478 +0,0 @@ - - */ - -/** - * URL-Encode a filename to allow unicodecharacters - * - * Slashes are not encoded - * - * When the second parameter is true the string will - * be encoded only if non ASCII characters are detected - - * This makes it safe to run it multiple times on the - * same string (default is true) - * - * @author Andreas Gohr - * @see urlencode - */ -function utf8_encodeFN($file,$safe=true){ - if($safe && preg_match('#^[a-zA-Z0-9/_\-.%]+$#',$file)){ - return $file; - } - $file = urlencode($file); - $file = str_replace('%2F','/',$file); - return $file; -} - -/** - * URL-Decode a filename - * - * This is just a wrapper around urldecode - * - * @author Andreas Gohr - * @see urldecode - */ -function utf8_decodeFN($file){ - $file = urldecode($file); - return $file; -} - -/** - * Checks if a string contains 7bit ASCII only - * - * @author Andreas Gohr - */ -function utf8_isASCII($str){ - for($i=0; $i127) return false; - } - return true; -} - -/** - * Tries to detect if a string is in Unicode encoding - * - * @author - * @link http://www.php.net/manual/en/function.utf8-encode.php - */ -function utf8_check($Str) { - for ($i=0; $i - * @see strlen() - */ -function utf8_strlen($string){ - if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strlen')) - return mb_strlen($string,'utf-8'); - - $uni = utf8_to_unicode($string); - return count($uni); -} - -/** - * This is a unicode aware replacement for substr() - * - * Uses mb_string extension if available - * - * @author Andreas Gohr - * @see substr() - */ -function utf8_substr($str, $start, $length=null){ - if(!defined('UTF8_NOMBSTRING') && 
function_exists('mb_substr')) - return mb_substr($str,$start,$length,'utf-8'); - - $uni = utf8_to_unicode($str); - return unicode_to_utf8(array_slice($uni,$start,$length)); -} - -/** - * This is a unicode aware replacement for strtolower() - * - * Uses mb_string extension if available - * - * @author Andreas Gohr - * @see strtolower() - * @see utf8_strtoupper() - */ -function utf8_strtolower($string){ - if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strtolower')) - return mb_strtolower($string,'utf-8'); - - global $UTF8_UPPER_TO_LOWER; - $uni = utf8_to_unicode($string); - for ($i=0; $i < count($uni); $i++){ - if($UTF8_UPPER_TO_LOWER[$uni[$i]]){ - $uni[$i] = $UTF8_UPPER_TO_LOWER[$uni[$i]]; - } - } - return unicode_to_utf8($uni); -} - -/** - * This is a unicode aware replacement for strtoupper() - * - * Uses mb_string extension if available - * - * @author Andreas Gohr - * @see strtoupper() - * @see utf8_strtoupper() - */ -function utf8_strtoupper($string){ - if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strtolower')) - return mb_strtolower($string,'utf-8'); - - global $UTF8_LOWER_TO_UPPER; - $uni = utf8_to_unicode($string); - for ($i=0; $i < count($uni); $i++){ - if($UTF8_LOWER_TO_UPPER[$uni[$i]]){ - $uni[$i] = $UTF8_LOWER_TO_UPPER[$uni[$i]]; - } - } - return unicode_to_utf8($uni); -} - -/** - * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents - * - * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1) - * letters. 
Default is to deaccent both cases ($case = 0) - * - * @author Andreas Gohr - */ -function utf8_deaccent($string,$case=0){ - if($case <= 0){ - global $UTF8_LOWER_ACCENTS; - $string = str_replace(array_keys($UTF8_LOWER_ACCENTS),array_values($UTF8_LOWER_ACCENTS),$string); - } - if($case >= 0){ - global $UTF8_UPPER_ACCENTS; - $string = str_replace(array_keys($UTF8_UPPER_ACCENTS),array_values($UTF8_UPPER_ACCENTS),$string); - } - return $string; -} - -/** - * Removes special characters (nonalphanumeric) from a UTF-8 string - * - * Be sure to specify all specialchars you give in $repl in $keep, too - * or it won't work. - * - * This function adds the controlchars 0x00 to 0x19 to the array of - * stripped chars (they are not included in $UTF8_SPECIAL_CHARS) - * - * @author Andreas Gohr - * @param string $string The UTF8 string to strip of special chars - * @param string $repl Replace special with this string - * @param string $keep Special chars to keep (in UTF8) - */ -function utf8_stripspecials($string,$repl='',$keep=''){ - global $UTF8_SPECIAL_CHARS; - if($keep != ''){ - $specials = array_diff($UTF8_SPECIAL_CHARS, utf8_to_unicode($keep)); - }else{ - $specials = $UTF8_SPECIAL_CHARS; - } - - $specials = unicode_to_utf8($specials); - $specials = preg_quote($specials, '/'); - - return preg_replace('/[\x00-\x19'.$specials.']/u',$repl,$string); -} - -/** - * This is an Unicode aware replacement for strpos - * - * Uses mb_string extension if available - * - * @author Scott Michael Reynen - * @author Andreas Gohr - * @link http://www.randomchaos.com/document.php?source=php_and_unicode - * @see strpos() - */ -function utf8_strpos($haystack, $needle,$offset=0) { - if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strpos')) - return mb_strpos($haystack,$needle,$offset,'utf-8'); - - $haystack = utf8_to_unicode($haystack); - $needle = utf8_to_unicode($needle); - $position = $offset; - $found = false; - - while( (! 
$found ) && ( $position < count( $haystack ) ) ) { - if ( $needle[0] == $haystack[$position] ) { - for ($i = 1; $i < count( $needle ); $i++ ) { - if ( $needle[$i] != $haystack[ $position + $i ] ) break; - } - if ( $i == count( $needle ) ) { - $found = true; - $position--; - } - } - $position++; - } - return ( $found == true ) ? $position : false; -} - -/** - * This function will any UTF-8 encoded text and return it as - * a list of Unicode values: - * - * @author Scott Michael Reynen - * @link http://www.randomchaos.com/document.php?source=php_and_unicode - * @see unicode_to_utf8() - */ -function utf8_to_unicode( $str ) { - $unicode = array(); - $values = array(); - $lookingFor = 1; - - for ($i = 0; $i < strlen( $str ); $i++ ) { - $thisValue = ord( $str[ $i ] ); - if ( $thisValue < 128 ) $unicode[] = $thisValue; - else { - if ( count( $values ) == 0 ) $lookingFor = ( $thisValue < 224 ) ? 2 : 3; - $values[] = $thisValue; - if ( count( $values ) == $lookingFor ) { - $number = ( $lookingFor == 3 ) ? 
- ( ( $values[0] % 16 ) * 4096 ) + ( ( $values[1] % 64 ) * 64 ) + ( $values[2] % 64 ): - ( ( $values[0] % 32 ) * 64 ) + ( $values[1] % 64 ); - $unicode[] = $number; - $values = array(); - $lookingFor = 1; - } - } - } - return $unicode; -} - -/** - * This function will convert a Unicode array back to its UTF-8 representation - * - * @author Scott Michael Reynen - * @link http://www.randomchaos.com/document.php?source=php_and_unicode - * @see utf8_to_unicode() - */ -function unicode_to_utf8( $str ) { - $utf8 = ''; - foreach( $str as $unicode ) { - if ( $unicode < 128 ) { - $utf8.= chr( $unicode ); - } elseif ( $unicode < 2048 ) { - $utf8.= chr( 192 + ( ( $unicode - ( $unicode % 64 ) ) / 64 ) ); - $utf8.= chr( 128 + ( $unicode % 64 ) ); - } else { - $utf8.= chr( 224 + ( ( $unicode - ( $unicode % 4096 ) ) / 4096 ) ); - $utf8.= chr( 128 + ( ( ( $unicode % 4096 ) - ( $unicode % 64 ) ) / 64 ) ); - $utf8.= chr( 128 + ( $unicode % 64 ) ); - } - } - return $utf8; -} - -/** - * UTF-8 Case lookup table - * - * This lookuptable defines the upper case letters to their correspponding - * lower case letter in UTF-8 - * - * @author Andreas Gohr - */ -$UTF8_LOWER_TO_UPPER = array( - 0x0061=>0x0041, 0x03C6=>0x03A6, 0x0163=>0x0162, 0x00E5=>0x00C5, 0x0062=>0x0042, - 0x013A=>0x0139, 0x00E1=>0x00C1, 0x0142=>0x0141, 0x03CD=>0x038E, 0x0101=>0x0100, - 0x0491=>0x0490, 0x03B4=>0x0394, 0x015B=>0x015A, 0x0064=>0x0044, 0x03B3=>0x0393, - 0x00F4=>0x00D4, 0x044A=>0x042A, 0x0439=>0x0419, 0x0113=>0x0112, 0x043C=>0x041C, - 0x015F=>0x015E, 0x0144=>0x0143, 0x00EE=>0x00CE, 0x045E=>0x040E, 0x044F=>0x042F, - 0x03BA=>0x039A, 0x0155=>0x0154, 0x0069=>0x0049, 0x0073=>0x0053, 0x1E1F=>0x1E1E, - 0x0135=>0x0134, 0x0447=>0x0427, 0x03C0=>0x03A0, 0x0438=>0x0418, 0x00F3=>0x00D3, - 0x0440=>0x0420, 0x0454=>0x0404, 0x0435=>0x0415, 0x0449=>0x0429, 0x014B=>0x014A, - 0x0431=>0x0411, 0x0459=>0x0409, 0x1E03=>0x1E02, 0x00F6=>0x00D6, 0x00F9=>0x00D9, - 0x006E=>0x004E, 0x0451=>0x0401, 0x03C4=>0x03A4, 0x0443=>0x0423, 
0x015D=>0x015C, - 0x0453=>0x0403, 0x03C8=>0x03A8, 0x0159=>0x0158, 0x0067=>0x0047, 0x00E4=>0x00C4, - 0x03AC=>0x0386, 0x03AE=>0x0389, 0x0167=>0x0166, 0x03BE=>0x039E, 0x0165=>0x0164, - 0x0117=>0x0116, 0x0109=>0x0108, 0x0076=>0x0056, 0x00FE=>0x00DE, 0x0157=>0x0156, - 0x00FA=>0x00DA, 0x1E61=>0x1E60, 0x1E83=>0x1E82, 0x00E2=>0x00C2, 0x0119=>0x0118, - 0x0146=>0x0145, 0x0070=>0x0050, 0x0151=>0x0150, 0x044E=>0x042E, 0x0129=>0x0128, - 0x03C7=>0x03A7, 0x013E=>0x013D, 0x0442=>0x0422, 0x007A=>0x005A, 0x0448=>0x0428, - 0x03C1=>0x03A1, 0x1E81=>0x1E80, 0x016D=>0x016C, 0x00F5=>0x00D5, 0x0075=>0x0055, - 0x0177=>0x0176, 0x00FC=>0x00DC, 0x1E57=>0x1E56, 0x03C3=>0x03A3, 0x043A=>0x041A, - 0x006D=>0x004D, 0x016B=>0x016A, 0x0171=>0x0170, 0x0444=>0x0424, 0x00EC=>0x00CC, - 0x0169=>0x0168, 0x03BF=>0x039F, 0x006B=>0x004B, 0x00F2=>0x00D2, 0x00E0=>0x00C0, - 0x0434=>0x0414, 0x03C9=>0x03A9, 0x1E6B=>0x1E6A, 0x00E3=>0x00C3, 0x044D=>0x042D, - 0x0436=>0x0416, 0x01A1=>0x01A0, 0x010D=>0x010C, 0x011D=>0x011C, 0x00F0=>0x00D0, - 0x013C=>0x013B, 0x045F=>0x040F, 0x045A=>0x040A, 0x00E8=>0x00C8, 0x03C5=>0x03A5, - 0x0066=>0x0046, 0x00FD=>0x00DD, 0x0063=>0x0043, 0x021B=>0x021A, 0x00EA=>0x00CA, - 0x03B9=>0x0399, 0x017A=>0x0179, 0x00EF=>0x00CF, 0x01B0=>0x01AF, 0x0065=>0x0045, - 0x03BB=>0x039B, 0x03B8=>0x0398, 0x03BC=>0x039C, 0x045C=>0x040C, 0x043F=>0x041F, - 0x044C=>0x042C, 0x00FE=>0x00DE, 0x00F0=>0x00D0, 0x1EF3=>0x1EF2, 0x0068=>0x0048, - 0x00EB=>0x00CB, 0x0111=>0x0110, 0x0433=>0x0413, 0x012F=>0x012E, 0x00E6=>0x00C6, - 0x0078=>0x0058, 0x0161=>0x0160, 0x016F=>0x016E, 0x03B1=>0x0391, 0x0457=>0x0407, - 0x0173=>0x0172, 0x00FF=>0x0178, 0x006F=>0x004F, 0x043B=>0x041B, 0x03B5=>0x0395, - 0x0445=>0x0425, 0x0121=>0x0120, 0x017E=>0x017D, 0x017C=>0x017B, 0x03B6=>0x0396, - 0x03B2=>0x0392, 0x03AD=>0x0388, 0x1E85=>0x1E84, 0x0175=>0x0174, 0x0071=>0x0051, - 0x0437=>0x0417, 0x1E0B=>0x1E0A, 0x0148=>0x0147, 0x0105=>0x0104, 0x0458=>0x0408, - 0x014D=>0x014C, 0x00ED=>0x00CD, 0x0079=>0x0059, 0x010B=>0x010A, 0x03CE=>0x038F, - 
0x0072=>0x0052, 0x0430=>0x0410, 0x0455=>0x0405, 0x0452=>0x0402, 0x0127=>0x0126, - 0x0137=>0x0136, 0x012B=>0x012A, 0x03AF=>0x038A, 0x044B=>0x042B, 0x006C=>0x004C, - 0x03B7=>0x0397, 0x0125=>0x0124, 0x0219=>0x0218, 0x00FB=>0x00DB, 0x011F=>0x011E, - 0x043E=>0x041E, 0x1E41=>0x1E40, 0x03BD=>0x039D, 0x0107=>0x0106, 0x03CB=>0x03AB, - 0x0446=>0x0426, 0x00FE=>0x00DE, 0x00E7=>0x00C7, 0x03CA=>0x03AA, 0x0441=>0x0421, - 0x0432=>0x0412, 0x010F=>0x010E, 0x00F8=>0x00D8, 0x0077=>0x0057, 0x011B=>0x011A, - 0x0074=>0x0054, 0x006A=>0x004A, 0x045B=>0x040B, 0x0456=>0x0406, 0x0103=>0x0102, - 0x03BB=>0x039B, 0x00F1=>0x00D1, 0x043D=>0x041D, 0x03CC=>0x038C, 0x00E9=>0x00C9, - 0x00F0=>0x00D0, 0x0457=>0x0407, 0x0123=>0x0122, -); - -/** - * UTF-8 Case lookup table - * - * This lookuptable defines the lower case letters to their correspponding - * upper case letter in UTF-8 (it does so by flipping $UTF8_LOWER_TO_UPPER) - * - * @author Andreas Gohr - */ -$UTF8_UPPER_TO_LOWER = @array_flip($UTF8_LOWER_TO_UPPER); - -/** - * UTF-8 lookup table for lower case accented letters - * - * This lookuptable defines replacements for accented characters from the ASCII-7 - * range. This are lower case letters only. 
- * - * @author Andreas Gohr - * @see utf8_deaccent() - */ -$UTF8_LOWER_ACCENTS = array( - 'à ' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o', - 'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k', - 'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o', - 'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o', - 'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c', - 'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't', - 'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l', - 'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z', - 'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't', - 'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o', - 'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j', - 'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o', - 'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g', - 'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a', - 'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', -); - -/** - * UTF-8 lookup table for upper case accented letters - * - * This lookuptable defines replacements for accented characters from the ASCII-7 - * range. This are upper case letters only. 
- * - * @author Andreas Gohr - * @see utf8_deaccent() - */ -$UTF8_UPPER_ACCENTS = array( - 'à ' => 'A', 'ô' => 'O', 'ď' => 'D', 'ḟ' => 'F', 'ë' => 'E', 'š' => 'S', 'ơ' => 'O', - 'ß' => 'Ss', 'ă' => 'A', 'ř' => 'R', 'ț' => 'T', 'ň' => 'N', 'ā' => 'A', 'ķ' => 'K', - 'ŝ' => 'S', 'ỳ' => 'Y', 'ņ' => 'N', 'ĺ' => 'L', 'ħ' => 'H', 'ṗ' => 'P', 'ó' => 'O', - 'ú' => 'U', 'ě' => 'E', 'é' => 'E', 'ç' => 'C', 'ẁ' => 'W', 'ċ' => 'C', 'õ' => 'O', - 'ṡ' => 'S', 'ø' => 'O', 'ģ' => 'G', 'ŧ' => 'T', 'ș' => 'S', 'ė' => 'E', 'ĉ' => 'C', - 'ś' => 'S', 'î' => 'I', 'ű' => 'U', 'ć' => 'C', 'ę' => 'E', 'ŵ' => 'W', 'ṫ' => 'T', - 'ū' => 'U', 'č' => 'C', 'ö' => 'Oe', 'è' => 'E', 'ŷ' => 'Y', 'ą' => 'A', 'ł' => 'L', - 'ų' => 'U', 'ů' => 'U', 'ş' => 'S', 'ğ' => 'G', 'ļ' => 'L', 'ƒ' => 'F', 'ž' => 'Z', - 'ẃ' => 'W', 'ḃ' => 'B', 'å' => 'A', 'ì' => 'I', 'ï' => 'I', 'ḋ' => 'D', 'ť' => 'T', - 'ŗ' => 'R', 'ä' => 'Ae', 'í' => 'I', 'ŕ' => 'R', 'ê' => 'E', 'ü' => 'Ue', 'ò' => 'O', - 'ē' => 'E', 'ñ' => 'N', 'ń' => 'N', 'ĥ' => 'H', 'ĝ' => 'G', 'đ' => 'D', 'ĵ' => 'J', - 'ÿ' => 'Y', 'ũ' => 'U', 'ŭ' => 'U', 'ư' => 'U', 'ţ' => 'T', 'ý' => 'Y', 'ő' => 'O', - 'â' => 'A', 'ľ' => 'L', 'ẅ' => 'W', 'ż' => 'Z', 'ī' => 'I', 'ã' => 'A', 'ġ' => 'G', - 'ṁ' => 'M', 'ō' => 'O', 'ĩ' => 'I', 'ù' => 'U', 'į' => 'I', 'ź' => 'Z', 'á' => 'A', - 'û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', -); - -/** - * UTF-8 array of common special characters - * - * This array should contain all special characters (not a letter or digit) - * defined in the various local charsets - it's not a complete list of non-alphanum - * characters in UTF-8. It's not perfect but should match most cases of special - * chars. - * - * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is! 
- * - * @author Andreas Gohr - * @see utf8_stripspecials() - */ -$UTF8_SPECIAL_CHARS = array( - 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, - 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d, - 0x002e, 0x002f, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b, - 0x005c, 0x005d, 0x005e, 0x005f, 0x0060, 0x0142, 0x007b, 0x007c, 0x007d, 0x007e, - 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, - 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092, - 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, - 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, - 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, - 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, - 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9, - 0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384, - 0x0385, 0x0387, 0x03b2, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1, - 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc, - 0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c, - 0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651, - 0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015, - 0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022, - 0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab, - 0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193, - 0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x2200, 0x2202, - 0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212, - 0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229, - 0x222a, 0x222b, 
0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265, - 0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310, - 0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, - 0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553, - 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, - 0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567, - 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590, - 0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7, - 0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702, - 0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f, - 0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719, - 0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723, - 0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e, - 0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738, - 0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742, - 0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d, - 0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c, - 0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f, - 0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e, - 0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8, - 0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3, - 0x27b4, 0x27b5, 0x27b6, 0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd, - 0x27be, 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc, - 0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6, - 0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 
0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0, - 0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa, - 0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d, -); -?> diff --git a/includes/utf8/ChangeLog b/includes/utf8/ChangeLog new file mode 100644 index 0000000..326850b --- /dev/null +++ b/includes/utf8/ChangeLog @@ -0,0 +1,837 @@ +2007-08-12 03:20 tag RELEASE_0_5 + +2007-08-12 03:20 harryf + + * str_ireplace.php, tests/cases/utf8_str_ireplace.test.php: Fix + 1599859 at last - + http://sourceforge.net/tracker/index.php?func=detail&aid=1599859&group_id=142846&atid=753842 + +2007-08-12 03:11 harryf + + * native/core.php: Limit the number parts to two when exploding + which may give a small performance gain and reduce memory use - + thanks to Geert De Deckere for tip + +2006-10-25 16:57 tag RELEASE_0_4 + +2006-10-25 16:57 harryf + + * docs/examples/tables.php: Add sample for lookup tables + +2006-10-17 11:58 harryf + + * tests/cases/utf8_unicode.test.php: Add basic sanity checks + +2006-10-17 11:48 harryf + + * native/core.php: Move lookup tables to local statics instead of + globals + +2006-10-17 11:22 harryf + + * tests/cases/: utf8_strtoupper.test.php, + utf8_substr_replace.test.php: Fix class names when run tests + individually + +2006-10-17 11:09 harryf + + * tests/cases/utf8_specials.test.php: Add tests for + utf8_is_word_chars + +2006-10-17 10:53 harryf + + * tests/cases/utf8_ascii.test.php: Add test for empty string + +2006-10-17 10:53 harryf + + * tests/cases/utf8_specials.test.php: Add utf8_strip_specials tests + +2006-10-16 23:13 harryf + + * utils/specials.php: Eliminate the global + +2006-10-16 22:39 harryf + + * tests/cases/utf8_ascii.test.php: Add basic tests for + utf8_accents_to_ascii + +2006-10-16 22:38 harryf + + * utils/ascii.php: Fix bug 1568924 plus make this function actually + do something - used correct lookup tables, s/$string/$str - guess + no one using it yet, so no complaints ;) + +2006-10-01 02:01 tag RELEASE_0_3 + +2006-10-01 
02:01 harryf + + * utils/position.php: Add Chris Smiths implementation - given str + and UTF-8 chr pos, returns corresponding byte index + +2006-10-01 01:53 harryf + + * tests/cases/utf8_ascii.test.php: Update to reflect change to + is_ascii - returning TRUE on empty strings + +2006-10-01 01:51 harryf + + * utils/ascii.php: Change is_ascii test - returns true on empty + strings - also changed regex so it doesnt capture matches in + memory + +2006-10-01 01:38 harryf + + * docs/phputf8.pod: Few extra links + +2006-10-01 01:16 harryf + + * TODO.tsk: Task update + +2006-10-01 01:16 harryf + + * README: Update note on license re Dokuwiki + +2006-10-01 01:16 harryf + + * DEPENDENCIES: Redundant + +2006-10-01 01:02 harryf + + * tests/data/bench/: strlen_mbstring_20061001.txt, + strlen_native_20061001.txt, strpos_mbstring_20061001.txt, + strpos_native_20061001.txt, strrpos_mbstring_20061001.txt, + strrpos_native_20061001.txt, substr_mbstring_20061001.txt, + substr_native_20061001.txt: Latest benchmarks + +2006-10-01 00:51 harryf + + * tests/cases/utf8_substr.test.php: Test overlong length + +2006-10-01 00:49 harryf + + * tests/cases/utf8_position.test.php: Tests for charpos to bytepos + fns + +2006-09-30 15:17 harryf + + * native/core.php: Another implementation thanks to Chris Smith / + dokuwiki, solving 65535 bug while preserving performance, plus + handling negative offset, lengths + +2006-09-27 23:34 harryf + + * native/core.php: Tentatively checking in a bugfixed version of + utf8_substr, now able to cope with 65535 - bug 1547780. 
+ +2006-09-11 17:22 harryf + + * ord.php: Fix error reporting - zero index + +2006-09-11 17:14 harryf + + * utf8.php: Add runtime check to confirm PCRE is compiled with + UTF-8 support + +2006-09-03 11:25 harryf + + * str_pad.php, tests/cases/utf8_str_pad.test.php: Adding Oliver + Saunders str_pad implementation + +2006-04-09 23:22 harryf + + * ord.php: Add sequence length checks + +2006-03-27 23:02 harryf + + * tests/cases/utf8_ascii.test.php: Add sanity tests + +2006-03-27 23:02 harryf + + * utils/ascii.php: Add note on use of utf8_strip_ascii_ctrl + +2006-03-27 00:26 harryf + + * tests/cases/utf8_ord.test.php: Add some tests for utf8_ord + +2006-03-27 00:25 harryf + + * ord.php: Further reduce num calls to ord + +2006-03-21 01:50 harryf + + * docs/: makeDocs.pl, print.css, screen.css: Supporting doc stuff + +2006-03-19 00:30 harryf + + * docs/phputf8.pod: First pass on docs complete + +2006-03-18 14:32 harryf + + * docs/phputf8.pod: More of handling bad bytes + +2006-03-18 13:47 tag RELEASE_0_2 + +2006-03-18 13:47 harryf + + * utils/ascii.php: Initial implementation of ctrl codes stripper - + better done with preg_replace? 
+ +2006-03-18 00:55 harryf + + * docs/phputf8.pod: fix pod syntax + +2006-03-18 00:37 harryf + + * docs/phputf8.pod: A little more on validation - poorly written + but anyway + +2006-03-18 00:36 harryf + + * README, TODO.tsk: Stay up to date + +2006-03-18 00:36 harryf + + * ord.php: Adding ord for utf-8 + +2006-03-17 00:04 harryf + + * docs/phputf8.pod: Adding place holders + +2006-03-16 23:57 harryf + + * docs/phputf8.pod: Getting started on docs + +2006-02-28 23:12 harryf + + * utf8.php, mbstring/case.php, mbstring/core.php, + mbstring/strlen.php, native/case.php, native/core.php, + native/strlen.php, tests/benchconfig.php, tests/config.php: + Reduce the number of includes + +2006-02-27 11:01 harryf + + * tests/: benchconfig.php, config.php: Remove superflous defines + +2006-02-27 01:00 harryf + + * ascii.php, bad.php, unicode.php, validation.php: Remove outdated + files + +2006-02-27 00:59 tag RELEASE_0_1 + +2006-02-27 00:59 harryf + + * native/strlen.php: Add newline at end + +2006-02-27 00:58 harryf + + * docs/examples/1.php: Add example of validation + +2006-02-27 00:56 harryf + + * docs/examples/1.php: Add simple example + +2006-02-27 00:46 harryf + + * TODO.tsk: Update TODO list + +2006-02-27 00:38 harryf + + * tests/benchconfig.php, tests/config.php, tests/index.php, + utf8.php: Eliminate iconv + +2006-02-27 00:37 harryf + + * tests/data/bench/: strlen_mbstring_20060226.txt, + strlen_native_20060226.txt, strpos_mbstring_20060226.txt, + strpos_native_20060226.txt, strrpos_mbstring_20060226.txt, + strrpos_native_20060226.txt, substr_mbstring_20060226.txt, + substr_native_20060226.txt: Add benchmark results + +2006-02-27 00:37 harryf + + * tests/bench/substr.php: Add substr bench + +2006-02-27 00:17 harryf + + * tests/bench/: strpos.php, strrpos.php: Add benchmark scripts for + strpos and strrpos + +2006-02-26 23:15 harryf + + * tests/: benchconfig.php, config.php, bench/strlen.php: Rejigging + around mb_strlen and starting benchmarks + +2006-02-26 23:13 
harryf + + * utf8.php: Place higher priority on mbstring + +2006-02-26 23:13 harryf + + * mbstring/: README, strlen.php: Adding mb_strlen implementation + +2006-02-26 22:47 harryf + + * tests/config.php: Make sure encoding is set correctly + +2006-02-26 15:07 harryf + + * tests/: config.php, index.php, runtests.php: Allow any test to + run against a particular engine + +2006-02-26 14:56 harryf + + * README: More README notes + +2006-02-26 14:39 harryf + + * tests/cases/: utf8_ascii.test.php, utf8_bad.test.php, + utf8_validation.test.php: New file locations + +2006-02-26 14:39 harryf + + * tests/: index.php, runtests.php: Add option to run against + specific engine + +2006-02-26 14:20 harryf + + * utils/: bad.php, unicode.php, validation.php: PHP @version doc + tags + +2006-02-26 14:17 harryf + + * utils/: ascii.php, bad.php, specials.php, unicode.php, utils.php, + validation.php: More moving files around + +2006-02-26 14:07 harryf + + * tests/cases/: utf8_ucfirst.test.php, utf8_ucwords.test.php: test + for one char only + +2006-02-25 15:52 harryf + + * tests/cases/: utf8_ascii.test.php, utf8_bad.test.php, + utf8_str_ireplace.test.php, utf8_str_split.test.php, + utf8_strcasecmp.test.php, utf8_strcspn.test.php, + utf8_stristr.test.php, utf8_strrev.test.php, + utf8_strspn.test.php, utf8_substr_replace.test.php, + utf8_trim.test.php, utf8_ucfirst.test.php, utf8_ucwords.test.php, + utf8_validation.test.php: Modify to reflect file / constant + renaming + +2006-02-25 15:28 harryf + + * tests/cases/: utf8_native_str_ireplace.test.php, + utf8_native_str_split.test.php, utf8_native_strcasecmp.test.php, + utf8_native_strcspn.test.php, utf8_native_stristr.test.php, + utf8_native_strrev.test.php, utf8_native_strspn.test.php, + utf8_native_substr_replace.test.php, utf8_native_trim.test.php, + utf8_native_ucfirst.test.php, utf8_native_ucwords.test.php, + utf8_str_ireplace.test.php, utf8_str_split.test.php, + utf8_strcasecmp.test.php, utf8_strcspn.test.php, + 
utf8_stristr.test.php, utf8_strrev.test.php, + utf8_strspn.test.php, utf8_substr_replace.test.php, + utf8_trim.test.php, utf8_ucfirst.test.php, utf8_ucwords.test.php: + Renaming test files + +2006-02-25 15:22 harryf + + * tests/config.php: Use correct constant, remove set encodings + +2006-02-25 15:20 harryf + + * ascii.php, bad.php, unicode.php, utf8_ascii.php, utf8_bad.php, + utf8_patterns.php, utf8_unicode.php, utf8_utils.php, + utf8_validation.php, validation.php, utils/patterns.php, + utils/utils.php: File renaming + +2006-02-25 15:14 harryf + + * utf8.php: Rename directory constant and use control constants + +2006-02-25 15:07 harryf + + * mbstring/: core.php, case.php: Add docs / control constant + +2006-02-25 15:05 harryf + + * mbstring/README: Add note for the picky + +2006-02-25 14:54 harryf + + * native/: case.php, core.php, strlen.php: Add control constants + +2006-02-25 14:50 harryf + + * native/: utf8_str_ireplace.php, utf8_str_split.php, + utf8_strcasecmp.php, utf8_strcspn.php, utf8_stristr.php, + utf8_strrev.php, utf8_strspn.php, utf8_substr_replace.php, + utf8_trim.php, utf8_ucfirst.php, utf8_ucwords.php: Moved to + parent directory + +2006-02-25 14:50 harryf + + * str_ireplace.php, str_split.php, strcasecmp.php, strcspn.php, + stristr.php, strrev.php, strspn.php, substr_replace.php, + trim.php, ucfirst.php, ucwords.php: Renaming / moving from native + subdir + +2006-02-25 14:28 harryf + + * native/strlen.php: Add constant to control loading + +2006-02-25 14:26 harryf + + * native/: strlen.php, utf8_strlen.php: Begin file renaming + +2006-02-25 14:21 harryf + + * native/utf8_ucfirst.php: Bug fixes so tests pass + +2006-02-25 00:43 harryf + + * utf8.php: Modify to reflect native/core.php and native/case.php + +2006-02-25 00:42 harryf + + * tests/config.php: Make sure HTMLReporter is producing UTF-8 + +2006-02-25 00:39 harryf + + * tests/index.php: Add simple browser for tests + +2006-02-25 00:33 harryf + + * tests/cases/: utf8_ascii.test.php, + 
utf8_native_str_ireplace.test.php, + utf8_native_str_split.test.php, utf8_native_strcasecmp.test.php, + utf8_native_strcspn.test.php, utf8_native_stristr.test.php, + utf8_native_strrev.test.php, utf8_native_strspn.test.php, + utf8_native_substr_replace.test.php, utf8_native_trim.test.php, + utf8_native_ucfirst.test.php, utf8_native_ucwords.test.php, + utf8_strlen.test.php, utf8_strpos.test.php, + utf8_strrpos.test.php, utf8_strtolower.test.php, + utf8_strtoupper.test.php, utf8_substr.test.php, + utf8_validation.test.php: Fix copy and paste gotchas + +2006-02-25 00:32 harryf + + * tests/runtests.php: Add runner for all tests + +2006-02-25 00:08 harryf + + * tests/cases/utf8_bad.test.php: Fix constant, modify way + config.php is loaded + +2006-02-25 00:06 harryf + + * tests/cases/utf8_ascii.test.php: Correct constant and test name + +2006-02-25 00:04 harryf + + * tests/cases/: utf8_strtolower.test.php, utf8_strtoupper.test.php: + Correct test names + +2006-02-25 00:02 harryf + + * native/: case.php, utf8_strtolower.php, utf8_strtoupper.php: Move + utf8_strtoupper and utf8_strtolower into case.php + +2006-02-24 23:59 harryf + + * native/: utf8_strpos.php, utf8_strrpos.php, utf8_substr.php: + Remove files now merged into native/core.php + +2006-02-24 23:57 harryf + + * native/core.php: Moving utf8_strpos, utf8_strrpos and utf8_substr + to single file + +2006-02-24 23:51 harryf + + * TODO.tsk: Add todo list + +2006-02-24 23:36 harryf + + * native/utf8_substr.php: Handle lengths beyond end of string in + same way as substr + +2006-02-24 23:35 harryf + + * tests/cases/utf8_substr.test.php: Adjusts tests to expect same + behaviour as substr + +2006-02-24 23:12 harryf + + * utf8.php: Replace variable containing directory with constant + +2006-02-24 23:06 harryf + + * tests/cases/: utf8_strrpos.test.php, utf8_substr.test.php: Remove + requires + +2006-02-24 22:56 harryf + + * tests/config.php: Load core functions via utf8.php + +2006-02-24 22:55 harryf + + * tests/cases/: 
utf8_native_strlen.test.php, + utf8_native_strpos.test.php, utf8_native_strrpos.test.php, + utf8_native_strtolower.test.php, utf8_native_strtoupper.test.php, + utf8_native_substr.test.php, utf8_strlen.test.php, + utf8_strpos.test.php, utf8_strrpos.test.php, + utf8_strtolower.test.php, utf8_strtoupper.test.php, + utf8_substr.test.php: Begin test reorganisation + +2006-02-24 22:53 harryf + + * utf8_patterns.php: Add note that not used by rest of lib + +2006-02-24 22:52 harryf + + * utf8_bad.php: Embed bad regex in functions to eliminate include + +2006-02-24 16:13 harryf + + * DEPENDENCIES: Starting on dependency documentation + +2006-02-24 16:06 harryf + + * native/utf8_substr_replace.php: Document dependency on + utf8_strlen + +2006-02-24 16:03 harryf + + * native/utf8_substr.php: Fix error reporting + +2006-02-24 15:58 harryf + + * README: Add note on bug reporting and well formed utf8 + +2006-02-24 15:45 harryf + + * native/utf8_strrpos.php: Document dependency on utf8_strlen, + utf8_substr + +2006-02-24 15:28 harryf + + * native/utf8_strpos.php: Document dependency on utf8_strlen, + utf8_substr + +2006-02-24 15:25 harryf + + * native/utf8_stristr.php: Document dependency on utf8_strlen + +2006-02-24 15:22 harryf + + * native/utf8_strcasecmp.php: Document dependency on + utf8_strtolower + +2006-02-24 15:21 harryf + + * native/: utf8_str_split.php, utf8_strcspn.php: Document + dependency on utf8_strlen + +2006-02-24 15:18 harryf + + * native/: case.php, core.php: Remove unneeded files + +2006-02-24 15:16 harryf + + * README: Add a tiny bit of documentation + +2006-02-24 15:12 harryf + + * utf8.php: Add further notes on loading code + +2006-02-24 15:11 harryf + + * LICENSE, README: Add license and notes on license + +2006-02-24 15:02 harryf + + * utf8_unicode.php, utf8_validation.php, utf8_bad.php: Add better + attribution to license blocks + +2006-02-24 14:52 harryf + + * mbstring/core.php: Add support for offset to utf8_strrpos + +2006-02-24 14:52 harryf + + * 
native/utf8_strrpos.php: Change E_USER_ERROR to E_USER_WARNING to + match strrpos behaviour + +2005-12-09 22:32 harryf + + * mbstring/core.php: Switch to assume correct internal encoding + +2005-12-09 22:31 harryf + + * utf8_validation.php: Add further comments on 5 / 6 byte sequence + risks + +2005-12-09 22:29 harryf + + * mbstring/case.php: Add strtolower / upper wrappers + +2005-12-09 22:23 harryf + + * README: Fix linefeed issues with README + +2005-12-08 17:17 harryf + + * exp/regexunicode.php: Update so its actually working + +2005-12-08 17:00 harryf + + * utf8.php: Get this to a semi-working condition + +2005-12-08 16:44 harryf + + * common.php: Remove outdated common.php code + +2005-12-08 16:24 harryf + + * native/utf8_ucwords.php: Switch to use of preg_replace_callback + +2005-12-08 13:13 harryf + + * utf8_bad.php: Modified to load the patterns file + +2005-12-08 13:08 harryf + + * README: Add some notes to README + +2005-12-08 12:53 harryf + + * utf8_ascii.php: Add seperate implementations with or without + ASCII device control codes + +2005-12-08 12:49 harryf + + * utf8_utils.php: Adding utils from andreas gohr / dokuwiki + +2005-07-16 13:53 harryf + + * utf8_unicode.php: Fix phpdoc issue + +2005-07-16 13:53 harryf + + * utf8_bad.php: Fix bug when for incomplete sequence at end of + string and add a bunch of phpdoc comments + +2005-07-16 13:36 harryf + + * tests/cases/utf8_bad.test.php: Add some tests for + utf8_bad_identify - two failing + +2005-07-16 13:20 harryf + + * tests/cases/utf8_validation.test.php: Add tests for + utf8_compliant function + +2005-07-16 13:12 harryf + + * utf8_validation.php: Add alternative, faster but less strict + mechanism to validate a UTF-8 string + +2005-07-16 00:03 harryf + + * utf8_bad.php: Add routine to identify how the UTF-8 is bad + +2005-07-16 00:01 harryf + + * utf8_validation.php: Minor cleaning + +2005-07-15 22:49 harryf + + * utf8_patterns.php: Add api doc note to all patterns that ASCII + range has changed 
from original version + +2005-07-15 22:47 harryf + + * tests/cases/utf8_bad.test.php: Add a whole load more tests for + specific badly formed UTF-8 + +2005-07-15 22:46 harryf + + * utf8_patterns.php: Modify patterns to contain full ASCII range + +2005-07-15 17:16 harryf + + * tests/cases/utf8_validation.test.php: Add a bunch more tests for + specific invalid UTF-8 sequences and code points + +2005-07-15 17:15 harryf + + * utf8_validation.php: Dropping regex based implementation and + switching to implementation based on that from + http://hsivonen.iki.fi/php-utf8/ + +2005-07-15 14:27 harryf + + * native/utf8_ucwords.php: Add a little documentation of the regex + +2005-07-15 14:20 harryf + + * tests/cases/: utf8_native_ucfirst.test.php, + utf8_native_ucwords.test.php: Add linefeed tests + +2005-07-15 14:12 harryf + + * tests/cases/utf8_native_trim.test.php: Add linefeed tests + +2005-07-15 14:03 harryf + + * native/utf8_substr_replace.php: Handle linefeeds correctly + +2005-07-15 14:02 harryf + + * tests/cases/utf8_native_substr_replace.test.php: Add linefeed + test + +2005-07-15 13:58 harryf + + * tests/cases/utf8_native_substr.test.php: Add test of length + beyond string length + +2005-07-15 13:21 harryf + + * tests/cases/utf8_native_substr.test.php: Add linefeed test + +2005-07-15 13:14 harryf + + * tests/config.php: Update for PHP 4.4.0 and new error notice + +2005-07-12 09:44 harryf + + * tests/cases/utf8_native_strrpos.test.php: Add linefeed tests + +2005-07-12 09:44 harryf + + * native/utf8_strrpos.php: Remove commented print_r statement + +2005-07-12 09:40 harryf + + * native/utf8_strrev.php: Modify regex to handle linefeeds + +2005-07-12 09:39 harryf + + * tests/cases/utf8_native_strrev.test.php: Add linefeed test + +2005-07-12 09:36 harryf + + * tests/cases/: utf8_native_strcspn.test.php, + utf8_native_strspn.test.php: Add linefeed tests + +2005-07-12 09:32 harryf + + * native/utf8_str_split.php: Modify regex to handle linefeeds and + avoid adding null 
chars to end of string + +2005-07-12 09:26 harryf + + * tests/cases/utf8_native_str_split.test.php: Add test with + linefeeds + +2005-07-11 14:33 harryf + + * tests/cases/utf8_native_str_ireplace.test.php: Add linefeed tests + +2005-07-11 14:32 harryf + + * native/utf8_str_ireplace.php: Make regex . meta match new lines + +2005-07-11 14:17 harryf + + * native/: utf8_strcasecmp.php, utf8_stristr.php: utf-8 + implementations of stristr and strcasecmp + +2005-07-11 14:16 harryf + + * tests/cases/: utf8_native_strcasecmp.test.php, + utf8_native_stristr.test.php: Add tests + +2005-07-11 12:48 harryf + + * tests/cases/utf8_native_strcspn.test.php: Add further ascii test + +2005-07-11 12:47 harryf + + * tests/cases/: utf8_native_strcspn.test.php, + utf8_native_strspn.test.php: Adding tests for utf8_strcspn and + correction to utf8_strspn + +2005-07-11 12:46 harryf + + * native/: utf8_strcspn.php, utf8_strspn.php: Add missing /u + modifier to PCRE patterns + +2005-07-05 00:30 tag start + +2005-07-05 00:30 harryf + + * README, common.php, utf8.php, utf8_ascii.php, utf8_bad.php, + utf8_patterns.php, utf8_unicode.php, utf8_validation.php, + exp/regexunicode.php, mbstring/core.php, native/case.php, + native/core.php, native/utf8_str_ireplace.php, + native/utf8_str_split.php, native/utf8_strcspn.php, + native/utf8_strlen.php, native/utf8_strpos.php, + native/utf8_strrev.php, native/utf8_strrpos.php, + native/utf8_strspn.php, native/utf8_strtolower.php, + native/utf8_strtoupper.php, native/utf8_substr.php, + native/utf8_substr_replace.php, native/utf8_trim.php, + native/utf8_ucfirst.php, native/utf8_ucwords.php, + tests/cli_reporter.php, tests/config.php, + tests/cases/utf8_ascii.test.php, tests/cases/utf8_bad.test.php, + tests/cases/utf8_native_str_ireplace.test.php, + tests/cases/utf8_native_str_split.test.php, + tests/cases/utf8_native_strlen.test.php, + tests/cases/utf8_native_strpos.test.php, + tests/cases/utf8_native_strrev.test.php, + 
tests/cases/utf8_native_strrpos.test.php, + tests/cases/utf8_native_strspn.test.php, + tests/cases/utf8_native_strtolower.test.php, + tests/cases/utf8_native_strtoupper.test.php, + tests/cases/utf8_native_substr.test.php, + tests/cases/utf8_native_substr_replace.test.php, + tests/cases/utf8_native_trim.test.php, + tests/cases/utf8_native_ucfirst.test.php, + tests/cases/utf8_native_ucwords.test.php, + tests/cases/utf8_validation.test.php, tests/data/utf8.html: + Initial import + +2005-07-05 00:30 harryf + + * README, common.php, utf8.php, utf8_ascii.php, utf8_bad.php, + utf8_patterns.php, utf8_unicode.php, utf8_validation.php, + exp/regexunicode.php, mbstring/core.php, native/case.php, + native/core.php, native/utf8_str_ireplace.php, + native/utf8_str_split.php, native/utf8_strcspn.php, + native/utf8_strlen.php, native/utf8_strpos.php, + native/utf8_strrev.php, native/utf8_strrpos.php, + native/utf8_strspn.php, native/utf8_strtolower.php, + native/utf8_strtoupper.php, native/utf8_substr.php, + native/utf8_substr_replace.php, native/utf8_trim.php, + native/utf8_ucfirst.php, native/utf8_ucwords.php, + tests/cli_reporter.php, tests/config.php, + tests/cases/utf8_ascii.test.php, tests/cases/utf8_bad.test.php, + tests/cases/utf8_native_str_ireplace.test.php, + tests/cases/utf8_native_str_split.test.php, + tests/cases/utf8_native_strlen.test.php, + tests/cases/utf8_native_strpos.test.php, + tests/cases/utf8_native_strrev.test.php, + tests/cases/utf8_native_strrpos.test.php, + tests/cases/utf8_native_strspn.test.php, + tests/cases/utf8_native_strtolower.test.php, + tests/cases/utf8_native_strtoupper.test.php, + tests/cases/utf8_native_substr.test.php, + tests/cases/utf8_native_substr_replace.test.php, + tests/cases/utf8_native_trim.test.php, + tests/cases/utf8_native_ucfirst.test.php, + tests/cases/utf8_native_ucwords.test.php, + tests/cases/utf8_validation.test.php, tests/data/utf8.html: + Initial revision + diff --git a/includes/utf8/LICENSE b/includes/utf8/LICENSE new 
file mode 100644 index 0000000..8add30a --- /dev/null +++ b/includes/utf8/LICENSE @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. 
These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. 
+ + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. 
+ + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. 
+ + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. 
+ + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. 
+Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. 
Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. 
However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. 
However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. 
If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + , 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! 
+ + diff --git a/includes/utf8/README b/includes/utf8/README new file mode 100644 index 0000000..8b533c2 --- /dev/null +++ b/includes/utf8/README @@ -0,0 +1,82 @@ +++PHP UTF-8++ + +Version 0.5 + +++DOCUMENTATION++ + +Documentation in progress in ./docs dir + +http://www.phpwact.org/php/i18n/charsets +http://www.phpwact.org/php/i18n/utf-8 + +Important Note: DO NOT use these functions without understanding WHY +you are using them. In particular, do not blindly replace all use of PHP's +string functions with functions found here - most of the time you will +not need to, and you will be introducing a significant performance +overhead to your application. You can get a good idea of when to use what +from reading: http://www.phpwact.org/php/i18n/utf-8 + +Important Note: For sake of performance most of the functions here are +not "defensive" (e.g. there is not extensive parameter checking, well +formed UTF-8 is assumed). This is particularly relevant when it comes to +catching badly formed UTF-8 - you should screen input on the "outer +perimeter" with help from functions in the utf8_validation.php and +utf8_bad.php files. + +Important Note: this library treats ALL ASCII characters as valid, including ASCII control characters. But if you use some ASCII control characters in XML, it will render the XML ill-formed. Don't be a bozo: http://hsivonen.iki.fi/producing-xml/#controlchar + +++BUGS / SUPPORT / FEATURE REQUESTS ++ + +Please report bugs to: +http://sourceforge.net/tracker/?group_id=142846&atid=753842 +- if you are able, please submit a failing unit test +(http://www.lastcraft.com/simple_test.php) with your bug report. + +For feature requests / faster implementation of functions found here, +please drop them in via the RFE tracker: http://sourceforge.net/tracker/?group_id=142846&atid=753845 +Particularly interested in faster implementations!
+ +For general support / help, use: +http://sourceforge.net/tracker/?group_id=142846&atid=753843 + +In the VERY WORST case, you can email me: hfuecks gmail com - I tend to be slow to respond though so be warned. + +Important Note: when reporting bugs, please provide the following +information; + +PHP version, whether the iconv extension is loaded (in PHP5 it's +there by default), whether the mbstring extension is loaded. The +following PHP script can be used to determine this information; + +"; +if ( extension_loaded('mbstring') ) { + print "mbstring available
"; +} else { + print "mbstring not available
"; +} +if ( extension_loaded('iconv') ) { + print "iconv available
"; +} else { + print "iconv not available
"; +} +?> + +++LICENSING++ + +Parts of the code in this library come from other places, under different +licenses. +The authors involved have been contacted (see below). Attribution for +which code came from elsewhere can be found in the source code itself. + ++Andreas Gohr / Chris Smith - Dokuwiki +There is a fair degree of collaboration / exchange of ideas and code +beteen Dokuwiki's UTF-8 library; +http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php +and phputf8. Although Dokuwiki is released under GPL, its UTF-8 +library is released under LGPL, hence no conflict with phputf8 + ++Henri Sivonen (http://hsivonen.iki.fi/php-utf8/ / +http://hsivonen.iki.fi/php-utf8/) has also given permission for his +code to be released under the terms of the LGPL. He ported a Unicode / UTF-8 +converter from the Mozilla codebase to PHP, which is re-used in phputf8 diff --git a/includes/utf8/TODO.tsk b/includes/utf8/TODO.tsk new file mode 100644 index 0000000..b683b54 --- /dev/null +++ b/includes/utf8/TODO.tsk @@ -0,0 +1,14 @@ + + Benchmark different implementations of functions against each other Comparisons of utf_* fns vs. the PHP str* functions for edge behaviour (e.g. sub_str with positions outside of string) Add further ways to "build" library to reduce number of includes necessary + +- this is now partially mitigated by utf.php which works pretty well, + +Leaving as is Stuff to add Where functions accept an integer character count as an argument, would be good to have byte index versions as well - can be useful in some instances where you're mingling with native str functions Add the "romanize" stuff + +Superceded by utf8 to ascii packageUTF-8 char to ordinal Potential faster implementations re. 
Marek Gayer approach described here + +http://www.sitepoint.com/blogs/2006/08/10/hot-php-utf-8-tips/ Implement Chris Smith's solution + +Added to utils/position.php http://sourceforge.net/tracker/index.php?func=detail&aid=1547780&group_id=142846&atid=753842 + + \ No newline at end of file diff --git a/includes/utf8/docs/examples/1.php b/includes/utf8/docs/examples/1.php new file mode 100644 index 0000000..11ec77d --- /dev/null +++ b/includes/utf8/docs/examples/1.php @@ -0,0 +1,33 @@ +\n"; + +# Check it's a well formed UTF-8 string +require_once UTF8 . '/utils/validation.php'; +if ( utf8_is_valid($str) ) { + print "It's well formed UTF-8
\n"; +} else { + print "It's badly formed UTF-8 - this shouldn't happen
\n"; +} + +print "Num chars: ".utf8_strlen($str)."
\n"; + +print "Uppercase: ".utf8_strtoupper($str)."
\n"; + +# Load an additional function; +require_once UTF8 . '/strrev.php'; + +print "Reversed: ".utf8_strrev($str)."
\n"; +?> \ No newline at end of file diff --git a/includes/utf8/docs/examples/tables.php b/includes/utf8/docs/examples/tables.php new file mode 100644 index 0000000..488cecc --- /dev/null +++ b/includes/utf8/docs/examples/tables.php @@ -0,0 +1,240 @@ +'; + + $o .= ''; + foreach ( range(1,10) as $i ) { + $o.="FromTo"; + } + $o .= "\n"; + + $sep = ''; + $j = 0; + foreach ( $hash as $k => $v ) { + if ( $j % 10 == 0 ) { + $j = 0; + $o.=$sep; + $sep = ''; + } + $o .= sprintf( + "%s%s", + $lambdaX($k), + $lambdaY($v) + ); + $j++; + } + + return $o . ''; +} + +function drawListTable ($list, $lambda) { + $o = ''; + $sep = ''; + $j = 0; + foreach ( $list as $c ) { + if ( $j % 10 == 0 ) { + $j = 0; + $o.=$sep; + $sep = ''; + } + $o .= sprintf("",$lambda($c)); + $j++; + } + return $o . '
%s
'; +} + +$chrtoutf8 = create_function('$c','return utf8_from_unicode(array($c));'); +$null = create_function('$c','return $c;'); + +$UTF8_UPPER_TO_LOWER = array( + 0x0041=>0x0061, 0x03A6=>0x03C6, 0x0162=>0x0163, 0x00C5=>0x00E5, 0x0042=>0x0062, + 0x0139=>0x013A, 0x00C1=>0x00E1, 0x0141=>0x0142, 0x038E=>0x03CD, 0x0100=>0x0101, + 0x0490=>0x0491, 0x0394=>0x03B4, 0x015A=>0x015B, 0x0044=>0x0064, 0x0393=>0x03B3, + 0x00D4=>0x00F4, 0x042A=>0x044A, 0x0419=>0x0439, 0x0112=>0x0113, 0x041C=>0x043C, + 0x015E=>0x015F, 0x0143=>0x0144, 0x00CE=>0x00EE, 0x040E=>0x045E, 0x042F=>0x044F, + 0x039A=>0x03BA, 0x0154=>0x0155, 0x0049=>0x0069, 0x0053=>0x0073, 0x1E1E=>0x1E1F, + 0x0134=>0x0135, 0x0427=>0x0447, 0x03A0=>0x03C0, 0x0418=>0x0438, 0x00D3=>0x00F3, + 0x0420=>0x0440, 0x0404=>0x0454, 0x0415=>0x0435, 0x0429=>0x0449, 0x014A=>0x014B, + 0x0411=>0x0431, 0x0409=>0x0459, 0x1E02=>0x1E03, 0x00D6=>0x00F6, 0x00D9=>0x00F9, + 0x004E=>0x006E, 0x0401=>0x0451, 0x03A4=>0x03C4, 0x0423=>0x0443, 0x015C=>0x015D, + 0x0403=>0x0453, 0x03A8=>0x03C8, 0x0158=>0x0159, 0x0047=>0x0067, 0x00C4=>0x00E4, + 0x0386=>0x03AC, 0x0389=>0x03AE, 0x0166=>0x0167, 0x039E=>0x03BE, 0x0164=>0x0165, + 0x0116=>0x0117, 0x0108=>0x0109, 0x0056=>0x0076, 0x00DE=>0x00FE, 0x0156=>0x0157, + 0x00DA=>0x00FA, 0x1E60=>0x1E61, 0x1E82=>0x1E83, 0x00C2=>0x00E2, 0x0118=>0x0119, + 0x0145=>0x0146, 0x0050=>0x0070, 0x0150=>0x0151, 0x042E=>0x044E, 0x0128=>0x0129, + 0x03A7=>0x03C7, 0x013D=>0x013E, 0x0422=>0x0442, 0x005A=>0x007A, 0x0428=>0x0448, + 0x03A1=>0x03C1, 0x1E80=>0x1E81, 0x016C=>0x016D, 0x00D5=>0x00F5, 0x0055=>0x0075, + 0x0176=>0x0177, 0x00DC=>0x00FC, 0x1E56=>0x1E57, 0x03A3=>0x03C3, 0x041A=>0x043A, + 0x004D=>0x006D, 0x016A=>0x016B, 0x0170=>0x0171, 0x0424=>0x0444, 0x00CC=>0x00EC, + 0x0168=>0x0169, 0x039F=>0x03BF, 0x004B=>0x006B, 0x00D2=>0x00F2, 0x00C0=>0x00E0, + 0x0414=>0x0434, 0x03A9=>0x03C9, 0x1E6A=>0x1E6B, 0x00C3=>0x00E3, 0x042D=>0x044D, + 0x0416=>0x0436, 0x01A0=>0x01A1, 0x010C=>0x010D, 0x011C=>0x011D, 0x00D0=>0x00F0, + 0x013B=>0x013C, 0x040F=>0x045F, 
0x040A=>0x045A, 0x00C8=>0x00E8, 0x03A5=>0x03C5, + 0x0046=>0x0066, 0x00DD=>0x00FD, 0x0043=>0x0063, 0x021A=>0x021B, 0x00CA=>0x00EA, + 0x0399=>0x03B9, 0x0179=>0x017A, 0x00CF=>0x00EF, 0x01AF=>0x01B0, 0x0045=>0x0065, + 0x039B=>0x03BB, 0x0398=>0x03B8, 0x039C=>0x03BC, 0x040C=>0x045C, 0x041F=>0x043F, + 0x042C=>0x044C, 0x00DE=>0x00FE, 0x00D0=>0x00F0, 0x1EF2=>0x1EF3, 0x0048=>0x0068, + 0x00CB=>0x00EB, 0x0110=>0x0111, 0x0413=>0x0433, 0x012E=>0x012F, 0x00C6=>0x00E6, + 0x0058=>0x0078, 0x0160=>0x0161, 0x016E=>0x016F, 0x0391=>0x03B1, 0x0407=>0x0457, + 0x0172=>0x0173, 0x0178=>0x00FF, 0x004F=>0x006F, 0x041B=>0x043B, 0x0395=>0x03B5, + 0x0425=>0x0445, 0x0120=>0x0121, 0x017D=>0x017E, 0x017B=>0x017C, 0x0396=>0x03B6, + 0x0392=>0x03B2, 0x0388=>0x03AD, 0x1E84=>0x1E85, 0x0174=>0x0175, 0x0051=>0x0071, + 0x0417=>0x0437, 0x1E0A=>0x1E0B, 0x0147=>0x0148, 0x0104=>0x0105, 0x0408=>0x0458, + 0x014C=>0x014D, 0x00CD=>0x00ED, 0x0059=>0x0079, 0x010A=>0x010B, 0x038F=>0x03CE, + 0x0052=>0x0072, 0x0410=>0x0430, 0x0405=>0x0455, 0x0402=>0x0452, 0x0126=>0x0127, + 0x0136=>0x0137, 0x012A=>0x012B, 0x038A=>0x03AF, 0x042B=>0x044B, 0x004C=>0x006C, + 0x0397=>0x03B7, 0x0124=>0x0125, 0x0218=>0x0219, 0x00DB=>0x00FB, 0x011E=>0x011F, + 0x041E=>0x043E, 0x1E40=>0x1E41, 0x039D=>0x03BD, 0x0106=>0x0107, 0x03AB=>0x03CB, + 0x0426=>0x0446, 0x00DE=>0x00FE, 0x00C7=>0x00E7, 0x03AA=>0x03CA, 0x0421=>0x0441, + 0x0412=>0x0432, 0x010E=>0x010F, 0x00D8=>0x00F8, 0x0057=>0x0077, 0x011A=>0x011B, + 0x0054=>0x0074, 0x004A=>0x006A, 0x040B=>0x045B, 0x0406=>0x0456, 0x0102=>0x0103, + 0x039B=>0x03BB, 0x00D1=>0x00F1, 0x041D=>0x043D, 0x038C=>0x03CC, 0x00C9=>0x00E9, + 0x00D0=>0x00F0, 0x0407=>0x0457, 0x0122=>0x0123, + ); + +$UTF8_LOWER_TO_UPPER = array( + 0x0061=>0x0041, 0x03C6=>0x03A6, 0x0163=>0x0162, 0x00E5=>0x00C5, 0x0062=>0x0042, + 0x013A=>0x0139, 0x00E1=>0x00C1, 0x0142=>0x0141, 0x03CD=>0x038E, 0x0101=>0x0100, + 0x0491=>0x0490, 0x03B4=>0x0394, 0x015B=>0x015A, 0x0064=>0x0044, 0x03B3=>0x0393, + 0x00F4=>0x00D4, 0x044A=>0x042A, 0x0439=>0x0419, 
0x0113=>0x0112, 0x043C=>0x041C, + 0x015F=>0x015E, 0x0144=>0x0143, 0x00EE=>0x00CE, 0x045E=>0x040E, 0x044F=>0x042F, + 0x03BA=>0x039A, 0x0155=>0x0154, 0x0069=>0x0049, 0x0073=>0x0053, 0x1E1F=>0x1E1E, + 0x0135=>0x0134, 0x0447=>0x0427, 0x03C0=>0x03A0, 0x0438=>0x0418, 0x00F3=>0x00D3, + 0x0440=>0x0420, 0x0454=>0x0404, 0x0435=>0x0415, 0x0449=>0x0429, 0x014B=>0x014A, + 0x0431=>0x0411, 0x0459=>0x0409, 0x1E03=>0x1E02, 0x00F6=>0x00D6, 0x00F9=>0x00D9, + 0x006E=>0x004E, 0x0451=>0x0401, 0x03C4=>0x03A4, 0x0443=>0x0423, 0x015D=>0x015C, + 0x0453=>0x0403, 0x03C8=>0x03A8, 0x0159=>0x0158, 0x0067=>0x0047, 0x00E4=>0x00C4, + 0x03AC=>0x0386, 0x03AE=>0x0389, 0x0167=>0x0166, 0x03BE=>0x039E, 0x0165=>0x0164, + 0x0117=>0x0116, 0x0109=>0x0108, 0x0076=>0x0056, 0x00FE=>0x00DE, 0x0157=>0x0156, + 0x00FA=>0x00DA, 0x1E61=>0x1E60, 0x1E83=>0x1E82, 0x00E2=>0x00C2, 0x0119=>0x0118, + 0x0146=>0x0145, 0x0070=>0x0050, 0x0151=>0x0150, 0x044E=>0x042E, 0x0129=>0x0128, + 0x03C7=>0x03A7, 0x013E=>0x013D, 0x0442=>0x0422, 0x007A=>0x005A, 0x0448=>0x0428, + 0x03C1=>0x03A1, 0x1E81=>0x1E80, 0x016D=>0x016C, 0x00F5=>0x00D5, 0x0075=>0x0055, + 0x0177=>0x0176, 0x00FC=>0x00DC, 0x1E57=>0x1E56, 0x03C3=>0x03A3, 0x043A=>0x041A, + 0x006D=>0x004D, 0x016B=>0x016A, 0x0171=>0x0170, 0x0444=>0x0424, 0x00EC=>0x00CC, + 0x0169=>0x0168, 0x03BF=>0x039F, 0x006B=>0x004B, 0x00F2=>0x00D2, 0x00E0=>0x00C0, + 0x0434=>0x0414, 0x03C9=>0x03A9, 0x1E6B=>0x1E6A, 0x00E3=>0x00C3, 0x044D=>0x042D, + 0x0436=>0x0416, 0x01A1=>0x01A0, 0x010D=>0x010C, 0x011D=>0x011C, 0x00F0=>0x00D0, + 0x013C=>0x013B, 0x045F=>0x040F, 0x045A=>0x040A, 0x00E8=>0x00C8, 0x03C5=>0x03A5, + 0x0066=>0x0046, 0x00FD=>0x00DD, 0x0063=>0x0043, 0x021B=>0x021A, 0x00EA=>0x00CA, + 0x03B9=>0x0399, 0x017A=>0x0179, 0x00EF=>0x00CF, 0x01B0=>0x01AF, 0x0065=>0x0045, + 0x03BB=>0x039B, 0x03B8=>0x0398, 0x03BC=>0x039C, 0x045C=>0x040C, 0x043F=>0x041F, + 0x044C=>0x042C, 0x00FE=>0x00DE, 0x00F0=>0x00D0, 0x1EF3=>0x1EF2, 0x0068=>0x0048, + 0x00EB=>0x00CB, 0x0111=>0x0110, 0x0433=>0x0413, 0x012F=>0x012E, 0x00E6=>0x00C6, 
+ 0x0078=>0x0058, 0x0161=>0x0160, 0x016F=>0x016E, 0x03B1=>0x0391, 0x0457=>0x0407, + 0x0173=>0x0172, 0x00FF=>0x0178, 0x006F=>0x004F, 0x043B=>0x041B, 0x03B5=>0x0395, + 0x0445=>0x0425, 0x0121=>0x0120, 0x017E=>0x017D, 0x017C=>0x017B, 0x03B6=>0x0396, + 0x03B2=>0x0392, 0x03AD=>0x0388, 0x1E85=>0x1E84, 0x0175=>0x0174, 0x0071=>0x0051, + 0x0437=>0x0417, 0x1E0B=>0x1E0A, 0x0148=>0x0147, 0x0105=>0x0104, 0x0458=>0x0408, + 0x014D=>0x014C, 0x00ED=>0x00CD, 0x0079=>0x0059, 0x010B=>0x010A, 0x03CE=>0x038F, + 0x0072=>0x0052, 0x0430=>0x0410, 0x0455=>0x0405, 0x0452=>0x0402, 0x0127=>0x0126, + 0x0137=>0x0136, 0x012B=>0x012A, 0x03AF=>0x038A, 0x044B=>0x042B, 0x006C=>0x004C, + 0x03B7=>0x0397, 0x0125=>0x0124, 0x0219=>0x0218, 0x00FB=>0x00DB, 0x011F=>0x011E, + 0x043E=>0x041E, 0x1E41=>0x1E40, 0x03BD=>0x039D, 0x0107=>0x0106, 0x03CB=>0x03AB, + 0x0446=>0x0426, 0x00FE=>0x00DE, 0x00E7=>0x00C7, 0x03CA=>0x03AA, 0x0441=>0x0421, + 0x0432=>0x0412, 0x010F=>0x010E, 0x00F8=>0x00D8, 0x0077=>0x0057, 0x011B=>0x011A, + 0x0074=>0x0054, 0x006A=>0x004A, 0x045B=>0x040B, 0x0456=>0x0406, 0x0103=>0x0102, + 0x03BB=>0x039B, 0x00F1=>0x00D1, 0x043D=>0x041D, 0x03CC=>0x038C, 0x00E9=>0x00C9, + 0x00F0=>0x00D0, 0x0457=>0x0407, 0x0123=>0x0122, + ); + +$UTF8_UPPER_ACCENTS = array( + 'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O', + 'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K', + 'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O', + 'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O', + 'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C', + 'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T', + 'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L', + 'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z', + 'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T', + 
'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O', + 'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J', + 'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O', + 'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G', + 'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A', + 'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E', + ); + +$UTF8_LOWER_ACCENTS = array( + 'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o', + 'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k', + 'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o', + 'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o', + 'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c', + 'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't', + 'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l', + 'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z', + 'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't', + 'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o', + 'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j', + 'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o', + 'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g', + 'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a', + 'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', 'ĕ' => 'e', + ); + +header('Content-Type: text/html; charset=utf-8'); +?> + + + + +PHP-UTF8 Lookup Tables + + + +

PHP-UTF8 Lookup Tables

+

Shows you the some of the lookup tables PHP-UTF8 uses (there's also a .

+ + + +

Upper Case to Lower Case (utf8_strtolower)

+ + + +

Lower Case to Upper Case (utf8_strtoupper)

+ + + +

Upper Case Accented Characters (replaced by utf8_accents_to_ascii())

+

Note for a more complete implementation of UTF-8 to ASCII character replacement, +see the utf8_to_ascii package.

+ + + +

Lower Case Accented Characters (replaced by utf8_accents_to_ascii())

+

Note for a more complete implementation of UTF-8 to ASCII character replacement, +see the utf8_to_ascii package.

+ + + + diff --git a/includes/utf8/docs/makeDocs.pl b/includes/utf8/docs/makeDocs.pl new file mode 100755 index 0000000..f7c0488 --- /dev/null +++ b/includes/utf8/docs/makeDocs.pl @@ -0,0 +1,27 @@ +#!/usr/bin/perl -w + +use strict; +use Pod::Xhtml; +use File::Basename; + +my $filename = 'phputf8.pod'; + +my $basename = basename($filename); +$basename =~ s/.[a-z]{3}$//; + + +my $POD = Pod::Xhtml->new(); + +$POD->addHeadText(''); +$POD->addHeadText(''); +$POD->addHeadText(''); +$POD->addBodyOpenText(''); +$POD->addBodyOpenText(''); + +$POD->parse_from_file('phputf8.pod'); + diff --git a/includes/utf8/docs/phputf8.pod b/includes/utf8/docs/phputf8.pod new file mode 100644 index 0000000..d8a4542 --- /dev/null +++ b/includes/utf8/docs/phputf8.pod @@ -0,0 +1,515 @@ +# $Id: phputf8.pod,v 1.7 2006/09/30 23:38:19 harryf Exp $ + +=head1 NAME + +phputf8 - Tools for working with UTF-8 in PHP + +=head1 SYNOPSIS + + require_once '/path/to/utf8/utf8.php'; + require_once UTF8 . '/utils/validation.php'; + require_once UTF8 . '/utils/ascii.php'; + + # Check the UTF-8 is well formed + if ( !utf8_is_valid($_POST['somecontent']) ) { + + require_once UTF8 . '/utils/bad.php'; + trigger_error('Bad UTF-8 detected. Clearning', E_USER_NOTICE); + + # Strip out bad sequences - replace with ? character + $_POST['somecontent'] = utf8_bad_replace($_POST['somecontent']); + + } + + # This works fine with UTF-8 + $_POST['somecontent'] = ltrim($_POST['somecontent']); + + # If it contains only ascii chars, use native str fns for speed... 
+ if ( utf8_is_ascii($_POST['somecontent']) ) { + + $endOfFirstWord = strpos($_POST['somecontent'],' '); + $firstword = substr($_POST['somecontent'],0,$endOfFirstWord); + $firstword = strtoupper($firstword); + $therest = substr($_POST['somecontent'],$endOfFirstWord); + + } else { + + # It contains multibyte sequences - use the slower but safe + $endOfFirstWord = utf8_strpos($_POST['somecontent'],' '); + $firstword = utf8_substr($_POST['somecontent'],0,$endOfFirstWord); + $firstword = utf8_strtoupper($firstword); + $therest = utf8_substr($_POST['somecontent'],$endOfFirstWord); + + } + + # htmlspecialchars is also safe for use with UTF-8 + header("Content-Type: text/html; charset=utf-8"); + echo "
";
+    echo "".htmlspecialchars($firstword)."";
+    echo htmlspecialchars($therest);
+    echo "
"; + + +=head1 DESCRIPTION + +phputf8 does a few things for you; + +=over + +=item * Provides UTF-8 aware versions of PHP's string functions + +All of these functions are prefixed with C. Six of these functions +are loaded "on the fly", depending on whether you have the mbstring +extension available. The rest build on top of those six. + +See L. + +=item * Detection of bad UTF-8 sequences + +The file C contains functions for testing +strings for bad UTF-8 sequences. Note that other functions in the library +assume valid UTF-8. + +See L + +=item * Cleaning of bad UTF-8 sequences + +Functions for stripping or replacing bad sequences are available in +C + +See L + +=item * Detecting pure ASCII & stripping non-ASCII + +The file C contains utilities to detect +whether a UTF-8 string contains just ASCII characters (allowing +you to use PHP's faster, native, string functions) and also stripping +everything non-ASCII from a string + +See L + +=item * Basic transliteration + +The file C contains basic transliteration +functionality (L) - not +much but enough to convert common European, non-ascii characters to +a reasonable ASCII equivalent. You might use these when preparing a +string for use as a filename, afterwhich you strip all other non-ascii +characters using the ASCII utilities. + +Further transliteration is provided in the C package +at L. Much more powerful +functionality is provided by the pecl transliteration extension - +L and +L. + +See L + +=back + +=head1 String Functions + +There are seven essential functions provided by phputf8, which are +required by many of the other functions. These are all loaded +when you include the main C script e.g. 
+ + require_once '/path/to/utf8/utf8.php'; + +Six of these functions depend on whether the mbstring extension is +installed (see L) - if it is available, +the following functions will be wrappers around the equivalent +mb_string functions; + +=over + +=item * C + +=item * C + +=item * C + +=item * C + +=item * C + +=item * C + +=back + +B phputf8 cannot support mbstring function overloading; +it relies in some cases on PHP's native string functions +counting characters as bytes. + +The seventh function is C, which is +implemented independent of mbstring (mbstring doesn't +provide it). + +B - if you do not load C and you wish +to use the mbstring implementations, you need to set the mbstring +encoding to UTF-8 yourself - see L. + +=head2 Further string functions + +All other string functions must be included on demand. They are +available directly under the C directory with filenames +corresponding to the equivalent PHP string functions, but still +with the function prefix C. + +For example, to load the strrev implementation; + + # Load the main script + require_once '/path/to/utf8/utf8.php'; + + # Load the UTF-8 aware strrev implementation + require_once UTF8 . '/strrev.php'; + print utf8_strrev('Iñtërnâtiônàlizætiøn')."\n"; + +All string implementations are found in the C directory. +For documentation for each function, see the phpdocs +L. + +B Some of the functions, such as C take +arguments like 'start' and 'length', requiring values in terms +of I not bytes - i.e. return values from functions +like C and C. Additional implementations +would be useful which take byte indexes instead of character +positions - this would allow further advantage to be taken of +UTF-8's design and more use of PHP's native functions for performance. + +=head1 UTF-8 Validation and Cleaning + +It's important to understand that multi-byte UTF-8 characters can be +badly formed. UTF-8 has rules regarding multi-byte characters and those +rules can be broken. 
Some possible reasons why a sequence of bytes +might be badly formed UTF-8; + +=over + +=item It's a different character encoding + +For example, 8 bit characters in ISO-8859-1 would be badly formed UTF-8. +That said, characters declared as ISO-8859-1 but still within the ASCII-7 +range would still be valid UTF-8. + +=item It's a corrupted UTF-8 string + +Something has mangled the UTF-8 string (PHP's native strrev function, +for example, would do this). + +=item Someone is injecting badly formed UTF-8 input deliberately. + +They might be attempting to "break" your RSS feed, for example. + +=back + +With that in mind, the functions provided in C<./utils/validation.php> +and C<./utils/bad.php> are intended to help guard against such problems. + +=head2 Validation + +There are two functions in C<./utils/validation.php>, one "strict" +and the other slightly more relaxed. + +The strict version is C - as well as checking each +sequence, byte-by-byte, it also regards sequences which are not +part of the Unicode standard as being invalid (UTF-8 allows for +5 and 6 byte sequences but these have no meaning in Unicode, and will +result in browsers displaying "junk" characters, e.g. a ? character). + +The second function C relies on the behaviour of +PHP's PCRE extension, to spot invalid UTF-8 sequences. This +function will pass 5 and 6 byte sequences but also performs +much better than C. + +Both are simple to use; + + require_once UTF8 . '/utils/validation.php'; + if ( utf8_is_valid($str) ) { + print "Its valid\n"; + } + if ( utf8_is_compliant($str) ) { + print "Its compliant\n"; + } + + +=head2 Cleaning UTF-8 + +If you detect a UTF-8 encoded string contains badly formed +sequences, functions in C<./utils/bad.php> can help. Be warned +that performance on large strings will be an issue. + +It provides the following functions; + +=over + +=item * C + +Locates the I<first> bad byte in a UTF-8 string, returning its +I<byte> (not character) position in the string. 
You might use this +for iterative cleaning or analysis of a UTF-8 string for example; + + require_once UTF8 . '/utils/validation.php'; + require_once UTF8 . '/utils/bad.php'; + + $clean = ''; + while ( FALSE !== ( $badIndex = utf8_bad_find($str) ) ) { + print "Bad byte found at $badIndex\n"; + $clean .= substr($str,0,$badIndex); + $str = substr($str,$badIndex+1); + } + $clean .= $str; + +=item * C + +The same as C but searches the complete string and +returns the index of all bad bytes found in an array + +=item * C + +Removes all bad bytes from a UTF-8 string, returning the cleaned string + +=item * C + +Removes all bad bytes from a UTF-8 string and replaces them with some +other character (default is ?) + +=item * C and C + +Together these two functions attempt to provide a reason why a +particular byte is not valid UTF-8. Perhaps you might use these +when logging errors. + +=back + +=head2 Warning on ASCII Control Characters + +The above functions for validating and cleaning UTF-8 strings +all regard ASCII control characters as being valid and +acceptable. But ASCII control chars are not acceptable in XML +documents - use the C function in +C<./utils/ascii.php> (available v0.3+), which will remove +all ASCII control characters that are illegal in XML. + +See L. + +=head2 Strategy + +Because validation and cleaning UTF-8 strings comes with a pretty high +cost, in terms of performance, you should be aiming to do this once +only, at the point where you receive some input (e.g. a submitted form) +before going on to using the rest of the string functions in this library. + +You should also be aware that validation and cleaning is your job - +the utf8_* string functions I they are being given well formed +UTF-8 to process, because the performance overhead of checking, every +time you called C, for example, would be very high. 
+ +=head1 Performance and Optimization + +The first thing you I<shouldn't> be attempting to do is replace all use of PHP's +native string functions with functions from this library. Doing so will have +a dramatic (and bad) effect on your code's performance. It also misses opportunities +you may have to continue using PHP's native string functions. + +There are two main areas to consider, when working out how to support UTF-8 +with this library and achieve optimal performance. + +=head2 When data is 99% ASCII + +First, if the majority of the data your application will be processing is +written in English, most of the time you will be able to use PHP's native +string functions, only using the utf8_* string functions when you encounter +multibyte characters. This has already been implied above in the example +in the L. Most characters used in English fall within the +ASCII-7 range and ASCII characters in UTF-8 are no different to normal +ASCII characters. + +So check whether a string is 100% ASCII first, and if so, use PHP's native +string functions on it. + + require_once '/path/to/utf8/utf8.php'; + require_once UTF8 . '/utils/ascii.php'; + + if ( utf8_is_ascii($string) ) { + # use native PHP string functions + } else { + # use utf8_* string functions + } + +=head2 Exploiting UTF-8's design + +Second, you may be able to exploit UTF-8's design to your advantage, +depending on what I you are doing to a string. This road +requires more effort and a good understanding of UTF-8's design. + +As a starting point, you really need to examine the range table +shown on Wikipedia's page on UTF-8 L. + +Some key points about UTF-8's design; + +=over + +=item UTF-8 is a superset of ASCII + +In other words ASCII-7 characters are encoded in exactly the same +way as normal. These characters are those shown of the I +table L - the first 128 characters. 
+ +Note that the second table shown at L +"Extended ASCII characters" are not ASCII-7 characters are I +encoded differently in UTF-8 (probably using 2 bytes). Those +characters seem to be ISO-8859-1 - occasionally you will seen +people saying UTF-8 is backwards compatible with ISO-8859-1 - this +is I. + +One specific example which illustrates this; + + $new_utf8_str = strstr('Iñtërnâtiônàlizætiøn','l'); + +Using the "needle" character 'l' (in the ASCII-7 range), this +example works without any problems, the variable C<$new_utf8_str> +being assigned the value 'lizætiøn', even though the haystack +string contains multibyte characters. + +Actually this example leads into the next point... + +=item Every character sequence is unique in UTF-8 + +Assuming that a UTF-8 encoded string is well formed, any sequence +in that string representing a single character (be it a single +byte ASCII character or a multi byte character) cannot be mistaken +is as a subsequence of a larger multi byte sequence. + +That means all of the following examples work; + + # Pop off a piece of a string using multi-byte character + $new_utf8_str = strstr('Iñtërnâtiônàlizætiøn','ô'); + + # Explode string using multibyte character + $array = explode('ô','Iñtërnâtiônàlizætiøn'); + + # Using byte index instead of chacter index... + $haystack = 'Iñtërnâtiônàlizætiøn'; + $needle = 'ô'; + $pos = strpos($haystack, $needle); + print "Position in bytes is $pos
"; + $substr = substr($haystack, 0, $pos); + print "Substr: $substr
"; + + +=back + +Put those together and often you will be able to use existing code +with little or no modification. + +Often you will be able to continue working in bytes instead of +logical characters (as the last example above shows). + +There are some functions which you I always need to replace, +for example C. You should be able to get some idea of +which these functions are by looking at +L. + + +=head1 Transliteration + +Sometimes you will need to be able to remove all multi-byte +characters from a UTF-8 string and use only ASCII. Some +possible reasons why; + +=over + +=item Interfaces to systems with no support for UTF-8 + +An application might be accessing data from your application +but lack support for UTF-8. You may need to remove all non- +ASCII-7 characters for it. + +=item Filenames + +Although most modern operating systems support Unicode, not +all applications running under that OS may do so and you may +be exposing yourself to security issues by allowing multi +byte characters in filenames. + +=item Urls + +Similar issues to filenames - most modern browsers support +the use of UTF-8 in URLs but doing so may not be a smart +idea e.g. potential for phishing via the use of similar +looking (to humans) characters. + +=item Primary Keys / Identifiers + +It is probably unwise to allow multi-byte UTF-8 characters into +certain critical "fields" in your application, such as a username. +Someone might be able to register a user with a similar looking +name to an admin user - consider "admin" vs. "admın" < hard to +spot the difference (note the ı character in the second example). + +=back + +=head2 Stripping multi byte characters + +To simply remove all multibyte characters, the C<./utils/ascii.php> +collection of functions can help e.g.; + + require_once '/path/to/utf8/utf8.php'; + require_once UTF8 . 
'/utils/ascii.php'; + $str = "admın"; + print utf8_strip_non_ascii($str); // prints "admn" + +Note also the C function, which also +strips out ASCII control codes - see +L for information on that +topic. + +=head2 Transliteration Utilities + +Now simply throwing out characters is not kind to users. An +alternative is transliteration, where you try to replace multi +byte characters with equivalent ASCII characters that a human +would understand. For example "Zürich" could be converted to +"Zuerich", the multi byte "ü" character being replaced by "ue". + +See L for a +general introduction to transliteration. + +The main phputf8 package contains a single function in +the C<./utils/ascii.php> script that does some (basic) +replacements of accented characters common in languages +like French. After using this function, you should still +strip out all remaining multi-byte characters. For +example; + + require_once '/path/to/utf8/utf8.php'; + require_once UTF8 . '/utils/ascii.php'; + + $filename = utf8_accents_to_ascii($filename); + $filename = utf8_strip_non_ascii($filename); + +This will at least preserve I characters in an +ASCII form that will be understandable by users. + +Further and much more powerful transliteration +capabilities are provided in the separate utf8_to_ascii +package distributed at L. +Because it is a port of Perl's L package +to PHP, it is distributed under the same license. + +A quick intro to utf8_to_ascii can be found at +L + +Be warned that utf8_to_ascii does have limitations and a better +choice, if you have rights to install it in your environment, is +Derick Rethans' transliteration extension: +L. 
+ + +=head1 SEE ALSO + +L, +L +L +L - Unicode normalization in PHP +L diff --git a/includes/utf8/docs/print.css b/includes/utf8/docs/print.css new file mode 100644 index 0000000..77b5a6e --- /dev/null +++ b/includes/utf8/docs/print.css @@ -0,0 +1,68 @@ +body { + font: 10pt "Lucida Grande", Verdana, Lucida, Helvetica, Arial, sans-serif; + background-color: White; + color: Black; +} +table { + font-size: 100%; + padding:0; + margin:0; +} +tr,td,th {padding:0; margin:0;} +img {border:0} +a { + color:#000000; + text-decoration:none; +} +pod.page { + text-align: justify; +} +h1, h2, h3, h4, h5 { + color: Black; + background-color: transparent; + font-family: "Lucida Grande", Verdana, Lucida, Helvetica, Arial, sans-serif; + font-size: 100%; + font-weight: normal; + margin-left: 0; + margin-right: 0; + margin-top: 0; + margin-bottom: 1em; + padding-left: 0; + padding-right: 0; + padding-top: 0.5em; + padding-bottom: 0; + border-bottom: 1px solid #000000; + clear:left; +} +h1 {font-size: 160%; font-weight: bold;} +h2 {font-size: 150%; } +h3 {font-size: 140%; border-bottom: none; } +h4 {font-size: 120%; border-bottom: none; } +h5 {font-size: 100%; border-bottom: none; } +ul { + line-height: 1.5em; + list-style-type: square; + margin: 0 0 1.0em 1.5em; + padding: 0; + +} +ol { + line-height: 1.0em; + margin: 0 0 1.0em 1.0em; + padding: 0; + font-weight: normal; +} + +pre { + font-size: 8pt; + padding: 0.5em; + border: 1px dashed #000000; + color: Black; + overflow: visible; + font-family: "Courier New",Courier,monospace; +} +code { + font-family: "Courier New",Courier,monospace; +} +.toplink {display:none} +#nav {display:none} diff --git a/includes/utf8/docs/screen.css b/includes/utf8/docs/screen.css new file mode 100644 index 0000000..27099c9 --- /dev/null +++ b/includes/utf8/docs/screen.css @@ -0,0 +1,52 @@ +/* +* Adapted from http://resources.neolao.com/php/dokuwiki/templates +*/ +* { + font-family: Verdana, Helvetica, Arial, sans-serif; + font-size: small; +} +body { + 
background: white; +} +a { text-decoration: none; color: #5d579d; } +a:hover { color: #827db7; } +h1 { + color: #f4a600; + font-size: 1.5em; +} +h1.title { + background-color: #fffff5; + font-size: 2.0em; +} +h2 { + color: #f4a600; + font-size: 1.2em; +} +h3 { + color: #817cb6; + font-size: 1em; +} +h4 { + color: #817cb6; + font-size: 1em; +} +h5 { + color: #817cb6; + font-size: 1em; +} +table { border: solid 1px #999; } +table th { border: solid 1px #999; background-color: #eee; } +table td { border: solid 1px #ccc; background-color: #fff; } +table tr:hover { background-color: #ffffe5; } +blockquote { border-left: solid 2px #a58fbb; margin: 0; padding: 0 0 0 .5em; } +p { margin: .5em 0 0 0; } +pre { + padding: 2px; + border: 1px dotted #8cacbb; + color: #000; + overflow: auto; + background-color: #fffff5; font-size: 1em; font-family: "Courier New",Courier,monospace; +} +pre * { background-color: #fffff5; font-size: 1em; font-family: "Courier New",Courier,monospace; } +code { background-color: #fffff5; border: 1px dotted #8cacbb; font-size: 1em; font-family: "Courier New",Courier,monospace; } + diff --git a/includes/utf8/exp/regexunicode.php b/includes/utf8/exp/regexunicode.php new file mode 100644 index 0000000..eb9c1d5 --- /dev/null +++ b/includes/utf8/exp/regexunicode.php @@ -0,0 +1,37 @@ + +* @link http://www.php.net/manual/en/function.strlen.php +* @link http://www.php.net/manual/en/function.utf8-decode.php +* @param string UTF-8 string +* @return int number of UTF-8 characters in string +* @package utf8 +* @subpackage strings +*/ +function utf8_strlen($str){ + return strlen(utf8_decode($str)); +} + + +//-------------------------------------------------------------------- +/** +* UTF-8 aware alternative to strpos +* Find position of first occurrence of a string +* Note: This will get alot slower if offset is used +* Note: requires utf8_strlen amd utf8_substr to be loaded +* @param string haystack +* @param string needle (you should validate this with 
utf8_is_valid) +* @param integer offset in characters (from left) +* @return mixed integer position or FALSE on failure +* @see http://www.php.net/strpos +* @see utf8_strlen +* @see utf8_substr +* @package utf8 +* @subpackage strings +*/ +function utf8_strpos($str, $needle, $offset = NULL) { + + if ( is_null($offset) ) { + + $ar = explode($needle, $str, 2); + if ( count($ar) > 1 ) { + return utf8_strlen($ar[0]); + } + return FALSE; + + } else { + + if ( !is_int($offset) ) { + trigger_error('utf8_strpos: Offset must be an integer',E_USER_ERROR); + return FALSE; + } + + $str = utf8_substr($str, $offset); + + if ( FALSE !== ( $pos = utf8_strpos($str, $needle) ) ) { + return $pos + $offset; + } + + return FALSE; + } + +} + +//-------------------------------------------------------------------- +/** +* UTF-8 aware alternative to strrpos +* Find position of last occurrence of a char in a string +* Note: This will get alot slower if offset is used +* Note: requires utf8_substr and utf8_strlen to be loaded +* @param string haystack +* @param string needle (you should validate this with utf8_is_valid) +* @param integer (optional) offset (from left) +* @return mixed integer position or FALSE on failure +* @see http://www.php.net/strrpos +* @see utf8_substr +* @see utf8_strlen +* @package utf8 +* @subpackage strings +*/ +function utf8_strrpos($str, $needle, $offset = NULL) { + + if ( is_null($offset) ) { + + $ar = explode($needle, $str); + + if ( count($ar) > 1 ) { + // Pop off the end of the string where the last match was made + array_pop($ar); + $str = join($needle,$ar); + return utf8_strlen($str); + } + return FALSE; + + } else { + + if ( !is_int($offset) ) { + trigger_error('utf8_strrpos expects parameter 3 to be long',E_USER_WARNING); + return FALSE; + } + + $str = utf8_substr($str, $offset); + + if ( FALSE !== ( $pos = utf8_strrpos($str, $needle) ) ) { + return $pos + $offset; + } + + return FALSE; + } + +} + 
+//-------------------------------------------------------------------- +/** +* UTF-8 aware alternative to substr +* Return part of a string given character offset (and optionally length) +* +* Note arguments: comparied to substr - if offset or length are +* not integers, this version will not complain but rather massages them +* into an integer. +* +* Note on returned values: substr documentation states false can be +* returned in some cases (e.g. offset > string length) +* mb_substr never returns false, it will return an empty string instead. +* This adopts the mb_substr approach +* +* Note on implementation: PCRE only supports repetitions of less than +* 65536, in order to accept up to MAXINT values for offset and length, +* we'll repeat a group of 65535 characters when needed. +* +* Note on implementation: calculating the number of characters in the +* string is a relatively expensive operation, so we only carry it out when +* necessary. It isn't necessary for +ve offsets and no specified length +* +* @author Chris Smith +* @param string +* @param integer number of UTF-8 characters offset (from left) +* @param integer (optional) length in UTF-8 characters from offset +* @return mixed string or FALSE if failure +* @package utf8 +* @subpackage strings +*/ +function utf8_substr($str, $offset, $length = NULL) { + + // generates E_NOTICE + // for PHP4 objects, but not PHP5 objects + $str = (string)$str; + $offset = (int)$offset; + if (!is_null($length)) $length = (int)$length; + + // handle trivial cases + if ($length === 0) return ''; + if ($offset < 0 && $length < 0 && $length < $offset) + return ''; + + // normalise negative offsets (we could use a tail + // anchored pattern, but they are horribly slow!) 
+ if ($offset < 0) { + + // see notes + $strlen = strlen(utf8_decode($str)); + $offset = $strlen + $offset; + if ($offset < 0) $offset = 0; + + } + + $Op = ''; + $Lp = ''; + + // establish a pattern for offset, a + // non-captured group equal in length to offset + if ($offset > 0) { + + $Ox = (int)($offset/65535); + $Oy = $offset%65535; + + if ($Ox) { + $Op = '(?:.{65535}){'.$Ox.'}'; + } + + $Op = '^(?:'.$Op.'.{'.$Oy.'})'; + + } else { + + // offset == 0; just anchor the pattern + $Op = '^'; + + } + + // establish a pattern for length + if (is_null($length)) { + + // the rest of the string + $Lp = '(.*)$'; + + } else { + + if (!isset($strlen)) { + // see notes + $strlen = strlen(utf8_decode($str)); + } + + // another trivial case + if ($offset > $strlen) return ''; + + if ($length > 0) { + + // reduce any length that would + // go passed the end of the string + $length = min($strlen-$offset, $length); + + $Lx = (int)( $length / 65535 ); + $Ly = $length % 65535; + + // negative length requires a captured group + // of length characters + if ($Lx) $Lp = '(?:.{65535}){'.$Lx.'}'; + $Lp = '('.$Lp.'.{'.$Ly.'})'; + + } else if ($length < 0) { + + if ( $length < ($offset - $strlen) ) { + return ''; + } + + $Lx = (int)((-$length)/65535); + $Ly = (-$length)%65535; + + // negative length requires ... capture everything + // except a group of -length characters + // anchored at the tail-end of the string + if ($Lx) $Lp = '(?:.{65535}){'.$Lx.'}'; + $Lp = '(.*)(?:'.$Lp.'.{'.$Ly.'})$'; + + } + + } + + if (!preg_match( '#'.$Op.$Lp.'#us',$str, $match )) { + return ''; + } + + return $match[1]; + +} + +//--------------------------------------------------------------- +/** +* UTF-8 aware alternative to strtolower +* Make a string lowercase +* Note: The concept of a characters "case" only exists is some alphabets +* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does +* not exist in the Chinese alphabet, for example. 
See Unicode Standard +* Annex #21: Case Mappings +* Note: requires utf8_to_unicode and utf8_from_unicode +* @author Andreas Gohr +* @param string +* @return mixed either string in lowercase or FALSE is UTF-8 invalid +* @see http://www.php.net/strtolower +* @see utf8_to_unicode +* @see utf8_from_unicode +* @see http://www.unicode.org/reports/tr21/tr21-5.html +* @see http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php +* @package utf8 +* @subpackage strings +*/ +function utf8_strtolower($string){ + + static $UTF8_UPPER_TO_LOWER = NULL; + + if ( is_null($UTF8_UPPER_TO_LOWER) ) { + $UTF8_UPPER_TO_LOWER = array( + 0x0041=>0x0061, 0x03A6=>0x03C6, 0x0162=>0x0163, 0x00C5=>0x00E5, 0x0042=>0x0062, + 0x0139=>0x013A, 0x00C1=>0x00E1, 0x0141=>0x0142, 0x038E=>0x03CD, 0x0100=>0x0101, + 0x0490=>0x0491, 0x0394=>0x03B4, 0x015A=>0x015B, 0x0044=>0x0064, 0x0393=>0x03B3, + 0x00D4=>0x00F4, 0x042A=>0x044A, 0x0419=>0x0439, 0x0112=>0x0113, 0x041C=>0x043C, + 0x015E=>0x015F, 0x0143=>0x0144, 0x00CE=>0x00EE, 0x040E=>0x045E, 0x042F=>0x044F, + 0x039A=>0x03BA, 0x0154=>0x0155, 0x0049=>0x0069, 0x0053=>0x0073, 0x1E1E=>0x1E1F, + 0x0134=>0x0135, 0x0427=>0x0447, 0x03A0=>0x03C0, 0x0418=>0x0438, 0x00D3=>0x00F3, + 0x0420=>0x0440, 0x0404=>0x0454, 0x0415=>0x0435, 0x0429=>0x0449, 0x014A=>0x014B, + 0x0411=>0x0431, 0x0409=>0x0459, 0x1E02=>0x1E03, 0x00D6=>0x00F6, 0x00D9=>0x00F9, + 0x004E=>0x006E, 0x0401=>0x0451, 0x03A4=>0x03C4, 0x0423=>0x0443, 0x015C=>0x015D, + 0x0403=>0x0453, 0x03A8=>0x03C8, 0x0158=>0x0159, 0x0047=>0x0067, 0x00C4=>0x00E4, + 0x0386=>0x03AC, 0x0389=>0x03AE, 0x0166=>0x0167, 0x039E=>0x03BE, 0x0164=>0x0165, + 0x0116=>0x0117, 0x0108=>0x0109, 0x0056=>0x0076, 0x00DE=>0x00FE, 0x0156=>0x0157, + 0x00DA=>0x00FA, 0x1E60=>0x1E61, 0x1E82=>0x1E83, 0x00C2=>0x00E2, 0x0118=>0x0119, + 0x0145=>0x0146, 0x0050=>0x0070, 0x0150=>0x0151, 0x042E=>0x044E, 0x0128=>0x0129, + 0x03A7=>0x03C7, 0x013D=>0x013E, 0x0422=>0x0442, 0x005A=>0x007A, 0x0428=>0x0448, + 0x03A1=>0x03C1, 0x1E80=>0x1E81, 0x016C=>0x016D, 0x00D5=>0x00F5, 
0x0055=>0x0075, + 0x0176=>0x0177, 0x00DC=>0x00FC, 0x1E56=>0x1E57, 0x03A3=>0x03C3, 0x041A=>0x043A, + 0x004D=>0x006D, 0x016A=>0x016B, 0x0170=>0x0171, 0x0424=>0x0444, 0x00CC=>0x00EC, + 0x0168=>0x0169, 0x039F=>0x03BF, 0x004B=>0x006B, 0x00D2=>0x00F2, 0x00C0=>0x00E0, + 0x0414=>0x0434, 0x03A9=>0x03C9, 0x1E6A=>0x1E6B, 0x00C3=>0x00E3, 0x042D=>0x044D, + 0x0416=>0x0436, 0x01A0=>0x01A1, 0x010C=>0x010D, 0x011C=>0x011D, 0x00D0=>0x00F0, + 0x013B=>0x013C, 0x040F=>0x045F, 0x040A=>0x045A, 0x00C8=>0x00E8, 0x03A5=>0x03C5, + 0x0046=>0x0066, 0x00DD=>0x00FD, 0x0043=>0x0063, 0x021A=>0x021B, 0x00CA=>0x00EA, + 0x0399=>0x03B9, 0x0179=>0x017A, 0x00CF=>0x00EF, 0x01AF=>0x01B0, 0x0045=>0x0065, + 0x039B=>0x03BB, 0x0398=>0x03B8, 0x039C=>0x03BC, 0x040C=>0x045C, 0x041F=>0x043F, + 0x042C=>0x044C, 0x00DE=>0x00FE, 0x00D0=>0x00F0, 0x1EF2=>0x1EF3, 0x0048=>0x0068, + 0x00CB=>0x00EB, 0x0110=>0x0111, 0x0413=>0x0433, 0x012E=>0x012F, 0x00C6=>0x00E6, + 0x0058=>0x0078, 0x0160=>0x0161, 0x016E=>0x016F, 0x0391=>0x03B1, 0x0407=>0x0457, + 0x0172=>0x0173, 0x0178=>0x00FF, 0x004F=>0x006F, 0x041B=>0x043B, 0x0395=>0x03B5, + 0x0425=>0x0445, 0x0120=>0x0121, 0x017D=>0x017E, 0x017B=>0x017C, 0x0396=>0x03B6, + 0x0392=>0x03B2, 0x0388=>0x03AD, 0x1E84=>0x1E85, 0x0174=>0x0175, 0x0051=>0x0071, + 0x0417=>0x0437, 0x1E0A=>0x1E0B, 0x0147=>0x0148, 0x0104=>0x0105, 0x0408=>0x0458, + 0x014C=>0x014D, 0x00CD=>0x00ED, 0x0059=>0x0079, 0x010A=>0x010B, 0x038F=>0x03CE, + 0x0052=>0x0072, 0x0410=>0x0430, 0x0405=>0x0455, 0x0402=>0x0452, 0x0126=>0x0127, + 0x0136=>0x0137, 0x012A=>0x012B, 0x038A=>0x03AF, 0x042B=>0x044B, 0x004C=>0x006C, + 0x0397=>0x03B7, 0x0124=>0x0125, 0x0218=>0x0219, 0x00DB=>0x00FB, 0x011E=>0x011F, + 0x041E=>0x043E, 0x1E40=>0x1E41, 0x039D=>0x03BD, 0x0106=>0x0107, 0x03AB=>0x03CB, + 0x0426=>0x0446, 0x00DE=>0x00FE, 0x00C7=>0x00E7, 0x03AA=>0x03CA, 0x0421=>0x0441, + 0x0412=>0x0432, 0x010E=>0x010F, 0x00D8=>0x00F8, 0x0057=>0x0077, 0x011A=>0x011B, + 0x0054=>0x0074, 0x004A=>0x006A, 0x040B=>0x045B, 0x0406=>0x0456, 0x0102=>0x0103, + 
0x039B=>0x03BB, 0x00D1=>0x00F1, 0x041D=>0x043D, 0x038C=>0x03CC, 0x00C9=>0x00E9, + 0x00D0=>0x00F0, 0x0407=>0x0457, 0x0122=>0x0123, + ); + } + + $uni = utf8_to_unicode($string); + + if ( !$uni ) { + return FALSE; + } + + $cnt = count($uni); + for ($i=0; $i < $cnt; $i++){ + if ( isset($UTF8_UPPER_TO_LOWER[$uni[$i]]) ) { + $uni[$i] = $UTF8_UPPER_TO_LOWER[$uni[$i]]; + } + } + + return utf8_from_unicode($uni); +} + +//--------------------------------------------------------------- +/** +* UTF-8 aware alternative to strtoupper +* Make a string uppercase +* Note: The concept of a characters "case" only exists is some alphabets +* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does +* not exist in the Chinese alphabet, for example. See Unicode Standard +* Annex #21: Case Mappings +* Note: requires utf8_to_unicode and utf8_from_unicode +* @author Andreas Gohr +* @param string +* @return mixed either string in lowercase or FALSE is UTF-8 invalid +* @see http://www.php.net/strtoupper +* @see utf8_to_unicode +* @see utf8_from_unicode +* @see http://www.unicode.org/reports/tr21/tr21-5.html +* @see http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php +* @package utf8 +* @subpackage strings +*/ +function utf8_strtoupper($string){ + + static $UTF8_LOWER_TO_UPPER = NULL; + + if ( is_null($UTF8_LOWER_TO_UPPER) ) { + $UTF8_LOWER_TO_UPPER = array( + 0x0061=>0x0041, 0x03C6=>0x03A6, 0x0163=>0x0162, 0x00E5=>0x00C5, 0x0062=>0x0042, + 0x013A=>0x0139, 0x00E1=>0x00C1, 0x0142=>0x0141, 0x03CD=>0x038E, 0x0101=>0x0100, + 0x0491=>0x0490, 0x03B4=>0x0394, 0x015B=>0x015A, 0x0064=>0x0044, 0x03B3=>0x0393, + 0x00F4=>0x00D4, 0x044A=>0x042A, 0x0439=>0x0419, 0x0113=>0x0112, 0x043C=>0x041C, + 0x015F=>0x015E, 0x0144=>0x0143, 0x00EE=>0x00CE, 0x045E=>0x040E, 0x044F=>0x042F, + 0x03BA=>0x039A, 0x0155=>0x0154, 0x0069=>0x0049, 0x0073=>0x0053, 0x1E1F=>0x1E1E, + 0x0135=>0x0134, 0x0447=>0x0427, 0x03C0=>0x03A0, 0x0438=>0x0418, 0x00F3=>0x00D3, + 0x0440=>0x0420, 0x0454=>0x0404, 0x0435=>0x0415, 
0x0449=>0x0429, 0x014B=>0x014A, + 0x0431=>0x0411, 0x0459=>0x0409, 0x1E03=>0x1E02, 0x00F6=>0x00D6, 0x00F9=>0x00D9, + 0x006E=>0x004E, 0x0451=>0x0401, 0x03C4=>0x03A4, 0x0443=>0x0423, 0x015D=>0x015C, + 0x0453=>0x0403, 0x03C8=>0x03A8, 0x0159=>0x0158, 0x0067=>0x0047, 0x00E4=>0x00C4, + 0x03AC=>0x0386, 0x03AE=>0x0389, 0x0167=>0x0166, 0x03BE=>0x039E, 0x0165=>0x0164, + 0x0117=>0x0116, 0x0109=>0x0108, 0x0076=>0x0056, 0x00FE=>0x00DE, 0x0157=>0x0156, + 0x00FA=>0x00DA, 0x1E61=>0x1E60, 0x1E83=>0x1E82, 0x00E2=>0x00C2, 0x0119=>0x0118, + 0x0146=>0x0145, 0x0070=>0x0050, 0x0151=>0x0150, 0x044E=>0x042E, 0x0129=>0x0128, + 0x03C7=>0x03A7, 0x013E=>0x013D, 0x0442=>0x0422, 0x007A=>0x005A, 0x0448=>0x0428, + 0x03C1=>0x03A1, 0x1E81=>0x1E80, 0x016D=>0x016C, 0x00F5=>0x00D5, 0x0075=>0x0055, + 0x0177=>0x0176, 0x00FC=>0x00DC, 0x1E57=>0x1E56, 0x03C3=>0x03A3, 0x043A=>0x041A, + 0x006D=>0x004D, 0x016B=>0x016A, 0x0171=>0x0170, 0x0444=>0x0424, 0x00EC=>0x00CC, + 0x0169=>0x0168, 0x03BF=>0x039F, 0x006B=>0x004B, 0x00F2=>0x00D2, 0x00E0=>0x00C0, + 0x0434=>0x0414, 0x03C9=>0x03A9, 0x1E6B=>0x1E6A, 0x00E3=>0x00C3, 0x044D=>0x042D, + 0x0436=>0x0416, 0x01A1=>0x01A0, 0x010D=>0x010C, 0x011D=>0x011C, 0x00F0=>0x00D0, + 0x013C=>0x013B, 0x045F=>0x040F, 0x045A=>0x040A, 0x00E8=>0x00C8, 0x03C5=>0x03A5, + 0x0066=>0x0046, 0x00FD=>0x00DD, 0x0063=>0x0043, 0x021B=>0x021A, 0x00EA=>0x00CA, + 0x03B9=>0x0399, 0x017A=>0x0179, 0x00EF=>0x00CF, 0x01B0=>0x01AF, 0x0065=>0x0045, + 0x03BB=>0x039B, 0x03B8=>0x0398, 0x03BC=>0x039C, 0x045C=>0x040C, 0x043F=>0x041F, + 0x044C=>0x042C, 0x00FE=>0x00DE, 0x00F0=>0x00D0, 0x1EF3=>0x1EF2, 0x0068=>0x0048, + 0x00EB=>0x00CB, 0x0111=>0x0110, 0x0433=>0x0413, 0x012F=>0x012E, 0x00E6=>0x00C6, + 0x0078=>0x0058, 0x0161=>0x0160, 0x016F=>0x016E, 0x03B1=>0x0391, 0x0457=>0x0407, + 0x0173=>0x0172, 0x00FF=>0x0178, 0x006F=>0x004F, 0x043B=>0x041B, 0x03B5=>0x0395, + 0x0445=>0x0425, 0x0121=>0x0120, 0x017E=>0x017D, 0x017C=>0x017B, 0x03B6=>0x0396, + 0x03B2=>0x0392, 0x03AD=>0x0388, 0x1E85=>0x1E84, 0x0175=>0x0174, 0x0071=>0x0051, 
+ 0x0437=>0x0417, 0x1E0B=>0x1E0A, 0x0148=>0x0147, 0x0105=>0x0104, 0x0458=>0x0408, + 0x014D=>0x014C, 0x00ED=>0x00CD, 0x0079=>0x0059, 0x010B=>0x010A, 0x03CE=>0x038F, + 0x0072=>0x0052, 0x0430=>0x0410, 0x0455=>0x0405, 0x0452=>0x0402, 0x0127=>0x0126, + 0x0137=>0x0136, 0x012B=>0x012A, 0x03AF=>0x038A, 0x044B=>0x042B, 0x006C=>0x004C, + 0x03B7=>0x0397, 0x0125=>0x0124, 0x0219=>0x0218, 0x00FB=>0x00DB, 0x011F=>0x011E, + 0x043E=>0x041E, 0x1E41=>0x1E40, 0x03BD=>0x039D, 0x0107=>0x0106, 0x03CB=>0x03AB, + 0x0446=>0x0426, 0x00FE=>0x00DE, 0x00E7=>0x00C7, 0x03CA=>0x03AA, 0x0441=>0x0421, + 0x0432=>0x0412, 0x010F=>0x010E, 0x00F8=>0x00D8, 0x0077=>0x0057, 0x011B=>0x011A, + 0x0074=>0x0054, 0x006A=>0x004A, 0x045B=>0x040B, 0x0456=>0x0406, 0x0103=>0x0102, + 0x03BB=>0x039B, 0x00F1=>0x00D1, 0x043D=>0x041D, 0x03CC=>0x038C, 0x00E9=>0x00C9, + 0x00F0=>0x00D0, 0x0457=>0x0407, 0x0123=>0x0122, + ); + } + + $uni = utf8_to_unicode($string); + + if ( !$uni ) { + return FALSE; + } + + $cnt = count($uni); + for ($i=0; $i < $cnt; $i++){ + if( isset($UTF8_LOWER_TO_UPPER[$uni[$i]]) ) { + $uni[$i] = $UTF8_LOWER_TO_UPPER[$uni[$i]]; + } + } + + return utf8_from_unicode($uni); +} diff --git a/includes/utf8/ord.php b/includes/utf8/ord.php new file mode 100644 index 0000000..79f13dc --- /dev/null +++ b/includes/utf8/ord.php @@ -0,0 +1,92 @@ += 0 && $ord0 <= 127 ) { + return $ord0; + } + + if ( !isset($chr{1}) ) { + trigger_error('Short sequence - at least 2 bytes expected, only 1 seen'); + return FALSE; + } + + $ord1 = ord($chr{1}); + if ( $ord0 >= 192 && $ord0 <= 223 ) { + return ( $ord0 - 192 ) * 64 + + ( $ord1 - 128 ); + } + + if ( !isset($chr{2}) ) { + trigger_error('Short sequence - at least 3 bytes expected, only 2 seen'); + return FALSE; + } + $ord2 = ord($chr{2}); + if ( $ord0 >= 224 && $ord0 <= 239 ) { + return ($ord0-224)*4096 + + ($ord1-128)*64 + + ($ord2-128); + } + + if ( !isset($chr{3}) ) { + trigger_error('Short sequence - at least 4 bytes expected, only 3 seen'); + return FALSE; + } + $ord3 = 
ord($chr{3}); + if ($ord0>=240 && $ord0<=247) { + return ($ord0-240)*262144 + + ($ord1-128)*4096 + + ($ord2-128)*64 + + ($ord3-128); + + } + + if ( !isset($chr{4}) ) { + trigger_error('Short sequence - at least 5 bytes expected, only 4 seen'); + return FALSE; + } + $ord4 = ord($chr{4}); + if ($ord0>=248 && $ord0<=251) { + return ($ord0-248)*16777216 + + ($ord1-128)*262144 + + ($ord2-128)*4096 + + ($ord3-128)*64 + + ($ord4-128); + } + + if ( !isset($chr{5}) ) { + trigger_error('Short sequence - at least 6 bytes expected, only 5 seen'); + return FALSE; + } + if ($ord0>=252 && $ord0<=253) { + return ($ord0-252) * 1073741824 + + ($ord1-128)*16777216 + + ($ord2-128)*262144 + + ($ord3-128)*4096 + + ($ord4-128)*64 + + (ord($c{5})-128); + } + + if ( $ord0 >= 254 && $ord0 <= 255 ) { + trigger_error('Invalid UTF-8 with surrogate ordinal '.$ord0); + return FALSE; + } + +} + diff --git a/includes/utf8/str_ireplace.php b/includes/utf8/str_ireplace.php new file mode 100644 index 0000000..4e4bae4 --- /dev/null +++ b/includes/utf8/str_ireplace.php @@ -0,0 +1,80 @@ + +* @param string $input +* @param int $length +* @param string $padStr +* @param int $type ( same constants as str_pad ) +* @return string +* @see http://www.php.net/str_pad +* @see utf8_substr +* @package utf8 +* @subpackage strings +*/ +function utf8_str_pad($input, $length, $padStr = ' ', $type = STR_PAD_RIGHT) { + + $inputLen = utf8_strlen($input); + if ($length <= $inputLen) { + return $input; + } + + $padStrLen = utf8_strlen($padStr); + $padLen = $length - $inputLen; + + if ($type == STR_PAD_RIGHT) { + $repeatTimes = ceil($padLen / $padStrLen); + return utf8_substr($input . str_repeat($padStr, $repeatTimes), 0, $length); + } + + if ($type == STR_PAD_LEFT) { + $repeatTimes = ceil($padLen / $padStrLen); + return utf8_substr(str_repeat($padStr, $repeatTimes), 0, floor($padLen)) . 
$input; + } + + if ($type == STR_PAD_BOTH) { + + $padLen/= 2; + $padAmountLeft = floor($padLen); + $padAmountRight = ceil($padLen); + $repeatTimesLeft = ceil($padAmountLeft / $padStrLen); + $repeatTimesRight = ceil($padAmountRight / $padStrLen); + + $paddingLeft = utf8_substr(str_repeat($padStr, $repeatTimesLeft), 0, $padAmountLeft); + $paddingRight = utf8_substr(str_repeat($padStr, $repeatTimesRight), 0, $padAmountLeft); + return $paddingLeft . $input . $paddingRight; + } + + trigger_error('utf8_str_pad: Unknown padding type (' . $type . ')',E_USER_ERROR); +} diff --git a/includes/utf8/str_split.php b/includes/utf8/str_split.php new file mode 100644 index 0000000..b37bce7 --- /dev/null +++ b/includes/utf8/str_split.php @@ -0,0 +1,35 @@ + diff --git a/includes/utf8/tests/bench/strpos.php b/includes/utf8/tests/bench/strpos.php new file mode 100644 index 0000000..3c31e50 --- /dev/null +++ b/includes/utf8/tests/bench/strpos.php @@ -0,0 +1,7 @@ + diff --git a/includes/utf8/tests/bench/strrpos.php b/includes/utf8/tests/bench/strrpos.php new file mode 100644 index 0000000..5da54f6 --- /dev/null +++ b/includes/utf8/tests/bench/strrpos.php @@ -0,0 +1,7 @@ + diff --git a/includes/utf8/tests/bench/substr.php b/includes/utf8/tests/bench/substr.php new file mode 100644 index 0000000..8c97de7 --- /dev/null +++ b/includes/utf8/tests/bench/substr.php @@ -0,0 +1,8 @@ + diff --git a/includes/utf8/tests/benchconfig.php b/includes/utf8/tests/benchconfig.php new file mode 100644 index 0000000..fe6c215 --- /dev/null +++ b/includes/utf8/tests/benchconfig.php @@ -0,0 +1,34 @@ +UnitTestCase('test_utf8_is_ascii()'); + } + + function testUtf8() { + $str = 'testiñg'; + $this->assertFalse(utf8_is_ascii($str)); + } + + function testAscii() { + $str = 'testing'; + $this->assertTrue(utf8_is_ascii($str)); + } + + function testInvalidChar() { + $str = "tes\xe9ting"; + $this->assertFalse(utf8_is_ascii($str)); + } + + function testEmptyStr() { + $str = ''; + $this->assertTrue(utf8_is_ascii($str)); + 
} + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +class test_utf8_strip_non_ascii extends UnitTestCase { + + function test_utf8_strip_non_ascii() { + $this->UnitTestCase('test_utf8_strip_non_ascii()'); + } + + function testUtf8() { + $str = 'testiñg'; + $this->assertEqual(utf8_strip_non_ascii($str),'testig'); + } + + function testAscii() { + $str = 'testing'; + $this->assertEqual(utf8_strip_non_ascii($str),'testing'); + } + + function testInvalidChar() { + $str = "tes\xe9ting"; + $this->assertEqual(utf8_strip_non_ascii($str),'testing'); + } + + function testEmptyStr() { + $str = ''; + $this->assertEqual(utf8_strip_non_ascii($str),''); + } +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +class test_utf8_strip_non_ascii_ctrl extends UnitTestCase{ + + function test_utf8_strip_non_ascii_ctrl() { + $this->UnitTestCase('test_utf8_strip_non_ascii_ctrl'); + } + + function testNulAndNon7Bit() { + $str = "a\x00ñ\x00c"; + $this->assertEqual(utf8_strip_non_ascii_ctrl($str),'ac'); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +class test_utf8_strip_ascii_ctrl extends UnitTestCase{ + + function test_utf8_strip_ascii_ctrl() { + $this->UnitTestCase('test_utf8_strip_ascii_ctrl'); + } + + function testNul() { + $str = "a\x00b\x00c"; + $this->assertEqual(utf8_strip_ascii_ctrl($str),'abc'); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +class test_utf8_accents_to_ascii extends UnitTestCase{ + + function test_utf8_accents_to_ascii() { + $this->UnitTestCase('test_utf8_accents_to_ascii'); + } + + function testEmptyStr() { + $this->assertEqual(utf8_accents_to_ascii(''),''); + } + + function testLowercase() { + $str = "ô"; + 
$this->assertEqual(utf8_accents_to_ascii($str,-1),'o'); + } + + function testUppercase() { + $str = "Ô"; + $this->assertEqual(utf8_accents_to_ascii($str,1),'O'); + } + + function testBoth() { + $str = "ôÔ"; + $this->assertEqual(utf8_accents_to_ascii($str,0),'oO'); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new GroupTest('utf8_ascii'); + $test->addTestCase(new test_utf8_is_ascii()); + $test->addTestCase(new test_utf8_strip_non_ascii()); + $test->addTestCase(new test_utf8_strip_non_ascii_ctrl()); + $test->addTestCase(new test_utf8_strip_ascii_ctrl()); + $test->addTestCase(new test_utf8_accents_to_ascii()); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_bad.test.php b/includes/utf8/tests/cases/utf8_bad.test.php new file mode 100644 index 0000000..e964057 --- /dev/null +++ b/includes/utf8/tests/cases/utf8_bad.test.php @@ -0,0 +1,527 @@ +UnitTestCase('utf8_bad_find()'); + } + + function testValidUtf8() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertFalse(utf8_bad_find($str)); + } + + function testValidUtf8Ascii() { + $str = 'testing'; + $this->assertFalse(utf8_bad_find($str)); + } + + function testInvalidUtf8() { + $str = "Iñtërnâtiôn\xe9àlizætiøn"; + $this->assertEqual(utf8_bad_find($str),15); + } + + function testInvalidUtf8Ascii() { + $str = "this is an invalid char '\xe9' here"; + $this->assertEqual(utf8_bad_find($str),25); + } + + function testInvalidUtf8Start() { + $str = "\xe9Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_find($str),0); + } + + function testInvalidUtf8End() { + $str = "Iñtërnâtiônàlizætiøn\xe9"; + $this->assertEqual(utf8_bad_find($str),27); + } + + function testValidTwoOctetId() { + $str = "abc\xc3\xb1"; + $this->assertFalse(utf8_bad_find($str)); + } + + function testInvalidTwoOctetSequence() { + $str = 
"Iñtërnâtiônàlizætiøn \xc3\x28 Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_find($str),28); + } + + function testInvalidIdBetweenTwoAndThree() { + $str = "Iñtërnâtiônàlizætiøn\xa0\xa1Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_find($str),27); + } + + + function testValidThreeOctetId() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x82\xa1Iñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_bad_find($str)); + } + + + function testInvalidThreeOctetSequenceSecond() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x28\xa1Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_find($str),27); + } + + function testInvalidThreeOctetSequenceThird() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x82\x28Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_find($str),27); + } + + function testValidFourOctetId() { + $str = "Iñtërnâtiônàlizætiøn\xf0\x90\x8c\xbcIñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_bad_find($str)); + } + + function testInvalidFourOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xf0\x28\x8c\xbcIñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_find($str),27); + } + + function testInvalidFiveOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xf8\xa1\xa1\xa1\xa1Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_find($str),27); + } + + function testInvalidSixOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xfc\xa1\xa1\xa1\xa1\xa1Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_find($str),27); + } + + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +class test_utf8_bad_findall extends UnitTestCase { + + function test_utf8_bad_findall() { + $this->UnitTestCase('test_utf8_bad_findall()'); + } + + function testValidUtf8() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertFalse(utf8_bad_findall($str)); + } + + function testValidUtf8Ascii() { + $str = 'testing'; + $this->assertFalse(utf8_bad_findall($str)); + } + + function testInvalidUtf8() { + $str = "Iñtërnâtiôn\xe9àlizætiøn"; + $test = array(15); + 
$this->assertEqual(utf8_bad_findall($str),$test); + } + + function testInvalidUtf8Ascii() { + $str = "this is an invalid char '\xe9' here"; + $test = array(25); + $this->assertEqual(utf8_bad_findall($str),$test); + } + + function testInvalidUtf8Multiple() { + $str = "\xe9Iñtërnâtiôn\xe9àlizætiøn\xe9"; + $test = array(0,16,29); + $this->assertEqual(utf8_bad_findall($str),$test); + } + + function testValidTwoOctetId() { + $str = "abc\xc3\xb1"; + $this->assertFalse(utf8_bad_findall($str)); + } + + function testInvalidTwoOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn \xc3\x28 Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_findall($str),array(28)); + } + + function testInvalidIdBetweenTwoAndThree() { + $str = "Iñtërnâtiônàlizætiøn\xa0\xa1Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_findall($str),array(27,28)); + } + + function testValidThreeOctetId() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x82\xa1Iñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_bad_findall($str)); + } + + function testInvalidThreeOctetSequenceSecond() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x28\xa1Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_findall($str),array(27,29)); + } + + function testInvalidThreeOctetSequenceThird() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x82\x28Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_find($str),27); + } + + function testValidFourOctetId() { + $str = "Iñtërnâtiônàlizætiøn\xf0\x90\x8c\xbcIñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_bad_findall($str)); + } + + function testInvalidFourOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xf0\x28\x8c\xbcIñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_findall($str),array(27,29,30)); + } + + function testInvalidFiveOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xf8\xa1\xa1\xa1\xa1Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_findall($str),range(27,31)); + } + + function testInvalidSixOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xfc\xa1\xa1\xa1\xa1\xa1Iñtërnâtiônàlizætiøn"; + 
$this->assertEqual(utf8_bad_findall($str),range(27,32)); + } + + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +class test_utf8_bad_strip extends UnitTestCase { + + function test_utf8_bad_strip() { + $this->UnitTestCase('test_utf8_bad_strip()'); + } + + function testValidUtf8() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_bad_strip($str),$str); + } + + function testValidUtf8Ascii() { + $str = 'testing'; + $this->assertEqual(utf8_bad_strip($str),$str); + } + + function testInvalidUtf8() { + $str = "Iñtërnâtiôn\xe9àlizætiøn"; + $this->assertEqual(utf8_bad_strip($str),'Iñtërnâtiônàlizætiøn'); + } + + function testInvalidUtf8Ascii() { + $str = "this is an invalid char '\xe9' here"; + $this->assertEqual(utf8_bad_strip($str),"this is an invalid char '' here"); + } + + function testInvalidUtf8Multiple() { + $str = "\xe9Iñtërnâtiôn\xe9àlizætiøn\xe9"; + $this->assertEqual(utf8_bad_strip($str),'Iñtërnâtiônàlizætiøn'); + } + + function testValidTwoOctetId() { + $str = "abc\xc3\xb1"; + $this->assertEqual(utf8_bad_strip($str),$str); + } + + function testInvalidTwoOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn \xc3\x28 Iñtërnâtiônàlizætiøn"; + $stripped = "Iñtërnâtiônàlizætiøn \x28 Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_strip($str),$stripped); + } + + function testInvalidIdBetweenTwoAndThree() { + $str = "Iñtërnâtiônàlizætiøn\xa0\xa1Iñtërnâtiônàlizætiøn"; + $stripped = "IñtërnâtiônàlizætiønIñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_strip($str),$stripped); + } + + function testValidThreeOctetId() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x82\xa1Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_strip($str),$str); + } + + function testInvalidThreeOctetSequenceSecond() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x28\xa1Iñtërnâtiônàlizætiøn"; + $stripped = "Iñtërnâtiônàlizætiøn(Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_strip($str),$stripped); + } + + function 
testInvalidThreeOctetSequenceThird() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x82\x28Iñtërnâtiônàlizætiøn"; + $stripped = "Iñtërnâtiônàlizætiøn(Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_strip($str),$stripped); + } + + function testValidFourOctetId() { + $str = "Iñtërnâtiônàlizætiøn\xf0\x90\x8c\xbcIñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_strip($str),$str); + } + + function testInvalidFourOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xf0\x28\x8c\xbcIñtërnâtiônàlizætiøn"; + $stripped = "Iñtërnâtiônàlizætiøn(Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_strip($str),$stripped); + } + + function testInvalidFiveOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xf8\xa1\xa1\xa1\xa1Iñtërnâtiônàlizætiøn"; + $stripped = "IñtërnâtiônàlizætiønIñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_strip($str),$stripped); + } + + function testInvalidSixOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xfc\xa1\xa1\xa1\xa1\xa1Iñtërnâtiônàlizætiøn"; + $stripped = "IñtërnâtiônàlizætiønIñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_strip($str),$stripped); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +class test_utf8_bad_replace extends UnitTestCase { + + function test_utf8_bad_replace() { + $this->UnitTestCase('test_utf8_bad_replace()'); + } + + function testValidUtf8() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_bad_replace($str),$str); + } + + function testValidUtf8Ascii() { + $str = 'testing'; + $this->assertEqual(utf8_bad_replace($str),$str); + } + + function testInvalidUtf8() { + $str = "Iñtërnâtiôn\xe9àlizætiøn"; + $this->assertEqual(utf8_bad_replace($str),'Iñtërnâtiôn?àlizætiøn'); + } + + function testInvalidUtf8WithX() { + $str = "Iñtërnâtiôn\xe9àlizætiøn"; + $this->assertEqual(utf8_bad_replace($str,'X'),'IñtërnâtiônXàlizætiøn'); + } + + function testInvalidUtf8Ascii() { + $str = "this is an invalid char '\xe9' here"; + 
$this->assertEqual(utf8_bad_replace($str),"this is an invalid char '?' here"); + } + + function testInvalidUtf8Multiple() { + $str = "\xe9Iñtërnâtiôn\xe9àlizætiøn\xe9"; + $this->assertEqual(utf8_bad_replace($str),'?Iñtërnâtiôn?àlizætiøn?'); + } + + function testValidTwoOctetId() { + $str = "abc\xc3\xb1"; + $this->assertEqual(utf8_bad_replace($str),$str); + } + + function testInvalidTwoOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn \xc3\x28 Iñtërnâtiônàlizætiøn"; + $replaced= "Iñtërnâtiônàlizætiøn ?( Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_replace($str),$replaced); + } + + function testInvalidIdBetweenTwoAndThree() { + $str = "Iñtërnâtiônàlizætiøn\xa0\xa1Iñtërnâtiônàlizætiøn"; + $replaced= "Iñtërnâtiônàlizætiøn??Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_replace($str),$replaced); + } + + function testValidThreeOctetId() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x82\xa1Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_replace($str),$str); + } + + function testInvalidThreeOctetSequenceSecond() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x28\xa1Iñtërnâtiônàlizætiøn"; + $replaced= "Iñtërnâtiônàlizætiøn?(?Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_replace($str),$replaced); + } + + function testInvalidThreeOctetSequenceThird() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x82\x28Iñtërnâtiônàlizætiøn"; + $replaced= "Iñtërnâtiônàlizætiøn??(Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_replace($str),$replaced); + } + + function testValidFourOctetId() { + $str = "Iñtërnâtiônàlizætiøn\xf0\x90\x8c\xbcIñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_replace($str),$str); + } + + function testInvalidFourOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xf0\x28\x8c\xbcIñtërnâtiônàlizætiøn"; + $replaced= "Iñtërnâtiônàlizætiøn?(??Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_replace($str),$replaced); + } + + function testInvalidFiveOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xf8\xa1\xa1\xa1\xa1Iñtërnâtiônàlizætiøn"; + $replaced= 
"Iñtërnâtiônàlizætiøn?????Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_replace($str),$replaced); + } + + function testInvalidSixOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xfc\xa1\xa1\xa1\xa1\xa1Iñtërnâtiônàlizætiøn"; + $replaced= "Iñtërnâtiônàlizætiøn??????Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_replace($str),$replaced); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +class test_utf8_bad_identify extends UnitTestCase { + + function test_utf8_bad_identify() { + $this->UnitTestCase('utf8_bad_identify()'); + } + + function testValidUtf8() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertFalse(utf8_bad_identify($str,$i)); + $this->assertNull($i); + } + + function testValidUtf8Ascii() { + $str = 'testing'; + $this->assertFalse(utf8_bad_identify($str,$i)); + $this->assertNull($i); + } + + function testInvalidUtf8() { + $str = "Iñtërnâtiôn\xe9àlizætiøn"; + $this->assertEqual(utf8_bad_identify($str,$i),UTF8_BAD_SEQINCOMPLETE); + $this->assertEqual($i,15); + } + + function testInvalidUtf8Ascii() { + $str = "this is an invalid char '\xe9' here"; + $this->assertEqual(utf8_bad_identify($str,$i),UTF8_BAD_SEQINCOMPLETE); + $this->assertEqual($i,25); + } + + function testInvalidUtf8Start() { + $str = "\xe9Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_identify($str,$i),UTF8_BAD_SEQINCOMPLETE); + $this->assertEqual($i,0); + } + + function testInvalidUtf8End() { + $str = "Iñtërnâtiônàlizætiøn\xe9"; + $this->assertEqual(utf8_bad_identify($str,$i),UTF8_BAD_SEQINCOMPLETE); + $this->assertEqual($i,27); + } + + function testValidTwoOctetId() { + $str = "abc\xc3\xb1"; + $this->assertFalse(utf8_bad_identify($str,$i)); + $this->assertNull($i); + } + + function testInvalidTwoOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn \xc3\x28 Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_identify($str,$i),UTF8_BAD_SEQINCOMPLETE); + $this->assertEqual($i,28); + } + + function 
testInvalidIdBetweenTwoAndThree() { + $str = "Iñtërnâtiônàlizætiøn\xa0\xa1Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_identify($str,$i),UTF8_BAD_SEQID); + $this->assertEqual($i,27); + } + + + function testValidThreeOctetId() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x82\xa1Iñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_bad_identify($str,$i)); + $this->assertNull($i); + } + + + function testInvalidThreeOctetSequenceSecond() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x28\xa1Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_identify($str,$i),UTF8_BAD_SEQINCOMPLETE); + $this->assertEqual($i,27); + } + + function testInvalidThreeOctetSequenceThird() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x82\x28Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_identify($str,$i),UTF8_BAD_SEQINCOMPLETE); + $this->assertEqual($i,28); + } + + function testValidFourOctetId() { + $str = "Iñtërnâtiônàlizætiøn\xf0\x90\x8c\xbcIñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_bad_identify($str,$i)); + $this->assertNull($i); + } + + function testInvalidFourOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xf0\x28\x8c\xbcIñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_identify($str,$i),UTF8_BAD_SEQINCOMPLETE); + $this->assertEqual($i,27); + } + + function testInvalidFiveOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xf8\xa1\xa1\xa1\xa1Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_identify($str,$i),UTF8_BAD_5OCTET); + $this->assertEqual($i,27); + } + + function testInvalidSixOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xfc\xa1\xa1\xa1\xa1\xa1Iñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_bad_identify($str,$i),UTF8_BAD_6OCTET); + $this->assertEqual($i,27); + } + + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new GroupTest('utf8_bad'); + $test->addTestCase(new test_utf8_bad_find()); + $test->addTestCase(new 
test_utf8_bad_findall()); + $test->addTestCase(new test_utf8_bad_strip()); + $test->addTestCase(new test_utf8_bad_replace()); + $test->addTestCase(new test_utf8_bad_identify()); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_ord.test.php b/includes/utf8/tests/cases/utf8_ord.test.php new file mode 100644 index 0000000..ee1e8bf --- /dev/null +++ b/includes/utf8/tests/cases/utf8_ord.test.php @@ -0,0 +1,65 @@ +UnitTestCase('utf8_ord()'); + } + + function testEmptyStr() { + $str = ''; + $this->assertEqual(utf8_ord($str),0); + } + + function testAsciiChar() { + $str = 'a'; + $this->assertEqual(utf8_ord($str),97); + } + + function test2ByteChar() { + $str = 'ñ'; + $this->assertEqual(utf8_ord($str),241); + } + + function test3ByteChar() { + $str = '₧'; + $this->assertEqual(utf8_ord($str),8359); + } + + function test4ByteChar() { + $str = "\xf0\x90\x8c\xbc"; + $this->assertEqual(utf8_ord($str),66364); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new test_utf8_ord(); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_position.test.php b/includes/utf8/tests/cases/utf8_position.test.php new file mode 100644 index 0000000..ef99551 --- /dev/null +++ b/includes/utf8/tests/cases/utf8_position.test.php @@ -0,0 +1,136 @@ +assertIdentical(utf8_byte_position($str, 3), 3); + $this->assertIdentical(utf8_byte_position($str, 3, 4), array(3,4)); + $this->assertIdentical(utf8_byte_position($str, -1), 0); + $this->assertIdentical(utf8_byte_position($str, 8), 7); + } + + function test_multibyte_char_to_byte() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertIdentical(utf8_byte_position($str, 3), 4); + $this->assertIdentical(utf8_byte_position($str, 3, 5), array(4,7)); + $this->assertIdentical(utf8_byte_position($str, 
-1), 0); + $this->assertIdentical(utf8_byte_position($str, 28), 27); + } + + // tests for utf8_locate_current_chr & utf8_locate_next_chr + function test_singlebyte(){ + $tests = array(); + + // single byte, should return current index + $tests[] = array('aaживπά우리をあöä',0,0); + $tests[] = array('aaживπά우리をあöä',1,1); + + foreach($tests as $test){ + $this->assertIdentical(utf8_locate_current_chr($test[0],$test[1]),$test[2]); + } + + $tests = array(); + $tests[] = array('aaживπά우리をあöä',1,1); + + foreach($tests as $test){ + $this->assertIdentical(utf8_locate_next_chr($test[0],$test[1]),$test[2]); + } + + } + + function test_twobyte(){ + // two byte, should move to boundary, expect even number + $tests = array(); + $tests[] = array('aaживπά우리をあöä',2,2); + $tests[] = array('aaживπά우리をあöä',3,2); + $tests[] = array('aaживπά우리をあöä',4,4); + + foreach($tests as $test){ + $this->assertIdentical(utf8_locate_current_chr($test[0],$test[1]),$test[2]); + } + + $tests = array(); + $tests[] = array('aaживπά우리をあöä',2,2); + $tests[] = array('aaживπά우리をあöä',3,4); + $tests[] = array('aaживπά우리をあöä',4,4); + + foreach($tests as $test){ + $this->assertIdentical(utf8_locate_next_chr($test[0],$test[1]),$test[2]); + } + } + + function test_threebyte(){ + // three byte, should move to boundary 10 or 13 + $tests = array(); + $tests[] = array('aaживπά우리をあöä',10,10); + $tests[] = array('aaживπά우리をあöä',11,10); + $tests[] = array('aaживπά우리をあöä',12,10); + $tests[] = array('aaживπά우리をあöä',13,13); + + foreach($tests as $test){ + $this->assertIdentical(utf8_locate_current_chr($test[0],$test[1]),$test[2]); + } + + $tests = array(); + $tests[] = array('aaживπά우리をあöä',10,10); + $tests[] = array('aaживπά우리をあöä',11,13); + $tests[] = array('aaживπά우리をあöä',12,13); + $tests[] = array('aaживπά우리をあöä',13,13); + + foreach($tests as $test){ + $this->assertIdentical(utf8_locate_next_chr($test[0],$test[1]),$test[2]); + } + } + + function test_bounds(){ + // bounds checking + $tests = array(); + $tests[] = 
array('aaживπά우리をあöä',-2,0); + $tests[] = array('aaживπά우리をあöä',128,29); + + foreach($tests as $test){ + $this->assertIdentical(utf8_locate_current_chr($test[0],$test[1]),$test[2]); + } + + $tests[] = array('aaживπά우리をあöä',-2,0); + $tests[] = array('aaживπά우리をあöä',128,29); + + foreach($tests as $test){ + $this->assertIdentical(utf8_locate_next_chr($test[0],$test[1]),$test[2]); + } + } + + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new test_utf8_position(); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_specials.test.php b/includes/utf8/tests/cases/utf8_specials.test.php new file mode 100644 index 0000000..326e6ab --- /dev/null +++ b/includes/utf8/tests/cases/utf8_specials.test.php @@ -0,0 +1,86 @@ +UnitTestCase('test_utf8_is_word_chars()'); + } + + function testEmptyString() { + $this->assertTrue(utf8_is_word_chars('')); + } + + function testAllWordChars() { + $this->assertTrue(utf8_is_word_chars('HelloWorld')); + } + + function testSpecials() { + $str = 'Hello ' . + chr(0xe0 | (0x2234 >> 12)) . + chr(0x80 | ((0x2234 >> 6) & 0x003f)) . + chr(0x80 | (0x2234 & 0x003f)) . + ' World'; + $this->assertFalse(utf8_is_word_chars($str)); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +class test_utf8_strip_specials extends UnitTestCase { + + function test_utf8_strip_specials() { + $this->UnitTestCase('test_utf8_strip_specials()'); + } + + function testEmptyString() { + $this->assertEqual(utf8_strip_specials(''),''); + } + + function testStrip() { + $str = 'Hello ' . + chr(0xe0 | (0x2234 >> 12)) . + chr(0x80 | ((0x2234 >> 6) & 0x003f)) . + chr(0x80 | (0x2234 & 0x003f)) . 
+ ' World'; + $this->assertEqual(utf8_strip_specials($str),'HelloWorld'); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new GroupTest('utf8_ascii'); + $test->addTestCase(new test_utf8_strip_specials()); + $test->addTestCase(new test_utf8_is_word_chars()); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_str_ireplace.test.php b/includes/utf8/tests/cases/utf8_str_ireplace.test.php new file mode 100644 index 0000000..810c81a --- /dev/null +++ b/includes/utf8/tests/cases/utf8_str_ireplace.test.php @@ -0,0 +1,132 @@ +UnitTestCase('test_utf8_str_ireplace()'); + } + + function testReplace() { + $str = 'Iñtërnâtiônàlizætiøn'; + $replaced = 'Iñtërnâtiônàlisetiøn'; + $this->assertEqual(utf8_ireplace('lIzÆ','lise',$str),$replaced); + } + + function testReplaceNoMatch() { + $str = 'Iñtërnâtiônàlizætiøn'; + $replaced = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_ireplace('foo','bar',$str),$replaced); + } + + function testEmptyString() { + $str = ''; + $replaced = ''; + $this->assertEqual(utf8_ireplace('foo','bar',$str),$replaced); + } + + function testEmptySearch() { + $str = 'Iñtërnâtiônàlizætiøn'; + $replaced = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_ireplace('','x',$str),$replaced); + } + + function testReplaceCount() { + $str = 'Iñtërnâtiônàlizætiøn'; + $replaced = 'IñtërXâtiôXàlizætiøn'; + $this->assertEqual(utf8_ireplace('n','X',$str,2),$replaced); + } + + function testReplaceDifferentSearchReplaceLength() { + $str = 'Iñtërnâtiônàlizætiøn'; + $replaced = 'IñtërXXXâtiôXXXàlizætiøXXX'; + $this->assertEqual(utf8_ireplace('n','XXX',$str),$replaced); + } + + function testReplaceArrayAsciiSearch() { + $str = 'Iñtërnâtiônàlizætiøn'; + $replaced = 'Iñyërxâyiôxàlizæyiøx'; + $this->assertEqual( + utf8_ireplace( + array('n','t'), + array('x','y'), + 
$str + ),$replaced); + } + + function testReplaceArrayUTF8Search() { + $str = 'Iñtërnâtiônàlizætiøn'; + $replaced = 'I?tërnâti??nàliz????ti???n'; + $this->assertEqual( + utf8_ireplace( + array('Ñ','ô','ø','Æ'), + array('?','??','???','????'), + $str + ),$replaced); + } + + function testReplaceArrayStringReplace() { + $str = 'Iñtërnâtiônàlizætiøn'; + $replaced = 'I?tërnâti?nàliz?ti?n'; + $this->assertEqual( + utf8_ireplace( + array('Ñ','ô','ø','Æ'), + '?', + $str + ),$replaced); + } + + function testReplaceArraySingleArrayReplace() { + $str = 'Iñtërnâtiônàlizætiøn'; + $replaced = 'I?tërnâtinàliztin'; + $this->assertEqual( + utf8_ireplace( + array('Ñ','ô','ø','Æ'), + array('?'), + $str + ),$replaced); + } + + function testReplaceLinefeed() { + $str = "Iñtërnâti\nônàlizætiøn"; + $replaced = "Iñtërnâti\nônàlisetiøn"; + $this->assertEqual(utf8_ireplace('lIzÆ','lise',$str),$replaced); + } + + function testReplaceLinefeedSearch() { + $str = "Iñtërnâtiônàli\nzætiøn"; + $replaced = "Iñtërnâtiônàlisetiøn"; + $this->assertEqual(utf8_ireplace("lI\nzÆ",'lise',$str),$replaced); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = & new test_utf8_str_ireplace(); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_str_pad.test.php b/includes/utf8/tests/cases/utf8_str_pad.test.php new file mode 100644 index 0000000..c979ea3 --- /dev/null +++ b/includes/utf8/tests/cases/utf8_str_pad.test.php @@ -0,0 +1,52 @@ + +* @package utf8 +* @subpackage Tests +*/ +class test_utf8_str_pad extends UnitTestCase { + public function test(){ + $toPad = ''; // 10 characters + $padding = 'ø__'; // 4 characters + + $this->assertEqual(utf8_str_pad($toPad, 20), $toPad . ' '); + $this->assertEqual(utf8_str_pad($toPad, 20, ' ', STR_PAD_LEFT), ' ' . 
$toPad); + $this->assertEqual(utf8_str_pad($toPad, 20, ' ', STR_PAD_BOTH), ' ' . $toPad . ' '); + + $this->assertEqual(utf8_str_pad($toPad, 10), $toPad); + $this->assertEqual(str_pad('5char', 4), '5char'); // str_pos won't truncate input string + $this->assertEqual(utf8_str_pad($toPad, 8), $toPad); + + $this->assertEqual(utf8_str_pad($toPad, 20, $padding, STR_PAD_RIGHT), $toPad . 'ø__ø__ø__ø'); + $this->assertEqual(utf8_str_pad($toPad, 20, $padding, STR_PAD_LEFT), 'ø__ø__ø__ø' . $toPad); + $this->assertEqual(utf8_str_pad($toPad, 20, $padding, STR_PAD_BOTH), 'ø__ø_' . $toPad . 'ø__ø_'); + } +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new test_utf8_str_pad (); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_str_split.test.php b/includes/utf8/tests/cases/utf8_str_split.test.php new file mode 100644 index 0000000..2b28607 --- /dev/null +++ b/includes/utf8/tests/cases/utf8_str_split.test.php @@ -0,0 +1,82 @@ +UnitTestCase('utf8_str_split()'); + } + + function testSplitOneChar() { + $str = 'Iñtërnâtiônàlizætiøn'; + $array = array( + 'I','ñ','t','ë','r','n','â','t','i','ô','n','à','l','i', + 'z','æ','t','i','ø','n', + ); + $this->assertEqual(utf8_str_split($str),$array); + } + + function testSplitFiveChars() { + $str = 'Iñtërnâtiônàlizætiøn'; + $array = array( + 'Iñtër','nâtiô','nàliz','ætiøn', + ); + $this->assertEqual(utf8_str_split($str,5),$array); + } + + function testSplitSixChars() { + $str = 'Iñtërnâtiônàlizætiøn'; + $array = array( + 'Iñtërn','âtiônà', 'lizæti','øn', + ); + $this->assertEqual(utf8_str_split($str,6),$array); + } + + function testSplitLong() { + $str = 'Iñtërnâtiônàlizætiøn'; + $array = array( + 'Iñtërnâtiônàlizætiøn', + ); + $this->assertEqual(utf8_str_split($str,40),$array); + } + + function testSplitNewline() { + $str = 
"Iñtërn\nâtiônàl\nizætiøn\n"; + $array = array( + 'I','ñ','t','ë','r','n',"\n",'â','t','i','ô','n','à','l',"\n",'i', + 'z','æ','t','i','ø','n',"\n", + ); + $this->assertEqual(utf8_str_split($str),$array); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new test_utf8_str_split(); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_strcasecmp.test.php b/includes/utf8/tests/cases/utf8_strcasecmp.test.php new file mode 100644 index 0000000..bc9036d --- /dev/null +++ b/includes/utf8/tests/cases/utf8_strcasecmp.test.php @@ -0,0 +1,83 @@ +UnitTestCase('test_utf8_strcasecmp()'); + } + + function testCompareEqual() { + $strX = 'iñtërnâtiônàlizætiøn'; + $strY = 'IÑTËRNÂTIÔNÀLIZÆTIØN'; + $this->assertEqual(utf8_strcasecmp($strX,$strY),0); + } + + + function testLess() { + $strX = 'iñtërnâtiônàlizætiøn'; + $strY = 'IÑTËRNÂTIÔÀLIZÆTIØN'; + $this->assertTrue(utf8_strcasecmp($strX,$strY) < 0); + } + + function testGreater() { + $strX = 'iñtërnâtiôàlizætiøn'; + $strY = 'IÑTËRNÂTIÔNÀLIZÆTIØN'; + $this->assertTrue(utf8_strcasecmp($strX,$strY) > 0); + } + + function testEmptyX() { + $strX = ''; + $strY = 'IÑTËRNÂTIÔNÀLIZÆTIØN'; + $this->assertTrue(utf8_strcasecmp($strX,$strY) < 0); + } + + function testEmptyY() { + $strX = 'iñtërnâtiôàlizætiøn'; + $strY = ''; + $this->assertTrue(utf8_strcasecmp($strX,$strY) > 0); + } + + function testEmptyBoth() { + $strX = ''; + $strY = ''; + $this->assertTrue(utf8_strcasecmp($strX,$strY) == 0); + } + + function testLinefeed() { + $strX = "iñtërnâtiôn\nàlizætiøn"; + $strY = "IÑTËRNÂTIÔN\nÀLIZÆTIØN"; + $this->assertTrue(utf8_strcasecmp($strX,$strY) == 0); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', 
true); + $test = &new test_utf8_strcasecmp (); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_strcspn.test.php b/includes/utf8/tests/cases/utf8_strcspn.test.php new file mode 100644 index 0000000..c2a08bd --- /dev/null +++ b/includes/utf8/tests/cases/utf8_strcspn.test.php @@ -0,0 +1,69 @@ +UnitTestCase('test_utf8_strcspn()'); + } + + function testNoMatchSingleByteSearch() { + $str = 'iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_strcspn($str,'t'),2); + } + + function testNoMatchMultiByteSearch() { + $str = 'iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_strcspn($str,'â'),6); + } + + function testCompareStrspn() { + $str = 'aeioustr'; + $this->assertEqual(utf8_strcspn($str,'tr'),strcspn($str,'tr')); + } + + function testMatchAscii() { + $str = 'internationalization'; + $this->assertEqual(utf8_strcspn($str,'a'),strcspn($str,'a')); + } + + function testLinefeed() { + $str = "i\nñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_strcspn($str,'t'),3); + } + + function testLinefeedMask() { + $str = "i\nñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_strcspn($str,"\n"),1); + } +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new test_utf8_strcspn (); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_stristr.test.php b/includes/utf8/tests/cases/utf8_stristr.test.php new file mode 100644 index 0000000..5982b98 --- /dev/null +++ b/includes/utf8/tests/cases/utf8_stristr.test.php @@ -0,0 +1,81 @@ +UnitTestCase('test_utf8_stristr()'); + } + + function testSubstr() { + $str = 'iñtërnâtiônàlizætiøn'; + $search = 'NÂT'; + $this->assertEqual(utf8_stristr($str,$search),'nâtiônàlizætiøn'); + } + + function testSubstrNoMatch() { + $str = 'iñtërnâtiônàlizætiøn'; + $search = 'foo'; + 
$this->assertFalse(utf8_stristr($str,$search)); + } + + function testEmptySearch() { + $str = 'iñtërnâtiônàlizætiøn'; + $search = ''; + $this->assertEqual(utf8_stristr($str,$search),'iñtërnâtiônàlizætiøn'); + } + + function testEmptyStr() { + $str = ''; + $search = 'NÂT'; + $this->assertFalse(utf8_stristr($str,$search)); + } + + function testEmptyBoth() { + $str = ''; + $search = ''; + $this->assertEqual(utf8_stristr($str,$search),''); + } + + function testLinefeedStr() { + $str = "iñt\nërnâtiônàlizætiøn"; + $search = 'NÂT'; + $this->assertEqual(utf8_stristr($str,$search),'nâtiônàlizætiøn'); + } + + function testLinefeedBoth() { + $str = "iñtërn\nâtiônàlizætiøn"; + $search = "N\nÂT"; + $this->assertEqual(utf8_stristr($str,$search),"n\nâtiônàlizætiøn"); + } +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new test_utf8_stristr (); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_strlen.test.php b/includes/utf8/tests/cases/utf8_strlen.test.php new file mode 100644 index 0000000..645255d --- /dev/null +++ b/includes/utf8/tests/cases/utf8_strlen.test.php @@ -0,0 +1,60 @@ +UnitTestCase('utf8_strlen()'); + } + + function testUtf8() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_strlen($str),20); + } + + function testUtf8Invalid() { + $str = "Iñtërnâtiôn\xe9àlizætiøn"; + $this->assertEqual(utf8_strlen($str),20); + } + + function testAscii() { + $str = 'ABC 123'; + $this->assertEqual(utf8_strlen($str),7); + } + + function testEmptyStr() { + $str = ''; + $this->assertEqual(utf8_strlen($str),0); + } + + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new test_utf8_strlen(); + $reporter = & 
getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_strpos.test.php b/includes/utf8/tests/cases/utf8_strpos.test.php new file mode 100644 index 0000000..4c06726 --- /dev/null +++ b/includes/utf8/tests/cases/utf8_strpos.test.php @@ -0,0 +1,69 @@ +UnitTestCase('utf8_strpos()'); + } + + function testUtf8() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_strpos($str,'â'),6); + } + + function testUtf8Offset() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_strpos($str,'n',11),19); + } + + function testUtf8Invalid() { + $str = "Iñtërnâtiôn\xe9àlizætiøn"; + $this->assertEqual(utf8_strpos($str,'æ'),15); + } + + function testAscii() { + $str = 'ABC 123'; + $this->assertEqual(utf8_strpos($str,'B'),1); + } + + function testVsStrpos() { + $str = 'ABC 123 ABC'; + $this->assertEqual(utf8_strpos($str,'B',3),strpos($str,'B',3)); + } + + function testEmptyStr() { + $str = ''; + $this->assertFalse(utf8_strpos($str,'x')); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new test_utf8_strpos(); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_strrev.test.php b/includes/utf8/tests/cases/utf8_strrev.test.php new file mode 100644 index 0000000..a5cc8df --- /dev/null +++ b/includes/utf8/tests/cases/utf8_strrev.test.php @@ -0,0 +1,58 @@ +UnitTestCase('utf8_strrev()'); + } + + function testReverse() { + $str = 'Iñtërnâtiônàlizætiøn'; + $rev = 'nøitæzilànôitânrëtñI'; + $this->assertEqual(utf8_strrev($str),$rev); + } + + function testEmptyStr() { + $str = ''; + $rev = ''; + $this->assertEqual(utf8_strrev($str),$rev); + } + + function testLinefeed() { + $str = "Iñtërnâtiôn\nàlizætiøn"; + $rev = "nøitæzilà\nnôitânrëtñI"; + $this->assertEqual(utf8_strrev($str),$rev); + } + +} + 
+//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new test_utf8_strrev(); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_strrpos.test.php b/includes/utf8/tests/cases/utf8_strrpos.test.php new file mode 100644 index 0000000..6754b43 --- /dev/null +++ b/includes/utf8/tests/cases/utf8_strrpos.test.php @@ -0,0 +1,78 @@ +UnitTestCase('utf8_strrpos()'); + } + + function testUtf8() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_strrpos($str,'i'),17); + } + + function testUtf8Offset() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_strrpos($str,'n',11),19); + } + + function testUtf8Invalid() { + $str = "Iñtërnâtiôn\xe9àlizætiøn"; + $this->assertEqual(utf8_strrpos($str,'æ'),15); + } + + function testAscii() { + $str = 'ABC ABC'; + $this->assertEqual(utf8_strrpos($str,'B'),5); + } + + function testVsStrpos() { + $str = 'ABC 123 ABC'; + $this->assertEqual(utf8_strrpos($str,'B'),strrpos($str,'B')); + } + + function testEmptyStr() { + $str = ''; + $this->assertFalse(utf8_strrpos($str,'x')); + } + + function testLinefeed() { + $str = "Iñtërnâtiônàlizætiø\nn"; + $this->assertEqual(utf8_strrpos($str,'i'),17); + } + + function testLinefeedSearch() { + $str = "Iñtërnâtiônàlizætiø\nn"; + $this->assertEqual(utf8_strrpos($str,"\n"),19); + } +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new test_utf8_strrpos(); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_strspn.test.php b/includes/utf8/tests/cases/utf8_strspn.test.php new file mode 100644 index 0000000..9e59066 --- /dev/null +++ b/includes/utf8/tests/cases/utf8_strspn.test.php @@ -0,0 +1,70 @@ 
+UnitTestCase('test_utf8_strspn()'); + } + + function testMatch() { + $str = 'iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_strspn($str,'âëiônñrt'),11); + } + + function testMatchTwo() { + $str = 'iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_strspn($str,'iñtë'),4); + } + + function testCompareStrspn() { + $str = 'aeioustr'; + $this->assertEqual(utf8_strspn($str,'saeiou'),strspn($str,'saeiou')); + } + + function testMatchAscii() { + $str = 'internationalization'; + $this->assertEqual(utf8_strspn($str,'aeionrt'),strspn($str,'aeionrt')); + } + + function testLinefeed() { + $str = "iñtërnât\niônàlizætiøn"; + $this->assertEqual(utf8_strspn($str,'âëiônñrt'),8); + } + + function testLinefeedMask() { + $str = "iñtërnât\niônàlizætiøn"; + $this->assertEqual(utf8_strspn($str,"âëiônñrt\n"),12); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new test_utf8_strspn (); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_strtolower.test.php b/includes/utf8/tests/cases/utf8_strtolower.test.php new file mode 100644 index 0000000..8287b44 --- /dev/null +++ b/includes/utf8/tests/cases/utf8_strtolower.test.php @@ -0,0 +1,50 @@ +UnitTestCase('utf8_strtolower()'); + } + + function testLower() { + $str = 'IÑTËRNÂTIÔNÀLIZÆTIØN'; + $lower = 'iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_strtolower($str),$lower); + } + + function testEmptyString() { + $str = ''; + $lower = ''; + $this->assertEqual(utf8_strtolower($str),$lower); + } +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new test_utf8_strtolower(); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git 
a/includes/utf8/tests/cases/utf8_strtoupper.test.php b/includes/utf8/tests/cases/utf8_strtoupper.test.php new file mode 100644 index 0000000..c21b75d --- /dev/null +++ b/includes/utf8/tests/cases/utf8_strtoupper.test.php @@ -0,0 +1,50 @@ +UnitTestCase('utf8_strtoupper()'); + } + + function testUpper() { + $str = 'iñtërnâtiônàlizætiøn'; + $upper = 'IÑTËRNÂTIÔNÀLIZÆTIØN'; + $this->assertEqual(utf8_strtoupper($str),$upper); + } + + function testEmptyString() { + $str = ''; + $upper = ''; + $this->assertEqual(utf8_strtoupper($str),$upper); + } +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new test_utf8_strtoupper(); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_substr.test.php b/includes/utf8/tests/cases/utf8_substr.test.php new file mode 100644 index 0000000..3b5b1a4 --- /dev/null +++ b/includes/utf8/tests/cases/utf8_substr.test.php @@ -0,0 +1,104 @@ +UnitTestCase('utf8_substr()'); + } + + function testUtf8() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_substr($str,0,2),'Iñ'); + } + + function testUtf8Two() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_substr($str,2,2),'të'); + } + + function testUtf8Zero() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_substr($str,0),'Iñtërnâtiônàlizætiøn'); + } + + function testUtf8ZeroZero() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_substr($str,0,0),''); + } + + function testStartGreatThanLength() { + $str = 'Iñt'; + $this->assertFalse(utf8_substr($str,4)); + } + + function testCompareStartGreatThanLength() { + $str = 'abc'; + $this->assertEqual(utf8_substr($str,4),substr($str,4)); + } + + function testLengthBeyondString() { + $str = 'Iñt'; + $this->assertEqual(utf8_substr($str,1,5),'ñt'); + } + + function testCompareLengthBeyondString() { + $str = 'abc'; + 
$this->assertEqual(utf8_substr($str,1,5),substr($str,1,5)); + } + + function testStartNegative() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_substr($str,-4), 'tiøn'); + } + + function testLengthNegative() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_substr($str,10,-2), 'nàlizæti'); + } + + function testStartLengthNegative() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_substr($str,-4,-2), 'ti'); + } + + function testLinefeed() { + $str = "Iñ\ntërnâtiônàlizætiøn"; + $this->assertEqual(utf8_substr($str,1,5),"ñ\ntër"); + } + + function testLongLength() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_substr($str,0,15536),'Iñtërnâtiônàlizætiøn'); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = & new test_utf8_substr(); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_substr_replace.test.php b/includes/utf8/tests/cases/utf8_substr_replace.test.php new file mode 100644 index 0000000..97b6c14 --- /dev/null +++ b/includes/utf8/tests/cases/utf8_substr_replace.test.php @@ -0,0 +1,76 @@ +UnitTestCase('utf8_str_split()'); + } + + function testReplaceStart() { + $str = 'Iñtërnâtiônàlizætiøn'; + $replaced = 'IñtërnâtX'; + $this->assertEqual(utf8_substr_replace($str,'X',8),$replaced); + } + + function testEmptyString() { + $str = ''; + $replaced = 'X'; + $this->assertEqual(utf8_substr_replace($str,'X',8),$replaced); + } + + function testNegative() { + $str = 'testing'; + $replaced = substr_replace($str,'foo',-2,-2); + $this->assertEqual(utf8_substr_replace($str,'foo',-2,-2),$replaced); + } + + function testZero() { + $str = 'testing'; + $replaced = substr_replace($str,'foo',0,0); + $this->assertEqual(utf8_substr_replace($str,'foo',0,0),$replaced); + } + + function testLinefeed() { + $str = 
"Iñ\ntërnâtiônàlizætiøn"; + $replaced = "Iñ\ntërnâtX"; + $this->assertEqual(utf8_substr_replace($str,'X',9),$replaced); + } + + function testLinefeedReplace() { + $str = "Iñ\ntërnâtiônàlizætiøn"; + $replaced = "Iñ\ntërnâtX\nY"; + $this->assertEqual(utf8_substr_replace($str,"X\nY",9),$replaced); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new test_utf8_native_substr_replace(); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_trim.test.php b/includes/utf8/tests/cases/utf8_trim.test.php new file mode 100644 index 0000000..c8a548b --- /dev/null +++ b/includes/utf8/tests/cases/utf8_trim.test.php @@ -0,0 +1,158 @@ +UnitTestCase('test_utf8_ltrim()'); + } + + function testTrim() { + $str = 'ñtërnâtiônàlizætiøn'; + $trimmed = 'tërnâtiônàlizætiøn'; + $this->assertEqual(utf8_ltrim($str,'ñ'),$trimmed); + } + + function testNoTrim() { + $str = ' Iñtërnâtiônàlizætiøn'; + $trimmed = ' Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_ltrim($str,'ñ'),$trimmed); + } + + function testEmptyString() { + $str = ''; + $trimmed = ''; + $this->assertEqual(utf8_ltrim($str),$trimmed); + } + + function testForwardSlash() { + $str = '/Iñtërnâtiônàlizætiøn'; + $trimmed = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_ltrim($str,'/'),$trimmed); + } + + function testNegateCharClass() { + $str = 'Iñtërnâtiônàlizætiøn'; + $trimmed = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_ltrim($str,'^s'),$trimmed); + } + + function testLinefeed() { + $str = "ñ\nñtërnâtiônàlizætiøn"; + $trimmed = "\nñtërnâtiônàlizætiøn"; + $this->assertEqual(utf8_ltrim($str,'ñ'),$trimmed); + } + + function testLinefeedMask() { + $str = "ñ\nñtërnâtiônàlizætiøn"; + $trimmed = "tërnâtiônàlizætiøn"; + $this->assertEqual(utf8_ltrim($str,"ñ\n"),$trimmed); + } + +} + 
+//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +class test_utf8_rtrim extends UnitTestCase { + + function test_utf8_rtrim() { + $this->UnitTestCase('test_utf8_rtrim()'); + } + + function testTrim() { + $str = 'Iñtërnâtiônàlizætiø'; + $trimmed = 'Iñtërnâtiônàlizæti'; + $this->assertEqual(utf8_rtrim($str,'ø'),$trimmed); + } + + function testNoTrim() { + $str = 'Iñtërnâtiônàlizætiøn '; + $trimmed = 'Iñtërnâtiônàlizætiøn '; + $this->assertEqual(utf8_rtrim($str,'ø'),$trimmed); + } + + function testEmptyString() { + $str = ''; + $trimmed = ''; + $this->assertEqual(utf8_rtrim($str),$trimmed); + } + + function testLinefeed() { + $str = "Iñtërnâtiônàlizætiø\nø"; + $trimmed = "Iñtërnâtiônàlizætiø\n"; + $this->assertEqual(utf8_rtrim($str,'ø'),$trimmed); + } + + function testLinefeedMask() { + $str = "Iñtërnâtiônàlizætiø\nø"; + $trimmed = "Iñtërnâtiônàlizæti"; + $this->assertEqual(utf8_rtrim($str,"ø\n"),$trimmed); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +class test_utf8_trim extends UnitTestCase { + + function test_utf8_trim() { + $this->UnitTestCase('test_utf8_trim()'); + } + + function testTrim() { + $str = 'ñtërnâtiônàlizætiø'; + $trimmed = 'tërnâtiônàlizæti'; + $this->assertEqual(utf8_trim($str,'ñø'),$trimmed); + } + + function testNoTrim() { + $str = ' Iñtërnâtiônàlizætiøn '; + $trimmed = ' Iñtërnâtiônàlizætiøn '; + $this->assertEqual(utf8_trim($str,'ñø'),$trimmed); + } + + function testEmptyString() { + $str = ''; + $trimmed = ''; + $this->assertEqual(utf8_trim($str),$trimmed); + } +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = & new GroupTest('utf8_trim tests'); + $test->addTestCase(new test_utf8_ltrim()); + $test->addTestCase(new test_utf8_rtrim()); + 
$test->addTestCase(new test_utf8_trim()); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_ucfirst.test.php b/includes/utf8/tests/cases/utf8_ucfirst.test.php new file mode 100644 index 0000000..4365276 --- /dev/null +++ b/includes/utf8/tests/cases/utf8_ucfirst.test.php @@ -0,0 +1,75 @@ +UnitTestCase('test_utf8_ucfirst()'); + } + + function testUcfirst() { + $str = 'ñtërnâtiônàlizætiøn'; + $ucfirst = 'Ñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_ucfirst($str),$ucfirst); + } + + function testUcfirstSpace() { + $str = ' iñtërnâtiônàlizætiøn'; + $ucfirst = ' iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_ucfirst($str),$ucfirst); + } + + function testUcfirstUpper() { + $str = 'Ñtërnâtiônàlizætiøn'; + $ucfirst = 'Ñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_ucfirst($str),$ucfirst); + } + + function testEmptyString() { + $str = ''; + $this->assertEqual(utf8_ucfirst($str),''); + } + + function testOneChar() { + $str = 'ñ'; + $ucfirst = "Ñ"; + $this->assertEqual(utf8_ucfirst($str),$ucfirst); + } + + function testLinefeed() { + $str = "ñtërn\nâtiônàlizætiøn"; + $ucfirst = "Ñtërn\nâtiônàlizætiøn"; + $this->assertEqual(utf8_ucfirst($str),$ucfirst); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new test_utf8_ucfirst(); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_ucwords.test.php b/includes/utf8/tests/cases/utf8_ucwords.test.php new file mode 100644 index 0000000..6e8b57c --- /dev/null +++ b/includes/utf8/tests/cases/utf8_ucwords.test.php @@ -0,0 +1,76 @@ +UnitTestCase('test_utf8_ucwords ()'); + } + + function testUcword() { + $str = 'iñtërnâtiônàlizætiøn'; + $ucwords = 'Iñtërnâtiônàlizætiøn'; + $this->assertEqual(utf8_ucwords($str),$ucwords); + } + + function testUcwords() { + $str = 'iñt ërn 
âti ônà liz æti øn'; + $ucwords = 'Iñt Ërn Âti Ônà Liz Æti Øn'; + $this->assertEqual(utf8_ucwords($str),$ucwords); + } + + function testUcwordsNewline() { + $str = "iñt ërn âti\n ônà liz æti øn"; + $ucwords = "Iñt Ërn Âti\n Ônà Liz Æti Øn"; + $this->assertEqual(utf8_ucwords($str),$ucwords); + } + + function testEmptyString() { + $str = ''; + $ucwords = ''; + $this->assertEqual(utf8_ucwords($str),$ucwords); + } + + function testOneChar() { + $str = 'ñ'; + $ucwords = 'Ñ'; + $this->assertEqual(utf8_ucwords($str),$ucwords); + } + + function testLinefeed() { + $str = "iñt ërn âti\n ônà liz æti øn"; + $ucwords = "Iñt Ërn Âti\n Ônà Liz Æti Øn"; + $this->assertEqual(utf8_ucwords($str),$ucwords); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new test_utf8_ucwords (); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_unicode.test.php b/includes/utf8/tests/cases/utf8_unicode.test.php new file mode 100644 index 0000000..7fd6be6 --- /dev/null +++ b/includes/utf8/tests/cases/utf8_unicode.test.php @@ -0,0 +1,117 @@ +UnitTestCase('test_utf8_to_unicode()'); + } + + function testEmptyString() { + $this->assertEqual(utf8_to_unicode(''),array()); + } + + function testString() { + + $unicode = array(); + $unicode[0] = 73; + $unicode[1] = 241; + $unicode[2] = 116; + $unicode[3] = 235; + $unicode[4] = 114; + $unicode[5] = 110; + $unicode[6] = 226; + $unicode[7] = 116; + $unicode[8] = 105; + $unicode[9] = 244; + $unicode[10] = 110; + $unicode[11] = 224; + $unicode[12] = 108; + $unicode[13] = 105; + $unicode[14] = 122; + $unicode[15] = 230; + $unicode[16] = 116; + $unicode[17] = 105; + $unicode[18] = 248; + $unicode[19] = 110; + + $this->assertEqual(utf8_to_unicode('Iñtërnâtiônàlizætiøn'), $unicode); + } + +} + 
+//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +class test_utf8_from_unicode extends UnitTestCase { + + function test_utf8_strip_specials() { + $this->UnitTestCase('test_utf8_strip_specials()'); + } + + function testEmptyArray() { + $this->assertEqual(utf8_from_unicode(array()),''); + } + + function testArray() { + + $unicode = array(); + $unicode[0] = 73; + $unicode[1] = 241; + $unicode[2] = 116; + $unicode[3] = 235; + $unicode[4] = 114; + $unicode[5] = 110; + $unicode[6] = 226; + $unicode[7] = 116; + $unicode[8] = 105; + $unicode[9] = 244; + $unicode[10] = 110; + $unicode[11] = 224; + $unicode[12] = 108; + $unicode[13] = 105; + $unicode[14] = 122; + $unicode[15] = 230; + $unicode[16] = 116; + $unicode[17] = 105; + $unicode[18] = 248; + $unicode[19] = 110; + + $this->assertEqual(utf8_from_unicode($unicode), 'Iñtërnâtiônàlizætiøn'); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new GroupTest('utf8_ascii'); + $test->addTestCase(new test_utf8_to_unicode()); + $test->addTestCase(new test_utf8_from_unicode()); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cases/utf8_validation.test.php b/includes/utf8/tests/cases/utf8_validation.test.php new file mode 100644 index 0000000..75d2497 --- /dev/null +++ b/includes/utf8/tests/cases/utf8_validation.test.php @@ -0,0 +1,205 @@ +UnitTestCase('utf8_is_valid()'); + } + + function testValidUtf8() { + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertTrue(utf8_is_valid($str)); + } + + function testValidUtf8Ascii() { + $str = 'ABC 123'; + $this->assertTrue(utf8_is_valid($str)); + } + + function testInvalidUtf8() { + $str = "Iñtërnâtiôn\xe9àlizætiøn"; + $this->assertFalse(utf8_is_valid($str)); + } + + function testInvalidUtf8Ascii() { + $str = "this is an 
invalid char '\xe9' here"; + $this->assertFalse(utf8_is_valid($str)); + } + + function testEmptyString() { + $str = ''; + $this->assertTrue(utf8_is_valid($str)); + } + + function testValidTwoOctetId() { + $str = "\xc3\xb1"; + $this->assertTrue(utf8_is_valid($str)); + } + + function testInvalidTwoOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn \xc3\x28 Iñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_is_valid($str)); + } + + function testInvalidIdBetweenTwoAndThree() { + $str = "Iñtërnâtiônàlizætiøn\xa0\xa1Iñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_is_valid($str)); + } + + function testValidThreeOctetId() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x82\xa1Iñtërnâtiônàlizætiøn"; + $this->assertTrue(utf8_is_valid($str)); + } + + function testInvalidThreeOctetSequenceSecond() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x28\xa1Iñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_is_valid($str)); + } + + function testInvalidThreeOctetSequenceThird() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x82\x28Iñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_is_valid($str)); + } + + function testValidFourOctetId() { + $str = "Iñtërnâtiônàlizætiøn\xf0\x90\x8c\xbcIñtërnâtiônàlizætiøn"; + $this->assertTrue(utf8_is_valid($str)); + } + + function testInvalidFourOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xf0\x28\x8c\xbcIñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_is_valid($str)); + } + + function testInvalidFiveOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xf8\xa1\xa1\xa1\xa1Iñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_is_valid($str)); + } + + function testInvalidSixOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xfc\xa1\xa1\xa1\xa1\xa1Iñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_is_valid($str)); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +class test_utf8_compliant extends UnitTestCase { + + function test_utf8_compliant() { + $this->UnitTestCase('utf8_compliant()'); + } + + function testValidUtf8() 
{ + $str = 'Iñtërnâtiônàlizætiøn'; + $this->assertTrue(utf8_compliant($str)); + } + + function testValidUtf8Ascii() { + $str = 'ABC 123'; + $this->assertTrue(utf8_compliant($str)); + } + + function testInvalidUtf8() { + $str = "Iñtërnâtiôn\xe9àlizætiøn"; + $this->assertFalse(utf8_compliant($str)); + } + + function testInvalidUtf8Ascii() { + $str = "this is an invalid char '\xe9' here"; + $this->assertFalse(utf8_compliant($str)); + } + + function testEmptyString() { + $str = ''; + $this->assertTrue(utf8_compliant($str)); + } + + function testValidTwoOctetId() { + $str = "\xc3\xb1"; + $this->assertTrue(utf8_compliant($str)); + } + + function testInvalidTwoOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn \xc3\x28 Iñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_compliant($str)); + } + + function testInvalidIdBetweenTwoAndThree() { + $str = "Iñtërnâtiônàlizætiøn\xa0\xa1Iñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_compliant($str)); + } + + function testValidThreeOctetId() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x82\xa1Iñtërnâtiônàlizætiøn"; + $this->assertTrue(utf8_compliant($str)); + } + + function testInvalidThreeOctetSequenceSecond() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x28\xa1Iñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_compliant($str)); + } + + function testInvalidThreeOctetSequenceThird() { + $str = "Iñtërnâtiônàlizætiøn\xe2\x82\x28Iñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_compliant($str)); + } + + function testValidFourOctetId() { + $str = "Iñtërnâtiônàlizætiøn\xf0\x90\x8c\xbcIñtërnâtiônàlizætiøn"; + $this->assertTrue(utf8_compliant($str)); + } + + function testInvalidFourOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xf0\x28\x8c\xbcIñtërnâtiônàlizætiøn"; + $this->assertFalse(utf8_compliant($str)); + } + + function testInvalidFiveOctetSequence() { + $str = "Iñtërnâtiônàlizætiøn\xf8\xa1\xa1\xa1\xa1Iñtërnâtiônàlizætiøn"; + $this->assertTrue(utf8_compliant($str)); + } + + function testInvalidSixOctetSequence() { + $str = 
"Iñtërnâtiônàlizætiøn\xfc\xa1\xa1\xa1\xa1\xa1Iñtërnâtiônàlizætiøn"; + $this->assertTrue(utf8_compliant($str)); + } + +} + +//-------------------------------------------------------------------- +/** +* @package utf8 +* @subpackage Tests +*/ +if (!defined('TEST_RUNNING')) { + define('TEST_RUNNING', true); + $test = &new GroupTest('utf8_validation'); + $test->addTestCase(new test_utf8_is_valid()); + $test->addTestCase(new test_utf8_compliant()); + $reporter = & getTestReporter(); + $test->run($reporter); +} diff --git a/includes/utf8/tests/cli_reporter.php b/includes/utf8/tests/cli_reporter.php new file mode 100644 index 0000000..d8d69a7 --- /dev/null +++ b/includes/utf8/tests/cli_reporter.php @@ -0,0 +1,96 @@ +"); +} + +if (! defined('ST_FAILS_RETURN_CODE')) { + define('ST_FAILS_RETURN_CODE', 1); +} + +if (version_compare(phpversion(), '4.3.0', '<') || + php_sapi_name() == 'cgi') { + define('STDOUT', fopen('php://stdout', 'w')); + define('STDERR', fopen('php://stderr', 'w')); + register_shutdown_function( + create_function('', 'fclose(STDOUT); fclose(STDERR); return true;')); +} + +/** + * Minimal command line test displayer. Writes fail details to STDERR. Returns 0 + * to the shell if all tests pass, ST_FAILS_RETURN_CODE if any test fails. + */ +class CLIReporter extends SimpleReporter { + + var $faildetail_separator = ST_FAILDETAIL_SEPARATOR; + + function CLIReporter($faildetail_separator = NULL) { + $this->SimpleReporter(); + if (! is_null($faildetail_separator)) { + $this->setFailDetailSeparator($faildetail_separator); + } + } + + function setFailDetailSeparator($separator) { + $this->faildetail_separator = $separator; + } + + /** + * Return a formatted faildetail for printing. + */ + function &_paintTestFailDetail(&$message) { + $buffer = ''; + $faildetail = $this->getTestList(); + array_shift($faildetail); + $buffer .= implode($this->faildetail_separator, $faildetail); + $buffer .= $this->faildetail_separator . 
"$message\n"; + return $buffer; + } + + /** + * Paint fail faildetail to STDERR. + */ + function paintFail($message) { + parent::paintFail($message); + fwrite(STDERR, 'FAIL' . $this->faildetail_separator . + $this->_paintTestFailDetail($message)); + } + + /** + * Paint exception faildetail to STDERR. + */ + function paintException($message) { + parent::paintException($message); + fwrite(STDERR, 'EXCEPTION' . $this->faildetail_separator . + $this->_paintTestFailDetail($message)); + } + + /** + * Paint a footer with test case name, timestamp, counts of fails and + * exceptions. + */ + function paintFooter($test_name) { + $buffer = $this->getTestCaseProgress() . '/' . + $this->getTestCaseCount() . ' test cases complete: '; + + if (0 < ($this->getFailCount() + $this->getExceptionCount())) { + $buffer .= $this->getPassCount() . " passes"; + if (0 < $this->getFailCount()) { + $buffer .= ", " . $this->getFailCount() . " fails"; + } + if (0 < $this->getExceptionCount()) { + $buffer .= ", " . $this->getExceptionCount() . " exceptions"; + } + $buffer .= ".\n"; + fwrite(STDOUT, $buffer); + exit(ST_FAILS_RETURN_CODE); + } else { + fwrite(STDOUT, $buffer . $this->getPassCount() . 
" passes.\n"); + } + } +} diff --git a/includes/utf8/tests/config.php b/includes/utf8/tests/config.php new file mode 100644 index 0000000..86b7cd6 --- /dev/null +++ b/includes/utf8/tests/config.php @@ -0,0 +1,56 @@ + apache-2.0 +Copyright (c) 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Copyright (c) 1998-2002 The Apache Software Foundation, http://www.apache.org/ + +Benchmarking localhost (be patient) + + +Server Software: Apache/2.0.54 +Server Hostname: localhost +Server Port: 80 + +Document Path: /~harryf/utf8/tests/bench/strlen.php?engine=mbstring +Document Length: 6 bytes + +Concurrency Level: 1 +Time taken for tests: 5.520811 seconds +Complete requests: 1000 +Failed requests: 0 +Write errors: 0 +Total transferred: 229000 bytes +HTML transferred: 6000 bytes +Requests per second: 181.13 [#/sec] (mean) +Time per request: 5.521 [ms] (mean) +Time per request: 5.521 [ms] (mean, across all concurrent requests) +Transfer rate: 40.39 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.0 0 0 +Processing: 4 5 2.3 5 34 +Waiting: 0 4 2.3 4 34 +Total: 4 5 2.3 5 34 + +Percentage of the requests served within a certain time (ms) + 50% 5 + 66% 5 + 75% 5 + 80% 5 + 90% 5 + 95% 6 + 98% 10 + 99% 19 + 100% 34 (longest request) diff --git a/includes/utf8/tests/data/bench/strlen_mbstring_20061001.txt b/includes/utf8/tests/data/bench/strlen_mbstring_20061001.txt new file mode 100644 index 0000000..2ee7496 --- /dev/null +++ b/includes/utf8/tests/data/bench/strlen_mbstring_20061001.txt @@ -0,0 +1,43 @@ +This is ApacheBench, Version 2.0.41-dev <$Revision: 1.1 $> apache-2.0 +Copyright (c) 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Copyright (c) 1998-2002 The Apache Software Foundation, http://www.apache.org/ + +Benchmarking localhost (be patient) + + +Server Software: Apache/2.0.55 +Server Hostname: localhost +Server Port: 80 + +Document Path: /~harryf/utf8/tests/bench/strlen.php?engine=mbstring +Document 
Length: 6 bytes + +Concurrency Level: 1 +Time taken for tests: 5.480531 seconds +Complete requests: 1000 +Failed requests: 0 +Write errors: 0 +Total transferred: 207000 bytes +HTML transferred: 6000 bytes +Requests per second: 182.46 [#/sec] (mean) +Time per request: 5.481 [ms] (mean) +Time per request: 5.481 [ms] (mean, across all concurrent requests) +Transfer rate: 36.86 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.0 0 0 +Processing: 3 4 3.6 4 53 +Waiting: 0 4 3.5 4 53 +Total: 3 4 3.6 4 53 + +Percentage of the requests served within a certain time (ms) + 50% 4 + 66% 4 + 75% 4 + 80% 5 + 90% 7 + 95% 9 + 98% 15 + 99% 18 + 100% 53 (longest request) diff --git a/includes/utf8/tests/data/bench/strlen_native_20060226.txt b/includes/utf8/tests/data/bench/strlen_native_20060226.txt new file mode 100644 index 0000000..074c6e8 --- /dev/null +++ b/includes/utf8/tests/data/bench/strlen_native_20060226.txt @@ -0,0 +1,43 @@ +This is ApacheBench, Version 2.0.41-dev <$Revision: 1.1 $> apache-2.0 +Copyright (c) 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Copyright (c) 1998-2002 The Apache Software Foundation, http://www.apache.org/ + +Benchmarking localhost (be patient) + + +Server Software: Apache/2.0.54 +Server Hostname: localhost +Server Port: 80 + +Document Path: /~harryf/utf8/tests/bench/strlen.php?engine=native +Document Length: 6 bytes + +Concurrency Level: 1 +Time taken for tests: 6.914750 seconds +Complete requests: 1000 +Failed requests: 0 +Write errors: 0 +Total transferred: 229000 bytes +HTML transferred: 6000 bytes +Requests per second: 144.62 [#/sec] (mean) +Time per request: 6.915 [ms] (mean) +Time per request: 6.915 [ms] (mean, across all concurrent requests) +Transfer rate: 32.25 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.0 0 0 +Processing: 4 6 4.5 4 42 +Waiting: 0 0 0.6 0 14 +Total: 4 6 4.5 4 42 + +Percentage of the requests served within a certain 
time (ms) + 50% 4 + 66% 4 + 75% 5 + 80% 13 + 90% 13 + 95% 13 + 98% 14 + 99% 15 + 100% 42 (longest request) diff --git a/includes/utf8/tests/data/bench/strlen_native_20061001.txt b/includes/utf8/tests/data/bench/strlen_native_20061001.txt new file mode 100644 index 0000000..01a75c3 --- /dev/null +++ b/includes/utf8/tests/data/bench/strlen_native_20061001.txt @@ -0,0 +1,43 @@ +This is ApacheBench, Version 2.0.41-dev <$Revision: 1.1 $> apache-2.0 +Copyright (c) 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Copyright (c) 1998-2002 The Apache Software Foundation, http://www.apache.org/ + +Benchmarking localhost (be patient) + + +Server Software: Apache/2.0.55 +Server Hostname: localhost +Server Port: 80 + +Document Path: /~harryf/utf8/tests/bench/strlen.php?engine=native +Document Length: 6 bytes + +Concurrency Level: 1 +Time taken for tests: 8.210329 seconds +Complete requests: 1000 +Failed requests: 0 +Write errors: 0 +Total transferred: 207000 bytes +HTML transferred: 6000 bytes +Requests per second: 121.80 [#/sec] (mean) +Time per request: 8.210 [ms] (mean) +Time per request: 8.210 [ms] (mean, across all concurrent requests) +Transfer rate: 24.60 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.2 0 4 +Processing: 4 7 5.5 4 57 +Waiting: 0 0 0.8 0 23 +Total: 4 7 5.5 4 57 + +Percentage of the requests served within a certain time (ms) + 50% 4 + 66% 12 + 75% 12 + 80% 13 + 90% 14 + 95% 17 + 98% 17 + 99% 25 + 100% 57 (longest request) diff --git a/includes/utf8/tests/data/bench/strpos_mbstring_20060226.txt b/includes/utf8/tests/data/bench/strpos_mbstring_20060226.txt new file mode 100644 index 0000000..7d9fbce --- /dev/null +++ b/includes/utf8/tests/data/bench/strpos_mbstring_20060226.txt @@ -0,0 +1,43 @@ +This is ApacheBench, Version 2.0.41-dev <$Revision: 1.1 $> apache-2.0 +Copyright (c) 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Copyright (c) 1998-2002 The Apache Software Foundation, 
http://www.apache.org/ + +Benchmarking localhost (be patient) + + +Server Software: Apache/2.0.54 +Server Hostname: localhost +Server Port: 80 + +Document Path: /~harryf/utf8/tests/bench/strpos.php?engine=mbstring +Document Length: 95 bytes + +Concurrency Level: 1 +Time taken for tests: 4.664597 seconds +Complete requests: 1000 +Failed requests: 0 +Write errors: 0 +Total transferred: 319000 bytes +HTML transferred: 95000 bytes +Requests per second: 214.38 [#/sec] (mean) +Time per request: 4.665 [ms] (mean) +Time per request: 4.665 [ms] (mean, across all concurrent requests) +Transfer rate: 66.67 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.0 0 0 +Processing: 2 4 3.1 2 33 +Waiting: 0 0 0.3 0 8 +Total: 2 4 3.1 2 33 + +Percentage of the requests served within a certain time (ms) + 50% 2 + 66% 6 + 75% 6 + 80% 7 + 90% 7 + 95% 7 + 98% 7 + 99% 9 + 100% 33 (longest request) diff --git a/includes/utf8/tests/data/bench/strpos_mbstring_20061001.txt b/includes/utf8/tests/data/bench/strpos_mbstring_20061001.txt new file mode 100644 index 0000000..c7ca241 --- /dev/null +++ b/includes/utf8/tests/data/bench/strpos_mbstring_20061001.txt @@ -0,0 +1,43 @@ +This is ApacheBench, Version 2.0.41-dev <$Revision: 1.1 $> apache-2.0 +Copyright (c) 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Copyright (c) 1998-2002 The Apache Software Foundation, http://www.apache.org/ + +Benchmarking localhost (be patient) + + +Server Software: Apache/2.0.55 +Server Hostname: localhost +Server Port: 80 + +Document Path: /~harryf/utf8/tests/bench/strpos.php?engine=mbstring +Document Length: 95 bytes + +Concurrency Level: 1 +Time taken for tests: 5.337757 seconds +Complete requests: 1000 +Failed requests: 0 +Write errors: 0 +Total transferred: 297000 bytes +HTML transferred: 95000 bytes +Requests per second: 187.34 [#/sec] (mean) +Time per request: 5.338 [ms] (mean) +Time per request: 5.338 [ms] (mean, across all concurrent requests) +Transfer 
rate: 54.33 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 1.0 0 31 +Processing: 1 4 4.4 5 59 +Waiting: 0 0 0.1 0 4 +Total: 1 4 4.5 5 59 + +Percentage of the requests served within a certain time (ms) + 50% 5 + 66% 6 + 75% 6 + 80% 6 + 90% 7 + 95% 9 + 98% 13 + 99% 22 + 100% 59 (longest request) diff --git a/includes/utf8/tests/data/bench/strpos_native_20060226.txt b/includes/utf8/tests/data/bench/strpos_native_20060226.txt new file mode 100644 index 0000000..be4d484 --- /dev/null +++ b/includes/utf8/tests/data/bench/strpos_native_20060226.txt @@ -0,0 +1,43 @@ +This is ApacheBench, Version 2.0.41-dev <$Revision: 1.1 $> apache-2.0 +Copyright (c) 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Copyright (c) 1998-2002 The Apache Software Foundation, http://www.apache.org/ + +Benchmarking localhost (be patient) + + +Server Software: Apache/2.0.54 +Server Hostname: localhost +Server Port: 80 + +Document Path: /~harryf/utf8/tests/bench/strpos.php?engine=native +Document Length: 95 bytes + +Concurrency Level: 1 +Time taken for tests: 9.5046 seconds +Complete requests: 1000 +Failed requests: 0 +Write errors: 0 +Total transferred: 319000 bytes +HTML transferred: 95000 bytes +Requests per second: 111.05 [#/sec] (mean) +Time per request: 9.005 [ms] (mean) +Time per request: 9.005 [ms] (mean, across all concurrent requests) +Transfer rate: 34.54 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.0 0 0 +Processing: 6 8 5.4 6 46 +Waiting: 0 0 0.1 0 4 +Total: 6 8 5.4 6 46 + +Percentage of the requests served within a certain time (ms) + 50% 6 + 66% 6 + 75% 6 + 80% 6 + 90% 19 + 95% 20 + 98% 20 + 99% 22 + 100% 46 (longest request) diff --git a/includes/utf8/tests/data/bench/strpos_native_20061001.txt b/includes/utf8/tests/data/bench/strpos_native_20061001.txt new file mode 100644 index 0000000..aaf9eb8 --- /dev/null +++ b/includes/utf8/tests/data/bench/strpos_native_20061001.txt @@ 
-0,0 +1,43 @@ +This is ApacheBench, Version 2.0.41-dev <$Revision: 1.1 $> apache-2.0 +Copyright (c) 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Copyright (c) 1998-2002 The Apache Software Foundation, http://www.apache.org/ + +Benchmarking localhost (be patient) + + +Server Software: Apache/2.0.55 +Server Hostname: localhost +Server Port: 80 + +Document Path: /~harryf/utf8/tests/bench/strpos.php?engine=native +Document Length: 95 bytes + +Concurrency Level: 1 +Time taken for tests: 9.257604 seconds +Complete requests: 1000 +Failed requests: 0 +Write errors: 0 +Total transferred: 297000 bytes +HTML transferred: 95000 bytes +Requests per second: 108.02 [#/sec] (mean) +Time per request: 9.258 [ms] (mean) +Time per request: 9.258 [ms] (mean, across all concurrent requests) +Transfer rate: 31.33 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.0 0 1 +Processing: 7 8 2.8 8 44 +Waiting: 0 8 3.3 8 43 +Total: 7 8 2.8 8 44 + +Percentage of the requests served within a certain time (ms) + 50% 8 + 66% 8 + 75% 8 + 80% 9 + 90% 10 + 95% 11 + 98% 17 + 99% 24 + 100% 44 (longest request) diff --git a/includes/utf8/tests/data/bench/strrpos_mbstring_20060226.txt b/includes/utf8/tests/data/bench/strrpos_mbstring_20060226.txt new file mode 100644 index 0000000..a4ac705 --- /dev/null +++ b/includes/utf8/tests/data/bench/strrpos_mbstring_20060226.txt @@ -0,0 +1,43 @@ +This is ApacheBench, Version 2.0.41-dev <$Revision: 1.1 $> apache-2.0 +Copyright (c) 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Copyright (c) 1998-2002 The Apache Software Foundation, http://www.apache.org/ + +Benchmarking localhost (be patient) + + +Server Software: Apache/2.0.54 +Server Hostname: localhost +Server Port: 80 + +Document Path: /~harryf/utf8/tests/bench/strrpos.php?engine=mbstring +Document Length: 156 bytes + +Concurrency Level: 1 +Time taken for tests: 31.210019 seconds +Complete requests: 1000 +Failed requests: 0 +Write 
errors: 0 +Total transferred: 381000 bytes +HTML transferred: 156000 bytes +Requests per second: 32.04 [#/sec] (mean) +Time per request: 31.210 [ms] (mean) +Time per request: 31.210 [ms] (mean, across all concurrent requests) +Transfer rate: 11.92 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.0 0 0 +Processing: 27 30 13.8 27 123 +Waiting: 0 29 14.4 27 122 +Total: 27 30 13.8 27 123 + +Percentage of the requests served within a certain time (ms) + 50% 27 + 66% 27 + 75% 28 + 80% 28 + 90% 28 + 95% 46 + 98% 92 + 99% 92 + 100% 123 (longest request) diff --git a/includes/utf8/tests/data/bench/strrpos_mbstring_20061001.txt b/includes/utf8/tests/data/bench/strrpos_mbstring_20061001.txt new file mode 100644 index 0000000..d25f901 --- /dev/null +++ b/includes/utf8/tests/data/bench/strrpos_mbstring_20061001.txt @@ -0,0 +1,43 @@ +This is ApacheBench, Version 2.0.41-dev <$Revision: 1.1 $> apache-2.0 +Copyright (c) 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Copyright (c) 1998-2002 The Apache Software Foundation, http://www.apache.org/ + +Benchmarking localhost (be patient) + + +Server Software: Apache/2.0.55 +Server Hostname: localhost +Server Port: 80 + +Document Path: /~harryf/utf8/tests/bench/strrpos.php?engine=mbstring +Document Length: 156 bytes + +Concurrency Level: 1 +Time taken for tests: 32.406529 seconds +Complete requests: 1000 +Failed requests: 0 +Write errors: 0 +Total transferred: 359000 bytes +HTML transferred: 156000 bytes +Requests per second: 30.86 [#/sec] (mean) +Time per request: 32.407 [ms] (mean) +Time per request: 32.407 [ms] (mean, across all concurrent requests) +Transfer rate: 10.80 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.0 0 0 +Processing: 26 31 16.9 28 140 +Waiting: 0 31 16.9 28 140 +Total: 26 31 16.9 28 140 + +Percentage of the requests served within a certain time (ms) + 50% 28 + 66% 28 + 75% 29 + 80% 29 + 90% 32 + 95% 55 + 98% 103 + 
99% 112 + 100% 140 (longest request) diff --git a/includes/utf8/tests/data/bench/strrpos_native_20060226.txt b/includes/utf8/tests/data/bench/strrpos_native_20060226.txt new file mode 100644 index 0000000..c5e4e6c --- /dev/null +++ b/includes/utf8/tests/data/bench/strrpos_native_20060226.txt @@ -0,0 +1,43 @@ +This is ApacheBench, Version 2.0.41-dev <$Revision: 1.1 $> apache-2.0 +Copyright (c) 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Copyright (c) 1998-2002 The Apache Software Foundation, http://www.apache.org/ + +Benchmarking localhost (be patient) + + +Server Software: Apache/2.0.54 +Server Hostname: localhost +Server Port: 80 + +Document Path: /~harryf/utf8/tests/bench/strrpos.php?engine=native +Document Length: 156 bytes + +Concurrency Level: 1 +Time taken for tests: 33.444724 seconds +Complete requests: 1000 +Failed requests: 0 +Write errors: 0 +Total transferred: 381000 bytes +HTML transferred: 156000 bytes +Requests per second: 29.90 [#/sec] (mean) +Time per request: 33.445 [ms] (mean) +Time per request: 33.445 [ms] (mean, across all concurrent requests) +Transfer rate: 11.12 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.0 0 0 +Processing: 29 33 14.1 30 126 +Waiting: 0 32 14.3 30 125 +Total: 29 33 14.1 30 126 + +Percentage of the requests served within a certain time (ms) + 50% 30 + 66% 30 + 75% 30 + 80% 30 + 90% 30 + 95% 48 + 98% 99 + 99% 99 + 100% 126 (longest request) diff --git a/includes/utf8/tests/data/bench/strrpos_native_20061001.txt b/includes/utf8/tests/data/bench/strrpos_native_20061001.txt new file mode 100644 index 0000000..0ac7b1f --- /dev/null +++ b/includes/utf8/tests/data/bench/strrpos_native_20061001.txt @@ -0,0 +1,43 @@ +This is ApacheBench, Version 2.0.41-dev <$Revision: 1.1 $> apache-2.0 +Copyright (c) 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Copyright (c) 1998-2002 The Apache Software Foundation, http://www.apache.org/ + +Benchmarking localhost 
(be patient) + + +Server Software: Apache/2.0.55 +Server Hostname: localhost +Server Port: 80 + +Document Path: /~harryf/utf8/tests/bench/strrpos.php?engine=native +Document Length: 156 bytes + +Concurrency Level: 1 +Time taken for tests: 35.459417 seconds +Complete requests: 1000 +Failed requests: 0 +Write errors: 0 +Total transferred: 359000 bytes +HTML transferred: 156000 bytes +Requests per second: 28.20 [#/sec] (mean) +Time per request: 35.459 [ms] (mean) +Time per request: 35.459 [ms] (mean, across all concurrent requests) +Transfer rate: 9.87 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.1 0 3 +Processing: 29 34 17.6 31 156 +Waiting: 0 34 17.7 31 156 +Total: 29 34 17.6 31 156 + +Percentage of the requests served within a certain time (ms) + 50% 31 + 66% 31 + 75% 32 + 80% 32 + 90% 34 + 95% 54 + 98% 113 + 99% 115 + 100% 156 (longest request) diff --git a/includes/utf8/tests/data/bench/substr_mbstring_20060226.txt b/includes/utf8/tests/data/bench/substr_mbstring_20060226.txt new file mode 100644 index 0000000..4169272 --- /dev/null +++ b/includes/utf8/tests/data/bench/substr_mbstring_20060226.txt @@ -0,0 +1,43 @@ +This is ApacheBench, Version 2.0.41-dev <$Revision: 1.1 $> apache-2.0 +Copyright (c) 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Copyright (c) 1998-2002 The Apache Software Foundation, http://www.apache.org/ + +Benchmarking localhost (be patient) + + +Server Software: Apache/2.0.54 +Server Hostname: localhost +Server Port: 80 + +Document Path: /~harryf/utf8/tests/bench/substr.php?engine=mbstring +Document Length: 5 bytes + +Concurrency Level: 1 +Time taken for tests: 18.344252 seconds +Complete requests: 1000 +Failed requests: 0 +Write errors: 0 +Total transferred: 228000 bytes +HTML transferred: 5000 bytes +Requests per second: 54.51 [#/sec] (mean) +Time per request: 18.344 [ms] (mean) +Time per request: 18.344 [ms] (mean, across all concurrent requests) +Transfer rate: 12.10 
[Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.0 0 0 +Processing: 14 17 9.8 15 53 +Waiting: 0 17 9.9 14 53 +Total: 14 17 9.8 15 53 + +Percentage of the requests served within a certain time (ms) + 50% 15 + 66% 15 + 75% 15 + 80% 15 + 90% 22 + 95% 48 + 98% 50 + 99% 50 + 100% 53 (longest request) diff --git a/includes/utf8/tests/data/bench/substr_mbstring_20061001.txt b/includes/utf8/tests/data/bench/substr_mbstring_20061001.txt new file mode 100644 index 0000000..e8754d4 --- /dev/null +++ b/includes/utf8/tests/data/bench/substr_mbstring_20061001.txt @@ -0,0 +1,43 @@ +This is ApacheBench, Version 2.0.41-dev <$Revision: 1.1 $> apache-2.0 +Copyright (c) 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Copyright (c) 1998-2002 The Apache Software Foundation, http://www.apache.org/ + +Benchmarking localhost (be patient) + + +Server Software: Apache/2.0.55 +Server Hostname: localhost +Server Port: 80 + +Document Path: /~harryf/utf8/tests/bench/substr.php?engine=mbstring +Document Length: 5 bytes + +Concurrency Level: 1 +Time taken for tests: 16.417722 seconds +Complete requests: 1000 +Failed requests: 0 +Write errors: 0 +Total transferred: 206000 bytes +HTML transferred: 5000 bytes +Requests per second: 60.91 [#/sec] (mean) +Time per request: 16.418 [ms] (mean) +Time per request: 16.418 [ms] (mean, across all concurrent requests) +Transfer rate: 12.24 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.0 0 0 +Processing: 14 15 3.4 16 55 +Waiting: 0 15 3.8 16 55 +Total: 14 15 3.4 16 55 + +Percentage of the requests served within a certain time (ms) + 50% 16 + 66% 16 + 75% 16 + 80% 17 + 90% 17 + 95% 18 + 98% 23 + 99% 35 + 100% 55 (longest request) diff --git a/includes/utf8/tests/data/bench/substr_native_20060226.txt b/includes/utf8/tests/data/bench/substr_native_20060226.txt new file mode 100644 index 0000000..c85a705 --- /dev/null +++ 
b/includes/utf8/tests/data/bench/substr_native_20060226.txt @@ -0,0 +1,43 @@ +This is ApacheBench, Version 2.0.41-dev <$Revision: 1.1 $> apache-2.0 +Copyright (c) 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Copyright (c) 1998-2002 The Apache Software Foundation, http://www.apache.org/ + +Benchmarking localhost (be patient) + + +Server Software: Apache/2.0.54 +Server Hostname: localhost +Server Port: 80 + +Document Path: /~harryf/utf8/tests/bench/substr.php?engine=native +Document Length: 5 bytes + +Concurrency Level: 1 +Time taken for tests: 21.496423 seconds +Complete requests: 1000 +Failed requests: 0 +Write errors: 0 +Total transferred: 228000 bytes +HTML transferred: 5000 bytes +Requests per second: 46.52 [#/sec] (mean) +Time per request: 21.496 [ms] (mean) +Time per request: 21.496 [ms] (mean, across all concurrent requests) +Transfer rate: 10.33 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.0 0 0 +Processing: 16 20 11.7 17 68 +Waiting: 0 20 11.9 16 68 +Total: 16 20 11.7 17 68 + +Percentage of the requests served within a certain time (ms) + 50% 17 + 66% 17 + 75% 17 + 80% 17 + 90% 44 + 95% 54 + 98% 56 + 99% 58 + 100% 68 (longest request) diff --git a/includes/utf8/tests/data/bench/substr_native_20061001.txt b/includes/utf8/tests/data/bench/substr_native_20061001.txt new file mode 100644 index 0000000..44916d4 --- /dev/null +++ b/includes/utf8/tests/data/bench/substr_native_20061001.txt @@ -0,0 +1,43 @@ +This is ApacheBench, Version 2.0.41-dev <$Revision: 1.1 $> apache-2.0 +Copyright (c) 1996 Adam Twiss, Zeus Technology Ltd, http://www.zeustech.net/ +Copyright (c) 1998-2002 The Apache Software Foundation, http://www.apache.org/ + +Benchmarking localhost (be patient) + + +Server Software: Apache/2.0.55 +Server Hostname: localhost +Server Port: 80 + +Document Path: /~harryf/utf8/tests/bench/substr.php?engine=native +Document Length: 5 bytes + +Concurrency Level: 1 +Time taken for tests: 25.217880 
seconds +Complete requests: 1000 +Failed requests: 0 +Write errors: 0 +Total transferred: 206000 bytes +HTML transferred: 5000 bytes +Requests per second: 39.65 [#/sec] (mean) +Time per request: 25.218 [ms] (mean) +Time per request: 25.218 [ms] (mean, across all concurrent requests) +Transfer rate: 7.97 [Kbytes/sec] received + +Connection Times (ms) + min mean[+/-sd] median max +Connect: 0 0 0.0 0 0 +Processing: 18 24 16.1 19 128 +Waiting: 15 24 16.0 19 127 +Total: 18 24 16.1 19 128 + +Percentage of the requests served within a certain time (ms) + 50% 19 + 66% 20 + 75% 20 + 80% 20 + 90% 64 + 95% 67 + 98% 71 + 99% 80 + 100% 128 (longest request) diff --git a/includes/utf8/tests/data/utf8.html b/includes/utf8/tests/data/utf8.html new file mode 100644 index 0000000..3ffa622 --- /dev/null +++ b/includes/utf8/tests/data/utf8.html @@ -0,0 +1,755 @@ + + UTF-8 SAMPLER + + ¥ · £ · € · $ · ¢ · ₡ · ₢ · ₣ · ₤ · ₥ · ₦ · ₧ · ₨ · ₩ · ₪ · ₫ · ₭ · ₮ · ₯ + + Frank da Cruz + The Kermit Project - Columbia University + New York City + fdc@columbia.edu + + /Last update:/ Sun Jun 12 20:24:10 2005 + +------------------------------------------------------------------------ +[ PEACE ] [ Poetry <#poetry> ] [ I +Can Eat Glass <#glass> ] [ The Quick Brown Fox <#quickbrownfox> ] [ HTML +Features <#html> ] [ Credits, Tools, Commentary <#credits> ] + +UTF-8 is an ASCII-preserving encoding method for Unicode +(ISO 10646), the Universal Character Set (UCS). The UCS encodes most of +the world's writing systems in a single character set, allowing you to +mix languages and scripts within a document without needing any tricks +for switching character sets. This web page is encoded directly in UTF-8. + +As shown HERE , Columbia University's Kermit 95 +terminal emulation software can display UTF-8 plain text in Windows 95, +98, ME, NT, XP, or 2000 when using a monospace Unicode font like Andale +Mono WT J or Everson Mono Terminal +, or the lesser populated Courier New, +Lucida Console, or Andale Mono. 
C-Kermit can handle it +too, if you have a Unicode display +. As many languages as are +representable in your font can be seen on the screen at the same time. + +This, however, is a Web page. Some Web browsers can handle UTF-8, some +can't. And those that can might not have a sufficiently populated font +to work with (some browsers might pick glyphs dynamically from multiple +fonts; Netscape 6 seems to do this). CLICK HERE + for a survey of Unicode +fonts for Windows. + +The subtitle above shows currency symbols of many lands. If they don't +appear as blobs, we're off to a good start! + + + ------------------------------------------------------------------------ + Poetry + +From the Anglo-Saxon Rune Poem +(Rune version): + + ᚠᛇᚻ᛫ᛒᛦᚦ᛫ᚠᚱᚩᚠᚢᚱ᛫ᚠᛁᚱᚪ᛫ᚷᛖᚻᚹᛦᛚᚳᚢᛗ + ᛋᚳᛖᚪᛚ᛫ᚦᛖᚪᚻ᛫ᛗᚪᚾᚾᚪ᛫ᚷᛖᚻᚹᛦᛚᚳ᛫ᛗᛁᚳᛚᚢᚾ᛫ᚻᛦᛏ᛫ᛞᚫᛚᚪᚾ + ᚷᛁᚠ᛫ᚻᛖ᛫ᚹᛁᛚᛖ᛫ᚠᚩᚱ᛫ᛞᚱᛁᚻᛏᚾᛖ᛫ᛞᚩᛗᛖᛋ᛫ᚻᛚᛇᛏᚪᚾ᛬ + +From Laȝamon's/ Brut / (/The +Chronicles of England/, Middle English, West Midlands): + + An preost wes on leoden, Laȝamon was ihoten + He wes Leovenaðes sone -- liðe him be Drihten. + He wonede at Ernleȝe at æðelen are chirechen, + Uppen Sevarne staþe, sel þar him þuhte, + Onfest Radestone, þer he bock radde. + +(The third letter in the author's name is Yogh, missing from many fonts; +CLICK HERE for another Middle English sample with +some explanation of letters and encoding). + +From the Tagelied of *Wolfram von Eschenbach* + (Middle High German): + + Sîne klâwen durh die wolken sint geslagen, + er stîget ûf mit grôzer kraft, + ich sih in grâwen tägelîch als er wil tagen, + den tac, der im geselleschaft + erwenden wil, dem werden man, + den ich mit sorgen în verliez. + ich bringe in hinnen, ob ich kan. + sîn vil manegiu tugent michz leisten hiez. + +Some lines of *Odysseus Elytis* + (Greek): + + Τη γλώσσα μου έδωσαν ελληνική + το σπίτι φτωχικό στις αμμουδιές του Ομήρου. + Μονάχη έγνοια η γλώσσα μου στις αμμουδιές του Ομήρου. 
+ + από το Άξιον Εστί + του Οδυσσέα Ελύτη + +The first stanza of *Pushkin* +'s +Bronze Horseman (Russian): + + На берегу пустынных волн + Стоял он, дум великих полн, + И вдаль глядел. Пред ним широко + Река неслася; бедный чёлн + По ней стремился одиноко. + По мшистым, топким берегам + Чернели избы здесь и там, + Приют убогого чухонца; + И лес, неведомый лучам + В тумане спрятанного солнца, + Кругом шумел. + +*Šota Rustaveli* +'s Veṗxis +Ṭq̇aosani, ̣︡Th, The Knight in the Tiger's Skin (Georgian): + + ვეპხის ტყაოსანი შოთა რუსთაველი + + ღმერთსი შემვედრე, ნუთუ კვლა დამხსნას სოფლისა შრომასა, ცეცხლს, წყალსა + და მიწასა, ჰაერთა თანა მრომასა; მომცნეს ფრთენი და აღვფრინდე, + მივჰხვდე მას ჩემსა ნდომასა, დღისით და ღამით ვჰხედვიდე მზისა ელვათა + კრთომაასა. + +Tamil poetry of Cupiramaniya Paarathiyar, சுப்ரமணிய பாரதியார் (1882-1921): + + யாமறிந்த மொழிகளிலே தமிழ்மொழி போல் இனிதாவது எங்கும் காணோம், + பாமரராய் விலங்குகளாய், உலகனைத்தும் இகழ்ச்சிசொலப் பான்மை கெட்டு, + நாமமது தமிழரெனக் கொண்டு இங்கு வாழ்ந்திடுதல் நன்றோ? சொல்லீர்! + + + ------------------------------------------------------------------------ + I Can Eat Glass + +And from the sublime to the ridiculous, here is a certain phrase¹ +<#notes> in an assortment of languages: + + 1. *Sanskrit*: काचं शक्नोम्यत्तुम् । नोपहिनस्ति माम् ॥ + 2. *Sanskrit* /(standard transcription):/ kācaṃ śaknomyattum; + nopahinasti mām. + 3. *Classical Greek*: ὕαλον ϕαγεῖν δύναμαι· τοῦτο οὔ με βλάπτει. + 4. *Greek*: Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα. + *Etruscan*: (NEEDED) + 5. *Latin*: Vitrum edere possum; mihi non nocet. + 6. *Old French*: Je puis mangier del voirre. Ne me nuit. + 7. *French*: Je peux manger du verre, ça ne me fait pas de mal. + 8. *Provençal / Occitan*: Pòdi manjar de veire, me nafrariá pas. + 9. *Québécois*: J'peux manger d'la vitre, ça m'fa pas mal. + 10. *Walloon*: Dji pou magnî do vêre, çoula m' freut nén må. + *Champenois*: (NEEDED) + *Lorrain*: (NEEDED) + 11. 
*Picard*: Ch'peux mingi du verre, cha m'foé mie n'ma. + *Corsican*: (NEEDED) + 12. *Kreyòl Ayisyen*: Mwen kap manje vè, li pa blese'm. + 13. *Basque*: Kristala jan dezaket, ez dit minik ematen. + 14. *Catalan*: Puc menjar vidre que no em fa mal. + 15. *Spanish*: Puedo comer vidrio, no me hace daño. + 16. *Aragones*: Puedo minchar beire, no me'n fa mal . + 17. *Galician*: Eu podo xantar cristais e non cortarme. + 18. *Portuguese*: Posso comer vidro, não me faz mal. + 19. *Brazilian Portuguese* (7 <#notes>): Posso comer vidro, não me + machuca. + 20. *Caboverdiano*: M' podê cumê vidru, ca ta maguâ-m'. + 21. *Papiamentu*: Ami por kome glas anto e no ta hasimi daño. + 22. *Italian*: Posso mangiare il vetro e non mi fa male. + 23. *Milanese*: Sôn bôn de magnà el véder, el me fa minga mal. + 24. *Roman*: Me posso magna' er vetro, e nun me fa male. + 25. *Napoletano*: M' pozz magna' o'vetr, e nun m' fa mal. + 26. *Sicilian*: Puotsu mangiari u vitru, nun mi fa mali. + 27. *Venetian*: Mi posso magnare el vetro, no'l me fa mae. + 28. *Zeneise* /(Genovese):/ Pòsso mangiâ o veddro e o no me fà mâ. + *Rheto-Romance / Romansch*: (NEEDED) + *Romany / Tsigane*: (NEEDED) + 29. *Romanian*: Pot să mănânc sticlă și ea nu mă rănește. + 30. *Esperanto*: Mi povas manĝi vitron, ĝi ne damaĝas min. + *Pictish*: (NEEDED) + *Breton*: (NEEDED) + 31. *Cornish*: Mý a yl dybry gwéder hag éf ny wra ow ankenya. + 32. *Welsh*: Dw i'n gallu bwyta gwydr, 'dyw e ddim yn gwneud dolur i mi. + 33. *Manx Gaelic*: Foddym gee glonney agh cha jean eh gortaghey mee. + 34. *Old Irish* /(Ogham):/ ᚛᚛ᚉᚑᚅᚔᚉᚉᚔᚋ ᚔᚈᚔ ᚍᚂᚐᚅᚑ ᚅᚔᚋᚌᚓᚅᚐ᚜ + 35. *Old Irish* /(Latin):/ Con·iccim ithi nglano. Ním·géna. + 36. *Irish*: Is féidir liom gloinne a ithe. Ní dhéanann sí dochar ar + bith dom. + 37. *Scottish Gaelic*: S urrainn dhomh gloinne ithe; cha ghoirtich i mi. + 38. *Anglo-Saxon* /(Runes):/ ᛁᚳ᛫ᛗᚨᚷ᛫ᚷᛚᚨᛋ᛫ᛖᚩᛏᚪᚾ᛫ᚩᚾᛞ᛫ᚻᛁᛏ᛫ᚾᛖ᛫ᚻᛖᚪᚱᛗᛁᚪᚧ᛫ᛗᛖ᛬ + 39. *Anglo-Saxon* /(Latin):/ Ic mæg glæs eotan ond hit ne hearmiað me. + 40. 
*Middle English*: Ich canne glas eten and hit hirtiþ me nouȝt. + 41. *English*: I can eat glass and it doesn't hurt me. + 42. *English* /(IPA):/ [aɪ kæn iːt glɑːs ænd ɪt dɐz nɒt hɜːt miː] + (Received Pronunciation) + 43. *English* /(Braille):/ ⠊⠀⠉⠁⠝⠀⠑⠁⠞⠀⠛⠇⠁⠎⠎⠀⠁⠝⠙⠀⠊⠞⠀⠙⠕⠑⠎⠝⠞⠀⠓⠥⠗⠞⠀⠍⠑ + 44. *Lalland Scots / Doric*: Ah can eat gless, it disnae hurt us. + *Glaswegian*: (NEEDED) + 45. *Gothic* (4 <#notes>): 𐌼𐌰𐌲 𐌲𐌻𐌴𐍃 𐌹̈𐍄𐌰𐌽, 𐌽𐌹 𐌼𐌹𐍃 𐍅𐌿 + 𐌽𐌳𐌰𐌽 𐌱𐍂𐌹𐌲𐌲𐌹𐌸. + 46. *Old Norse* /(Runes):/ ᛖᚴ ᚷᛖᛏ ᛖᛏᛁ ᚧ ᚷᛚᛖᚱ ᛘᚾ ᚦᛖᛋᛋ ᚨᚧ ᚡᛖ ᚱᚧᚨ ᛋᚨᚱ + 47. *Old Norse* /(Latin):/ Ek get etið gler án þess að verða sár. + 48. *Norsk / Norwegian (Nynorsk):* Eg kan eta glas utan å skada meg. + 49. *Norsk / Norwegian (Bokmål):* Jeg kan spise glass uten å skade meg. + *Føroyskt / Faroese*: (NEEDED) + 50. *Íslenska / Icelandic*: Ég get etið gler án þess að meiða mig. + 51. *Svenska / Swedish*: Jag kan äta glas utan att skada mig. + 52. *Dansk / Danish*: Jeg kan spise glas, det gør ikke ondt på mig. + 53. *Soenderjysk*: Æ ka æe glass uhen at det go mæ naue. + 54. *Frysk / Frisian*: Ik kin glês ite, it docht me net sear. + 55. *Nederlands / Dutch*: Ik kan glas eten, het doet mij geen kwaad. + 56. *Kirchröadsj/Bôchesserplat*: Iech ken glaas èèse, mer 't deet + miech jing pieng. + 57. *Afrikaans*: Ek kan glas eet, maar dit doen my nie skade nie. + 58. *Lëtzebuergescht / Luxemburgish*: Ech kan Glas iessen, daat deet + mir nët wei. + 59. *Deutsch / German*: Ich kann Glas essen, ohne mir weh zu tun. + 60. *Ruhrdeutsch*: Ich kann Glas verkasematuckeln, ohne dattet mich + wat jucken tut. + 61. *Lausitzer Mundart* ("Lusatian"): Ich koann Gloos assn und doas + dudd merr ni wii. + 62. *Odenwälderisch*: Iech konn glaasch voschbachteln ohne dass es mir + ebbs daun doun dud. + 63. *Sächsisch / Saxon*: 'sch kann Glos essn, ohne dass'sch mer wehtue. + 64. *Pfälzisch*: Isch konn Glass fresse ohne dasses mer ebbes ausmache + dud. + 65. *Schwäbisch / Swabian*: I kå Glas frässa, ond des macht mr nix! + 66. 
*Bayrisch / Bavarian*: I koh Glos esa, und es duard ma ned wei. + 67. *Allemannisch*: I kaun Gloos essen, es tuat ma ned weh. + 68. *Schwyzerdütsch*: Ich chan Glaas ässe, das tuet mir nöd weeh. + 69. *Hungarian*: Meg tudom enni az üveget, nem lesz tőle bajom. + 70. *Suomi / Finnish*: Voin syödä lasia, se ei vahingoita minua. + 71. *Sami (Northern)*: Sáhtán borrat lása, dat ii leat bávččas. + 72. *Erzian*: Мон ярсан суликадо, ды зыян эйстэнзэ а ули. + *Karelian*: (NEEDED) + *Vepsian*: (NEEDED) + *Votian*: (NEEDED) + *Livonian*: (NEEDED) + 73. *Estonian*: Ma võin klaasi süüa, see ei tee mulle midagi. + 74. *Latvian*: Es varu ēst stiklu, tas man nekaitē. + 75. *Lithuanian*: Aš galiu valgyti stiklą ir jis manęs nežeidžia + *Old Prussian*: (NEEDED) + *Sorbian* (Wendish): (NEEDED) + 76. *Czech*: Mohu jíst sklo, neublíží mi. + 77. *Slovak*: Môžem jesť sklo. Nezraní ma. + 78. *Polska / Polish*: Mogę jeść szkło i mi nie szkodzi. + 79. *Slovenian:* Lahko jem steklo, ne da bi mi škodovalo. + 80. *Croatian*: Ja mogu jesti staklo i ne boli me. + 81. *Serbian* /(Latin):/ Mogu jesti staklo a da mi ne škodi. + 82. *Serbian* /(Cyrillic):/ Могу јести стакло а да ми не шкоди. + 83. *Macedonian:* Можам да јадам стакло, а не ме штета. + 84. *Russian*: Я могу есть стекло, оно мне не вредит. + 85. *Belarusian* /(Cyrillic):/ Я магу есці шкло, яно мне не шкодзіць. + 86. *Belarusian* /(Lacinka):/ Ja mahu jeści škło, jano mne ne škodzić. + 87. *Ukrainian*: Я можу їсти шкло, й воно мені не пошкодить. + 88. *Bulgarian*: Мога да ям стъкло, то не ми вреди. + 89. *Georgian*: მინას ვჭამ და არა მტკივა. + 90. *Armenian*: Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։ + 91. *Albanian*: Unë mund të ha qelq dhe nuk më gjen gjë. + 92. *Turkish*: Cam yiyebilirim, bana zararı dokunmaz. + 93. *Turkish* /(Ottoman):/ جام ييه بلورم بڭا ضررى طوقونمز + 94. *Bangla / Bengali*: আমি কাঁচ খেতে পারি, তাতে আমার কোনো ক্ষতি হয় না। + 95. *Marathi*: मी काच खाऊ शकतो, मला ते दुखत नाही. + 96. 
*Hindi*: मैं काँच खा सकता हूँ, मुझे उस से कोई पीडा नहीं होती. + 97. *Tamil*: நான் கண்ணாடி சாப்பிடுவேன், அதனால் எனக்கு ஒரு கேடும் வராது. + 98. *Urdu*(2) <#notes>: میں کانچ کھا سکتا ہوں اور مجھے تکلیف نہیں ہوتی ۔ + 99. *Pashto*(2) <#notes>: زه شيشه خوړلې شم، هغه ما نه خوږوي + 100. *Farsi / Persian*: .من می توانم بدونِ احساس درد شيشه بخورم + 101. *Arabic*(2) <#notes>: أنا قادر على أكل الزجاج و هذا لا يؤلمني. + *Aramaic*: (NEEDED) + 102. *Hebrew*(2) <#notes>: אני יכול לאכול זכוכית וזה לא מזיק לי. + 103. *Yiddish*(2) <#notes>: איך קען עסן גלאָז און עס טוט מיר נישט װײ. + *Judeo-Arabic*: (NEEDED) + *Ladino*: (NEEDED) + *Gǝʼǝz*: (NEEDED) + *Amharic*: (NEEDED) + 104. *Twi*: Metumi awe tumpan, ɜnyɜ me hwee. + 105. *Hausa* (/Latin/): Inā iya taunar gilāshi kuma in gamā lāfiyā. + 106. *Hausa* (/Ajami/) (2) <#notes>: إِنا إِىَ تَونَر غِلَاشِ كُمَ إِن غَمَا لَافِىَا + 107. *Yoruba*(3) <#notes>: Mo lè je̩ dígí, kò ní pa mí lára. + 108. *(Ki)Swahili*: Naweza kula bilauri na sikunyui. + 109. *Malay*: Saya boleh makan kaca dan ia tidak mencederakan saya. + 110. *Tagalog*: Kaya kong kumain nang bubog at hindi ako masaktan. + 111. *Chamorro*: Siña yo' chumocho krestat, ti ha na'lalamen yo'. + 112. *Javanese*: Aku isa mangan beling tanpa lara. + *Burmese*: (NEEDED) + 113. *Vietnamese (quốc ngữ)*: Tôi có thể ăn thủy tinh mà không hại gì. + 114. *Vietnamese (nôm)* (4 <#notes>): 些 𣎏 世 咹 水 晶 𦓡 空 𣎏 害 咦 + *Khmer*: (NEEDED) + *Lao*: (NEEDED) + 115. *Thai*: ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ + 116. *Mongolian* /(Cyrillic):/ Би шил идэй чадна, надад хортой биш + 117. *Mongolian* /(Classic) (5 <#notes>):/ ᠪᠢ ᠰᠢᠯᠢ ᠢᠳᠡᠶᠦ ᠴᠢᠳᠠᠨᠠ ᠂ ᠨᠠᠳᠤᠷ + ᠬᠣᠤᠷᠠᠳᠠᠢ ᠪᠢᠰᠢ + *Dzongkha*: (NEEDED) + *Nepali*: (NEEDED) + 118. *Tibetan*: ཤེལ་སྒོ་ཟ་ནས་ང་ན་གི་མ་རེད། + 119. *Chinese*: 我能吞下玻璃而不伤身体。 + 120. *Chinese* (Traditional): 我能吞下玻璃而不傷身體。 + 121. *Taiwanese*(6) <#notes>: Góa ē-tàng chia̍h po-lê, mā bē tio̍h-siong. + 122. *Japanese*: 私はガラスを食べられます。それは私を傷つけません。 + 123. *Korean*: 나는 유리를 먹을 수 있어요. 그래도 아프지 않아요 + 124. 
*Bislama*: Mi save kakae glas, hemi no save katem mi. + 125. *Hawaiian*: Hiki iaʻu ke ʻai i ke aniani; ʻaʻole nō lā au e ʻeha. + 126. *Marquesan*: E koʻana e kai i te karahi, mea ʻā, ʻaʻe hauhau. + 127. *Chinook Jargon:* Naika məkmək kakshət labutay, pi weyk ukuk + munk-sik nay. + 128. *Navajo*: Tsésǫʼ yishą́ągo bííníshghah dóó doo shił neezgai da. + *Cherokee* /(and Cree, Ojibwa, Inuktitut, and other Native + American languages):/ (NEEDED) + *Garifuna*: (NEEDED) + *Gullah*: (NEEDED) + 129. *Lojban*: mi kakne le nu citka le blaci .iku'i le se go'i na xrani mi + 130. *Nórdicg*: Ljœr ye caudran créneþ ý jor cẃran. + +/(Additions, corrections, completions,/ /gratefully accepted/ +/.)/ + +For testing purposes, some of these are repeated in a *monospace +font* . . . + + 1. Euro Symbol: €. + 2. Greek: Μπορώ να φάω σπασμένα γυαλιά χωρίς να πάθω τίποτα. + 3. Íslenska / Icelandic: Ég get etið gler án þess að meiða mig. + 4. Polish: Mogę jeść szkło, i mi nie szkodzi. + 5. Romanian: Pot să mănânc sticlă și ea nu mă rănește. + 6. Ukrainian: Я можу їсти шкло, й воно мені не пошкодить. + 7. Armenian: Կրնամ ապակի ուտել և ինծի անհանգիստ չըներ։ + 8. Georgian: მინას ვჭამ და არა მტკივა. + 9. Hindi: मैं काँच खा सकता हूँ, मुझे उस से कोई पीडा नहीं होती. + 10. Hebrew(2) <#notes>: אני יכול לאכול זכוכית וזה לא מזיק לי. + 11. Yiddish(2) <#notes>: איך קען עסן גלאָז און עס טוט מיר נישט װײ. + 12. Arabic(2) <#notes>: أنا قادر على أكل الزجاج و هذا لا يؤلمني. + 13. Japanese: 私はガラスを食べられます。それは私を傷つけません。 + 14. Thai: ฉันกินกระจกได้ แต่มันไม่ทำให้ฉันเจ็บ + +*Notes:* + + 1. The "I can eat glass" phrase and initial translations (about 30 of + them) were borrowed from Ethan Mollick's I Can Eat Glass + page (which disappeared + on or about June 2004) and converted to UTF-8. Since Ethan's + original page is gone, I should mention that his purpose was offer + travelers a phrase they could use in any country that would + command a certain kind of respect, or at least get attention. 
See + Credits <#credits> for the many additional contributions since + then. When submitting new entries, the word "hurt" (if you have a + choice) is used in the sense of "cause harm", "do damage", or + "bother", rather than "inflict pain" or "make sad". In this vein + Otto Stolz comments (as do others further down; personally I think + it's better for the purpose of this page to have extra entries + and/or to show a greater repertoire of characters than it is to + enforce a strict interpretation of the word "hurt"!): + + This is the meaning I have translated to the Swabian dialect. + However, I just have noticed that most of the German variants + translate the "inflict pain" meaning. The German example + should rather read: + + "Ich kann Glas essen ohne mir zu schaden." + + (The comma fell victim to the 1996 orthographic reform, cf. + http://www.ids-mannheim.de/reform/e3-1.html#P76.) + + You may wish to contact the contributors of the following + translations to correct them: + + * Lëtzebuergescht / Luxemburgish: Ech kan Glas iessen, + daat deet mir nët wei. + * Lausitzer Mundart ("Lusatian"): Ich koann Gloos assn und + doas dudd merr ni wii. + * Sächsisch / Saxon: 'sch kann Glos essn, ohne dass'sch + mer wehtue. + * Bayrisch / Bavarian: I koh Glos esa, und es duard ma ned + wei. + * Allemannisch: I kaun Gloos essen, es tuat ma ned weh. + * Schwyzerdütsch: Ich chan Glaas ässe, das tuet mir nöd weeh. + + In contrast, I deem the following translations *alright*: + + * Ruhrdeutsch: Ich kann Glas verkasematuckeln, ohne dattet + mich wat jucken tut. + * Pfälzisch: Isch konn Glass fresse ohne dasses mer ebbes + ausmache dud. + * Schwäbisch / Swabian: I kå Glas frässa, ond des macht mr + nix! + + (However, you could remove the commas, on account of + http://www.ids-mannheim.de/reform/e3-1.html#P76 and + http://www.ids-mannheim.de/reform/e3-1.html#P72, respectively.) 
+ + I guess, also these examples translate the /wrong/ sense of + "hurt", though I do not know these languages well enough to + assert them definitely: + + * Nederlands / Dutch: Ik kan glas eten; het doet mij geen + pijn. /(This one has been changed)/ + * Kirchröadsj/Bôchesserplat: Iech ken glaas èèse, mer 't + deet miech jing pieng. + + In the Romanic languages, the variations on "fa male" (it) are + probably wrong, whilst the variations on "hace daño" (es) and + "damaĝas" (Esperanto) are probably correct; "nocet" (la) is + definitely right. + + The northern Germanic variants of "skada" are probably right, + as are the Slavic variants of "škodi/шкоди" (se); however the + Slavic variants of " boli" (hv) are probably wrong, as + "bolena" means "pain/ache", IIRC. + + The numbering of the samples is arbitrary, done only to keep track + of how many there are, and can change any time a new entry is + added. The arrangement is also arbitrary but with some attempt to + group related examples together. Note: All languages not listed + are wanted, not just the ones that say (NEEDED). + + 2. Correct right-to-left display of these languages depends on the + capabilities of your browser. The period should appear on the + left. In the monospace Yiddish example, the Yiddish digraphs + should occupy one character cell. + 3. Yoruba: The third word is Latin letter small 'j' followed by small + 'e' with U+0329, Combining Vertical Line Below. This displays + correctly only if your Unicode font includes the U+0329 glyph and + your browser supports combining diacritical marks. The Indic + examples also include combining sequences. + 4. Includes Unicode 3.1 (or later) characters beyond Plane 0. + 5. The Classic Mongolian example should be vertical, top-to-bottom + and left-to-right. But such display is almost impossible. Also no + font yet exists which provides the proper ligatures and positional + variants for the characters of this script, which works somewhat + like Arabic. + 6. 
Taiwanese is also known as Holo or Hoklo, and is related to + Southern Min dialects such as Amoy. Contributed by Henry H. + Tan-Tenn, who comments, "The above is the romanized version, in a + script current among Taiwanese Christians since the mid-19th + century. It was invented by British missionaries and saw use in + hundreds of published works, mostly of a religious nature. Most + Taiwanese did not know Chinese characters then, or at least not + well enough to read. More to the point, though, a written standard + using Chinese characters has never developed, so a significant + minority of words are represented with different candidate + characters, depending on one's personal preference or etymological + theory. In this sentence, for example, "-tàng", "chia̍h", "mā" and + "bē" are problematic using Chinese characters. "Góa" (I/me) and + "po-lê" (glass) are as written in other Sinitic languages (e.g. + Mandarin, Hakka)." + 7. Wagner Amaral of Pinese & Amaral Associados notes that the + Brazilian Portuguese sentence for "I can eat glass" should be + identical to the Portuguese one, as the word "machuca" means + "inflict pain", or rather "injuries". The words "faz mal" would + more correctly translate as "cause harm". + + + ------------------------------------------------------------------------ + The Quick Brown Fox + +The "I can eat glass" sentences do not necessarily show off the +orthography of each language to best advantage. In many alphabetic +written languages it is possible to include all (or most) letters (or +"special" characters) in a single (often nonsense) /pangram/. These were +traditionally used in typewriter instruction; now they are useful for +stress-testing computer fonts and keyboard input methods. Here are a few +examples (SEND MORE): + + 1. *English:* The quick brown fox jumps over the lazy dog. + 2. *Irish:* "An ḃfuil do ċroí ag bualaḋ ó ḟaitíos an ġrá a ṁeall lena + ṗóg éada ó ṡlí do leasa ṫú?" 
"D'ḟuascail Íosa Úrṁac na hÓiġe + Beannaiṫe pór Éava agus Áḋaiṁ." + 3. *Dutch:* Pa's wijze lynx bezag vroom het fikse aquaduct. + 4. *German: * Falsches Üben von Xylophonmusik quält jeden größeren + Zwerg. (1) + 5. *German: * Im finſteren Jagdſchloß am offenen Felsquellwaſſer + patzte der affig-flatterhafte kauzig-höf‌liche Bäcker über ſeinem + verſifften kniffligen C-Xylophon. (2) + 6. *Swedish:* Flygande bäckasiner söka strax hwila på mjuka tuvor. + 7. *Czech:* Příliš žluťoučký kůň úpěl ďábelské kódy. + 8. *Slovak:* Starý kôň na hŕbe kníh žuje tíško povädnuté ruže, na + stĺpe sa ďateľ učí kvákať novú ódu o živote. + 9. *Russian:* В чащах юга жил-был цитрус? Да, но фальшивый экземпляр! + ёъ. + 10. *Bulgarian:* Жълтата дюля беше щастлива, че пухът, който цъфна, + замръзна като гьон. + 11. *Sami (Northern):* Vuol Ruoŧa geđggiid leat máŋga luosa ja čuovžža. + 12. *Hungarian:* Árvíztűrő tükörfúrógép. + 13. *Spanish:* El pingüino Wenceslao hizo kilómetros bajo exhaustiva + lluvia y frío, añoraba a su querido cachorro. + 14. *Portuguese:* O próximo vôo à noite sobre o Atlântico, põe + freqüentemente o único médico. (3) + 15. *French:* Les naïfs ægithales hâtifs pondant à Noël où il gèle + sont sûrs d'être déçus et de voir leurs drôles d'œufs abîmés. + 16. *Esperanto:* Eĥoŝanĝo ĉiuĵaŭde. + 17. *Hebrew:* זה כיף סתם לשמוע איך תנצח קרפד עץ טוב בגן. + 18. *Japanese* (Hiragana): + + いろはにほへど ちりぬるを + わがよたれぞ つねならむ + うゐのおくやま けふこえて + あさきゆめみじ ゑひもせず (4) + +*Notes:* + + 1. Other phrases commonly used in Germany include: "Ein wackerer + Bayer vertilgt ja bequem zwo Pfund Kalbshaxe" and, more recently, + "Franz jagt im komplett verwahrlosten Taxi quer durch Bayern", but + both lack umlauts and esszet. Previously, going for the shortest + sentence that has all the umlauts and special characters, I had + "Grüße aus Bärenhöfe (und Óechtringen)!" 
Acute accents are not + used in native German words, so I was surprised to discover + "Óechtringen" in the Deutsche Bundespost Postleitzahlenbuch + (Vorsicht! + 2.8MB JPG image). It's a small village in eastern Lower Saxony. + The "oe" in this case turns out to be the Lower Saxon "lengthening + e" (Dehnungs-e), which makes the previous vowel long (used in a + number of Lower Saxon place names such as Soest and Itzehoe), not + the "e" that indicates umlaut of the preceding vowel. Many thanks + to the Óechtringen-Namenschreibungsuntersuchungskomitee (Alex + Bochannek, Manfred Erren, Asmus Freytag, Christoph Päper, plus + Werner Lemberg who serves as the + Óechtringen-Namenschreibungsuntersuchungskomiteerechtschreibungsprüfer) + for their relentless pursuit of the facts in this case. + Conclusion: the accent almost certainly does not belong on this + (or any other native German) word, but neither can it be dismissed + as dirt on the page. To add to the mystery, it has been reported + that other copies of the same edition of the PLZB do not show the + accent! + + 2. From Karl Pentzlin (Kochel am See, Bavaria, Germany): "This German + phrase is suited for display by a Fraktur (broken letter) font. It + contains: all common three-letter ligatures: ffi ffl fft and all + two-letter ligatures required by the Duden for Fraktur + typesetting: ch ck ff fi fl ft ll ſch ſi ſſ ſt tz (all in a manner + such they are not part of a three-letter ligature), one example of + f-l where German typesetting rules prohibit ligating (marked by a + ZWNJ), and all German letters a...z, ä,ö,ü,ß, ſ [long s] (all in a + manner such that they are not part of a two-letter Fraktur + ligature)." Otto Stolz notes that "'Schloß' is now spelled + 'Schloss', in contrast to 'größer' (example 4) which has kept its + 'ß'. 
Fraktur has been banned from general use, in 1942, and long-s + (ſ) has ceased to be used with Antiqua (Roman) even earlier (the + latest Antiqua-ſ I have seen is from 1913, but then I am no + expert, so there may well be a later instance." Later Otto + confirms the latter theory, "Now I've run across a book “Deutsche + Rechtschreibung” (edited by Lutz Mackensen) from 1954 (my reprint + is from 1956) that has kept the Antiqua-ſ in its dictionary part + (but neither in the preface nor in the appendix)." + + 3. Diaeresis is not used in Iberian Portuguese. + + 4. From Yurio Miyazawa: "This poetry contains all the sounds in the + Japanese language and used to be the first thing for children to + learn in their Japanese class. The Hiragana version is + particularly neat because it covers every character in the + phonetic Hiragana character set." Yurio also sent the Kanji version: + + 色は匂へど 散りぬるを + 我が世誰ぞ 常ならむ + 有為の奥山 今日越えて + 浅き夢見じ 酔ひもせず + +*Accented Cyrillic:* + +/(This section contributed by Vladimir Marinov.)/ + +In Bulgarian it is desirable, customary, or in some cases required to +write accents over vowels. Unfortunately, no computer character sets +contain the full repertoire of accented Cyrillic letters. With Unicode, +however, it is possible to combine any Cyrillic letter with any +combining accent. The appearance of the result depends on the font and +the rendering engine. Here are two examples. + + 1. Той видя бялата коса́ по главата и́ и ко́са на рамото и́, и ре́че да и́ + рече́: "Пара́та по́ па́ри от па́рата, не ща пари́!", но си поми́сли: + "Хей, помисли́ си! А́ и́ река, а́ е скочила в тази река, която щеше да + тече́, а не те́че." + + 2. По пъ́тя пъту́ват кю́рди и югославя́ни. + + + ------------------------------------------------------------------------ + HTML Features + +Here is the Russian alphabet (uppercase only) coded in three different +ways, which should look identical: + + 1. АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ /(Literal UTF-8)/ + 2. 
АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ /(Decimal numeric character + reference)/ + 3. АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ /(Hexadecimal numeric character + reference)/ + +In another test, we use HTML language tags to distinguish Bulgarian, +Russian, and Serbian +, which have +different italic forms for lowercase б, г, д, п, and/or т: + + *Bulgarian*: [ бгдпт ] [ /бгдпт/ ] / Мога да ям стъкло и не + ме боли./ + *Russian*: [ бгдпт ] [ /бгдпт/ ] /Я могу есть стекло, это мне + не вредит./ + *Serbian*: [ бгдпт ] [ /бгдпт/ ] /Могу јести стакло а да ми + не шкоди./ + + + ------------------------------------------------------------------------ + Credits, Tools, and Commentary + +*Credits:* + The "I can eat glass" phrase and the initial collection of + translations: Ethan Mollick + . Transcription / conversion + to UTF-8: Frank da Cruz. *Albanian:* Sindi Keesan. *Afrikaans:* + Johan Fourie, Kevin Poalses. *Anglo Saxon:* Frank da Cruz. *Arabic:* + Najib Tounsi. *Armenian:* Vaçe Kundakçı. *Belarusian:* Alexey + Chernyak. *Bengali:* Somnath Purkayastha, Deepayan Sarkar. + *Bislama:* Dan McGarry. *Braille:* Frank da Cruz. *Bulgarian:* Sindi + Keesan, Guentcho Skordev, Vladimir Marinov. *Cabo Verde Creole:* + Cláudio Alexandre Duarte. *Chinese:* Jack Soo, Wong Pui Lam. + *Chinook Jargon:* David Robertson. *Cornish:* Chris Stephens. + *Croatian:* Marjan Baće. *Czech:* Stanislav Pecha, Radovan Garabík. + *Dutch:* Peter Gotink. Pim Blokland, Rob Daniel, Rob de Wit. + *Erzian:* Jack Rueter. *Esperanto:* Franko Luin, Radovan Garabík. + *Estonian:* Meelis Roos. *Farsi/Persian:* Payam Elahi. *Finnish:* + Sampsa Toivanen. *French:* Luc Carissimo, Anne Colin du Terrail, + Sean M. Burke. *Galician:* Laura Probaos. *Georgian:* Giorgi + Lebanidze. *German:* Christoph Päper, Otto Stolz, Karl Pentzlin, + Frank da Cruz. *Gothic:* Aurélien Coudurier. *Greek:* Ariel Glenn, + Constantine Stathopoulos, Siva Nataraja. *Hebrew:* Jonathan Rosenne, + Tal Barnea. *Hausa:* Malami Buba, Tom Gewecke. 
*Hawaiian:* na + Hauʻoli Motta, Anela de Rego, Kaliko Trapp. *Hindi:* Shirish Kalele. + *Hungarian:* András Rácz, Mark Holczhammer. *Icelandic:* Andrés + Magnússon. *International Phonetic Alphabet (IPA):* Siva Nataraja / + Vincent Ramos. *Irish:* Michael Everson, Marion Gunn, James Kass, + Curtis Clark. *Italian:* Thomas De Bellis. *Japanese:* Makoto + Takahashi, Yurio Miyazawa. *Kirchröadsj:* Roger Stoffers. *Kreyòl:* + Sean M. Burke. *Korean:* Jungshik Shin. *Lëtzebuergescht:* Stefaan + Eeckels. *Lithuanian:* Gediminas Grigas. *Lojban:* Edward Cherlin. + *Lusatian:* Ronald Schaffhirt. *Macedonian:* Sindi Keesan. *Malay:* + Zarina Mustapha. *Manx:* Éanna Ó Brádaigh. *Marathi:* Shirish + Kalele. *Marquesan:* Kaliko Trapp. *Middle English:* Frank da Cruz. + *Milanese:* Marco Cimarosti. *Mongolian:* Tom Gewecke. *Napoletano:* + Diego Quintano. *Navajo:* Tom Gewecke. *Nórdicg* + : Yẃlyan Rott. + *Norwegian:* Herman Ranes. *Odenwälderisch:* Alexander Heß. *Old + Irish:* Michael Everson. *Old Norse:* Andrés Magnússon. + *Papiamentu:* Bianca and Denise Zanardi. *Pashto:* N.R. Liwal. + *Pfälzisch:* Dr. Johannes Sander. *Picard:* Philippe Mennecier. + *Polish:* Juliusz Chroboczek. *Portuguese:* "Cláudio" Alexandre + Duarte, Bianca and Denise Zanardi, Pedro Palhoto Matos, Wagner + Amaral. *Québécois:* Laurent Detillieux. *Roman:* Pierpaolo + Bernardi. *Romanian:* Juliusz Chroboczek, Ionel Mugurel. + *Ruhrdeutsch:* "Timwi". *Russian:* Alexey Chernyak, Serge + Nesterovitch. *Sami:* Anne Colin du Terrail, Luc Carissimo. + *Sanskrit:* Siva Nataraja / Vincent Ramos. *Sächsisch:* André + Müller. *Schwäbisch:* Otto Stolz. *Scots:* Jonathan Riddell. + *Serbian:* Sindi Keesan, Ranko Narancic, Boris Daljevic, Szilvia + Csorba. *Slovak:* G. Adam Stanislav, Radovan Garabík. *Slovenian:* + Albert Kolar. *Spanish:* Aleida Muñoz + , Laura Probaos. *Swahili:* Ronald + Schaffhirt. *Swedish:* Christian Rose, Bengt Larsson. *Taiwanese:* + Henry H. Tan-Tenn. *Tagalog:* Jim Soliven. 
*Tamil:* Vasee + Vaseeharan. *Tibetan:* D. Germano, Tom Gewecke. *Thai:* Alan Wood's + wife. *Turkish:* Vaçe Kundakçı, Tom Gewecke, Merlign Olnon. + *Ukrainian:* Michael Zajac. *Urdu:* Mustafa Ali. *Vietnamese* + : Dixon Au, [James] Đỗ Bá Phước 杜 伯 福. + *Walloon:* Pablo Saratxaga. *Welsh:* Geiriadur Prifysgol Cymru + (Andrew). *Yiddish:* Mark David. *Zeneise:* Angelo Pavese. + +*Tools Used to Create This Web Page:* + The UTF8-aware Kermit 95 terminal emulator on Windows, to + a Unix host with the EMACS + text editor. Kermit 95 displays UTF-8 and also allows keyboard entry + of arbitrary Unicode BMP characters as 4 hex digits, as shown HERE + . Hex codes for Unicode values can be found in The + Unicode Standard + (recommended) and the online code charts + . When submissions arrive by email + encoded in some other character set (Latin-1, Latin-2, KOI, various + PC code pages, JEUC, etc), I use the TRANSLATE command of C-Kermit + on the Unix host (where I read my mail ) + to convert the character set to UTF-8 (I could also use Kermit 95 + for this; it has the same TRANSLATE command). That's it -- no "Web + authoring" tools, no locales, no "smart" anything. It's just plain + text, nothing more. By the way, there's nothing special about EMACS + -- any text editor will do, providing it allows entry of arbitrary + 8-bit bytes as text, including the 0x80-0x9F "C1" range. EMACS 21.1 + actually supports UTF-8; earlier versions don't know about it and + display the octal codes; either way is OK for this purpose. + +*Commentary:* + Date: Wed, 27 Feb 2002 13:21:59 +0100 + From: "Bruno DEDOMINICIS" + Subject: Je peux manger du verre, cela ne me fait pas mal. + + I just found out your website and it makes me feel like proposing an + interpretation of the choice of this peculiar phrase. + + Glass is transparent and can hurt as everyone knows. The relation + between people and civilisations is sometimes effusional and more + often rude.
The concept of breaking frontiers through globalization, + in a way, is also an attempt to deny any difference. Isn't + "transparency" the flag of modernity? Nothing should be hidden any + more, authority is obsolete, and the new powers are supposed to + reign through loving and smiling and no more through coercion... + + Eating glass without pain sounds like a very nice metaphor of this + attempt. That is, frontiers should become glass transparent first, + and be denied by incorporating them. On the reverse, it shows that + through globalization, frontiers undergo a process of displacement, + that is, when they are not any more speakable, they become repressed + from the speech and are therefore incorporated and might become + painful symptoms, as for example what happens when one tries to eat + glass. + + The frontiers that used to separate bodies one from another tend to + divide bodies from within and make them suffer.... The chosen phrase + then appears as a denial of the symptom that might result from the + destitution of traditional frontiers. + + Best, + Bruno De Dominicis, Paris, France + +*Other Unicode pages onsite:* + + * Peace in All Languages + * Frank's Compulsive Guide to Postal Addresses + (especially the Index ) + * Representing Middle English on the Web with UTF-8 + * The Kermit Bibliography (in UTF-8) + * Interchange of Non-English Computer Text (UTF-8 + math and box-drawing) + * Unicode Table (in UTF-8) + +*Unicode samplers offsite:* + + * Michael Everson's Bibliography of Typography and Scripts + + * Sample Unicode Test Pages and Script Links + + * I don't know, I only work here + * Anyone can be provincial! 
+ + * Transcriptions of "Unicode" + + * Example Unicode Usage for Business Applications + + * UTF-8 and Unicode FAQ for Unix/Linux + + +*Unicode fonts:* + + * Unicode Fonts for Windows Computers + (Alan Wood) + * Unicode Fonts and Tools for X11 + (Markus Kuhn) + * Everson Mono (Michael Everson) + * Agfa Monotype + +[ Kermit 95 ] [ K95 Screen Shots ] [ C-Kermit + ] [ Kermit Home ] [ Display Problems? + ] [ The Unicode +Consortium ] + +------------------------------------------------------------------------ +UTF-8 Sampler / The Kermit Project / Columbia University + / kermit@columbia.edu + + diff --git a/includes/utf8/tests/index.php b/includes/utf8/tests/index.php new file mode 100644 index 0000000..0ee8b3a --- /dev/null +++ b/includes/utf8/tests/index.php @@ -0,0 +1,39 @@ + + + +PHPUTF8 Tests + + + + + + \ No newline at end of file diff --git a/includes/utf8/tests/runtests.php b/includes/utf8/tests/runtests.php new file mode 100644 index 0000000..0d59f4e --- /dev/null +++ b/includes/utf8/tests/runtests.php @@ -0,0 +1,32 @@ +GroupTest('All PHPUTF8 Tests'); + $this->loadGroups(); + } + + function loadGroups() { + $path = dirname(__FILE__).'/cases'; + if ( $d = opendir($path) ) { + while (($file = readdir($d)) !== false) { + if ( is_file($path.'/'.$file) ) { + $farray = explode('.',$file); + if ( $farray[1] == 'test' ) { + $this->AddTestFile($path.'/'.$file); + } + } + } + closedir($d); + } + } +} +/** +* Run the tests +*/ +$test = &new AllTests(); +$test->run(getTestReporter()); diff --git a/includes/utf8/trim.php b/includes/utf8/trim.php new file mode 100644 index 0000000..7f1cb3e --- /dev/null +++ b/includes/utf8/trim.php @@ -0,0 +1,68 @@ + +* @see http://www.php.net/ltrim +* @see http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php +* @return string +* @package utf8 +* @subpackage strings +*/ +function utf8_ltrim( $str, $charlist = FALSE ) { + if($charlist === FALSE) return ltrim($str); + + //quote charlist for use in a characterclass + $charlist = 
preg_replace('!([\\\\\\-\\]\\[/^])!','\\\${1}',$charlist); + + return preg_replace('/^['.$charlist.']+/u','',$str); +} + +//--------------------------------------------------------------- +/** +* UTF-8 aware replacement for rtrim() +* Note: you only need to use this if you are supplying the charlist +* optional arg and it contains UTF-8 characters. Otherwise rtrim will +* work normally on a UTF-8 string +* @author Andreas Gohr +* @see http://www.php.net/rtrim +* @see http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php +* @return string +* @package utf8 +* @subpackage strings +*/ +function utf8_rtrim( $str, $charlist = FALSE ) { + if($charlist === FALSE) return rtrim($str); + + //quote charlist for use in a characterclass + $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!','\\\${1}',$charlist); + + return preg_replace('/['.$charlist.']+$/u','',$str); +} + +//--------------------------------------------------------------- +/** +* UTF-8 aware replacement for trim() +* Note: you only need to use this if you are supplying the charlist +* optional arg and it contains UTF-8 characters. 
Otherwise trim will +* work normally on a UTF-8 string +* @author Andreas Gohr +* @see http://www.php.net/trim +* @see http://dev.splitbrain.org/view/darcs/dokuwiki/inc/utf8.php +* @return string +* @package utf8 +* @subpackage strings +*/ +function utf8_trim( $str, $charlist = FALSE ) { + if($charlist === FALSE) return trim($str); + return utf8_ltrim(utf8_rtrim($str, $charlist), $charlist); +} \ No newline at end of file diff --git a/includes/utf8/ucfirst.php b/includes/utf8/ucfirst.php new file mode 100644 index 0000000..3897bfe --- /dev/null +++ b/includes/utf8/ucfirst.php @@ -0,0 +1,34 @@ + +* if ( utf8_is_ascii($someString) ) { +* // It's just ASCII - use the native PHP version +* $someString = strtolower($someString); +* } else { +* $someString = utf8_strtolower($someString); +* } +* +* +* @param string +* @return boolean TRUE if it's all ASCII +* @package utf8 +* @subpackage ascii +* @see utf8_is_ascii_ctrl +*/ +function utf8_is_ascii($str) { + // Search for any bytes which are outside the ASCII range... + return (preg_match('/(?:[^\x00-\x7F])/',$str) !== 1); +} + +//-------------------------------------------------------------------- +/** +* Tests whether a string contains only 7bit ASCII bytes with device +* control codes omitted. 
The device control codes can be found on the +* second table here: http://www.w3schools.com/tags/ref_ascii.asp +* +* @param string +* @return boolean TRUE if it's all ASCII without device control codes +* @package utf8 +* @subpackage ascii +* @see utf8_is_ascii +*/ +function utf8_is_ascii_ctrl($str) { + if ( strlen($str) > 0 ) { + // Search for any bytes which are outside the ASCII range, + // or are device control codes + return (preg_match('/[^\x09\x0A\x0D\x20-\x7E]/',$str) !== 1); + } + return FALSE; +} + +//-------------------------------------------------------------------- +/** +* Strip out all non-7bit ASCII bytes +* If you need to transmit a string to a system which you know can only +* support 7bit ASCII, you could use this function. +* @param string +* @return string with non ASCII bytes removed +* @package utf8 +* @subpackage ascii +* @see utf8_strip_non_ascii_ctrl +*/ +function utf8_strip_non_ascii($str) { + ob_start(); + while ( preg_match( + '/^([\x00-\x7F]+)|([^\x00-\x7F]+)/S', + $str, $matches) ) { + if ( !isset($matches[2]) ) { + echo $matches[0]; + } + $str = substr($str, strlen($matches[0])); + } + $result = ob_get_contents(); + ob_end_clean(); + return $result; +} + +//-------------------------------------------------------------------- +/** +* Strip out device control codes in the ASCII range +* which are not permitted in XML.
Note that this leaves +* multi-byte characters untouched - it only removes device +* control codes +* @see http://hsivonen.iki.fi/producing-xml/#controlchar +* @param string +* @return string control codes removed +*/ +function utf8_strip_ascii_ctrl($str) { + ob_start(); + while ( preg_match( + '/^([^\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+)|([\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+)/S', + $str, $matches) ) { + if ( !isset($matches[2]) ) { + echo $matches[0]; + } + $str = substr($str, strlen($matches[0])); + } + $result = ob_get_contents(); + ob_end_clean(); + return $result; +} + +//-------------------------------------------------------------------- +/** +* Strip out all non 7bit ASCII bytes and ASCII device control codes. +* For a list of ASCII device control codes see the 2nd table here: +* http://www.w3schools.com/tags/ref_ascii.asp +* +* @param string +* @return string with non ASCII bytes and device control codes removed +* @package utf8 +* @subpackage ascii +*/ +function utf8_strip_non_ascii_ctrl($str) { + ob_start(); + while ( preg_match( + '/^([\x09\x0A\x0D\x20-\x7E]+)|([^\x09\x0A\x0D\x20-\x7E]+)/S', + $str, $matches) ) { + if ( !isset($matches[2]) ) { + echo $matches[0]; + } + $str = substr($str, strlen($matches[0])); + } + $result = ob_get_contents(); + ob_end_clean(); + return $result; +} + +//--------------------------------------------------------------- +/** +* Replace accented UTF-8 characters by unaccented ASCII-7 "equivalents". +* The purpose of this function is to replace characters commonly found in Latin +* alphabets with something more or less equivalent from the ASCII range. This can +* be useful for converting a UTF-8 to something ready for a filename, for example. +* Following the use of this function, you would probably also pass the string +* through utf8_strip_non_ascii to clean out any other non-ASCII chars +* Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1) +* letters.
Default is to deaccent both cases ($case = 0) +* +* For a more complete implementation of transliteration, see the utf8_to_ascii package +* available from the phputf8 project downloads: +* http://prdownloads.sourceforge.net/phputf8 +* +* @param string UTF-8 string +* @param int (optional) -1 lowercase only, +1 uppercase only, 0 both cases +* @param string UTF-8 with accented characters replaced by ASCII chars +* @return string accented chars replaced with ascii equivalents +* @author Andreas Gohr +* @package utf8 +* @subpackage ascii +*/ +function utf8_accents_to_ascii( $str, $case=0 ){ + + static $UTF8_LOWER_ACCENTS = NULL; + static $UTF8_UPPER_ACCENTS = NULL; + + if($case <= 0){ + + if ( is_null($UTF8_LOWER_ACCENTS) ) { + $UTF8_LOWER_ACCENTS = array( + 'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o', + 'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k', + 'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o', + 'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o', + 'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c', + 'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't', + 'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l', + 'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z', + 'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't', + 'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o', + 'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j', + 'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o', + 'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g', + 'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a', + 'û' => 'u', 'þ' => 'th', 
'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', 'ĕ' => 'e', + ); + } + + $str = str_replace( + array_keys($UTF8_LOWER_ACCENTS), + array_values($UTF8_LOWER_ACCENTS), + $str + ); + } + + if($case >= 0){ + if ( is_null($UTF8_UPPER_ACCENTS) ) { + $UTF8_UPPER_ACCENTS = array( + 'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O', + 'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K', + 'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O', + 'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O', + 'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C', + 'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T', + 'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L', + 'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z', + 'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T', + 'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O', + 'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J', + 'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O', + 'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G', + 'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A', + 'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E', + ); + } + $str = str_replace( + array_keys($UTF8_UPPER_ACCENTS), + array_values($UTF8_UPPER_ACCENTS), + $str + ); + } + + return $str; + +} diff --git a/includes/utf8/utils/bad.php b/includes/utf8/utils/bad.php new file mode 100644 index 0000000..52f1aee --- /dev/null +++ b/includes/utf8/utils/bad.php @@ -0,0 +1,421 @@ + 0 ) { + return $badList; + } + return FALSE; +} + +//-------------------------------------------------------------------- +/** +* Strips out any bad 
bytes from a UTF-8 string and returns the rest +* PCRE Pattern to locate bad bytes in a UTF-8 string +* Comes from W3 FAQ: Multilingual Forms +* Note: modified to include full ASCII range including control chars +* @see http://www.w3.org/International/questions/qa-forms-utf-8 +* @param string +* @return string +* @package utf8 +* @subpackage bad +*/ +function utf8_bad_strip($str) { + $UTF8_BAD = + '([\x00-\x7F]'. # ASCII (including control chars) + '|[\xC2-\xDF][\x80-\xBF]'. # non-overlong 2-byte + '|\xE0[\xA0-\xBF][\x80-\xBF]'. # excluding overlongs + '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'. # straight 3-byte + '|\xED[\x80-\x9F][\x80-\xBF]'. # excluding surrogates + '|\xF0[\x90-\xBF][\x80-\xBF]{2}'. # planes 1-3 + '|[\xF1-\xF3][\x80-\xBF]{3}'. # planes 4-15 + '|\xF4[\x80-\x8F][\x80-\xBF]{2}'. # plane 16 + '|(.{1}))'; # invalid byte + ob_start(); + while (preg_match('/'.$UTF8_BAD.'/S', $str, $matches)) { + if ( !isset($matches[2])) { + echo $matches[0]; + } + $str = substr($str,strlen($matches[0])); + } + $result = ob_get_contents(); + ob_end_clean(); + return $result; +} + +//-------------------------------------------------------------------- +/** +* Replace bad bytes with an alternative character - ASCII character +* recommended is replacement char +* PCRE Pattern to locate bad bytes in a UTF-8 string +* Comes from W3 FAQ: Multilingual Forms +* Note: modified to include full ASCII range including control chars +* @see http://www.w3.org/International/questions/qa-forms-utf-8 +* @param string to search +* @param string to replace bad bytes with (defaults to '?') - use ASCII +* @return string +* @package utf8 +* @subpackage bad +*/ +function utf8_bad_replace($str, $replace = '?') { + $UTF8_BAD = + '([\x00-\x7F]'. # ASCII (including control chars) + '|[\xC2-\xDF][\x80-\xBF]'. # non-overlong 2-byte + '|\xE0[\xA0-\xBF][\x80-\xBF]'. # excluding overlongs + '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'. # straight 3-byte + '|\xED[\x80-\x9F][\x80-\xBF]'. 
# excluding surrogates + '|\xF0[\x90-\xBF][\x80-\xBF]{2}'. # planes 1-3 + '|[\xF1-\xF3][\x80-\xBF]{3}'. # planes 4-15 + '|\xF4[\x80-\x8F][\x80-\xBF]{2}'. # plane 16 + '|(.{1}))'; # invalid byte + ob_start(); + while (preg_match('/'.$UTF8_BAD.'/S', $str, $matches)) { + if ( !isset($matches[2])) { + echo $matches[0]; + } else { + echo $replace; + } + $str = substr($str,strlen($matches[0])); + } + $result = ob_get_contents(); + ob_end_clean(); + return $result; +} + +//-------------------------------------------------------------------- +/** +* Return code from utf8_bad_identify() when a five octet sequence is detected. +* Note: 5 octets sequences are valid UTF-8 but are not supported by Unicode so +* do not represent a useful character +* @see utf8_bad_identify +* @package utf8 +* @subpackage bad +*/ +define('UTF8_BAD_5OCTET',1); + +/** +* Return code from utf8_bad_identify() when a six octet sequence is detected. +* Note: 6 octets sequences are valid UTF-8 but are not supported by Unicode so +* do not represent a useful character +* @see utf8_bad_identify +* @package utf8 +* @subpackage bad +*/ +define('UTF8_BAD_6OCTET',2); + +/** +* Return code from utf8_bad_identify(). +* Invalid octet for use as start of multi-byte UTF-8 sequence +* @see utf8_bad_identify +* @package utf8 +* @subpackage bad +*/ +define('UTF8_BAD_SEQID',3); + +/** +* Return code from utf8_bad_identify(). +* From Unicode 3.1, non-shortest form is illegal +* @see utf8_bad_identify +* @package utf8 +* @subpackage bad +*/ +define('UTF8_BAD_NONSHORT',4); + +/** +* Return code from utf8_bad_identify(). +* From Unicode 3.2, surrogate characters are illegal +* @see utf8_bad_identify +* @package utf8 +* @subpackage bad +*/ +define('UTF8_BAD_SURROGATE',5); + +/** +* Return code from utf8_bad_identify(). +* Codepoints outside the Unicode range are illegal +* @see utf8_bad_identify +* @package utf8 +* @subpackage bad +*/ +define('UTF8_BAD_UNIOUTRANGE',6); + +/** +* Return code from utf8_bad_identify(). 
+* Incomplete multi-octet sequence +* Note: this is kind of a "catch-all" +* @see utf8_bad_identify +* @package utf8 +* @subpackage bad +*/ +define('UTF8_BAD_SEQINCOMPLETE',7); + +//-------------------------------------------------------------------- +/** +* Reports on the type of bad byte found in a UTF-8 string. Returns a +* status code on the first bad byte found +* @author +* @param string UTF-8 encoded string +* @return mixed integer constant describing problem or FALSE if valid UTF-8 +* @see utf8_bad_explain +* @see http://hsivonen.iki.fi/php-utf8/ +* @package utf8 +* @subpackage bad +*/ +function utf8_bad_identify($str, &$i) { + + $mState = 0; // cached expected number of octets after the current octet + // until the beginning of the next UTF8 character sequence + $mUcs4 = 0; // cached Unicode character + $mBytes = 1; // cached expected number of octets in the current sequence + + $len = strlen($str); + + for($i = 0; $i < $len; $i++) { + + $in = ord($str{$i}); + + if ( $mState == 0) { + + // When mState is zero we expect either a US-ASCII character or a + // multi-octet sequence. + if (0 == (0x80 & ($in))) { + // US-ASCII, pass straight through. + $mBytes = 1; + + } else if (0xC0 == (0xE0 & ($in))) { + // First octet of 2 octet sequence + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x1F) << 6; + $mState = 1; + $mBytes = 2; + + } else if (0xE0 == (0xF0 & ($in))) { + // First octet of 3 octet sequence + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x0F) << 12; + $mState = 2; + $mBytes = 3; + + } else if (0xF0 == (0xF8 & ($in))) { + // First octet of 4 octet sequence + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x07) << 18; + $mState = 3; + $mBytes = 4; + + } else if (0xF8 == (0xFC & ($in))) { + + /* First octet of 5 octet sequence. + * + * This is illegal because the encoded codepoint must be either + * (a) not the shortest form or + * (b) outside the Unicode range of 0-0x10FFFF. 
+ */ + + return UTF8_BAD_5OCTET; + + } else if (0xFC == (0xFE & ($in))) { + + // First octet of 6 octet sequence, see comments for 5 octet sequence. + return UTF8_BAD_6OCTET; + + } else { + // Current octet is neither in the US-ASCII range nor a legal first + // octet of a multi-octet sequence. + return UTF8_BAD_SEQID; + + } + + } else { + + // When mState is non-zero, we expect a continuation of the multi-octet + // sequence + if (0x80 == (0xC0 & ($in))) { + + // Legal continuation. + $shift = ($mState - 1) * 6; + $tmp = $in; + $tmp = ($tmp & 0x0000003F) << $shift; + $mUcs4 |= $tmp; + + /** + * End of the multi-octet sequence. mUcs4 now contains the final + * Unicode codepoint to be output + */ + if (0 == --$mState) { + + // From Unicode 3.1, non-shortest form is illegal + if (((2 == $mBytes) && ($mUcs4 < 0x0080)) || + ((3 == $mBytes) && ($mUcs4 < 0x0800)) || + ((4 == $mBytes) && ($mUcs4 < 0x10000)) ) { + return UTF8_BAD_NONSHORT; + + // From Unicode 3.2, surrogate characters are illegal + } else if (($mUcs4 & 0xFFFFF800) == 0xD800) { + return UTF8_BAD_SURROGATE; + + // Codepoints outside the Unicode range are illegal + } else if ($mUcs4 > 0x10FFFF) { + return UTF8_BAD_UNIOUTRANGE; + } + + //initialize UTF8 cache + $mState = 0; + $mUcs4 = 0; + $mBytes = 1; + } + + } else { + // ((0xC0 & (*in) != 0x80) && (mState != 0)) + // Incomplete multi-octet sequence. + $i--; + return UTF8_BAD_SEQINCOMPLETE; + } + } + } + + if ( $mState != 0 ) { + // Incomplete multi-octet sequence. + $i--; + return UTF8_BAD_SEQINCOMPLETE; + } + + // No bad octets found + $i = NULL; + return FALSE; +} + +//-------------------------------------------------------------------- +/** +* Takes a return code from utf8_bad_identify() are returns a message +* (in English) explaining what the problem is. 
+* @param int return code from utf8_bad_identify +* @return mixed string message or FALSE if return code unknown +* @see utf8_bad_identify +* @package utf8 +* @subpackage bad +*/ +function utf8_bad_explain($code) { + + switch ($code) { + + case UTF8_BAD_5OCTET: + return 'Five octet sequences are valid UTF-8 but are not supported by Unicode'; + break; + + case UTF8_BAD_6OCTET: + return 'Six octet sequences are valid UTF-8 but are not supported by Unicode'; + break; + + case UTF8_BAD_SEQID: + return 'Invalid octet for use as start of multi-byte UTF-8 sequence'; + break; + + case UTF8_BAD_NONSHORT: + return 'From Unicode 3.1, non-shortest form is illegal'; + break; + + case UTF8_BAD_SURROGATE: + return 'From Unicode 3.2, surrogate characters are illegal'; + break; + + case UTF8_BAD_UNIOUTRANGE: + return 'Codepoints outside the Unicode range are illegal'; + break; + + case UTF8_BAD_SEQINCOMPLETE: + return 'Incomplete multi-octet sequence'; + break; + + } + + trigger_error('Unknown error code: '.$code,E_USER_WARNING); + return FALSE; + +} diff --git a/includes/utf8/utils/patterns.php b/includes/utf8/utils/patterns.php new file mode 100644 index 0000000..a3dee6a --- /dev/null +++ b/includes/utf8/utils/patterns.php @@ -0,0 +1,69 @@ + +* @param string string to locate index in +* @param int (n times) +* @return mixed - int if only one input int, array if more +* @return boolean TRUE if it's all ASCII +* @package utf8 +* @subpackage position +*/ +function utf8_byte_position() { + + $args = func_get_args(); + $str =& array_shift($args); + if (!is_string($str)) return false; + + $result = array(); + + // trivial byte index, character offset pair + $prev = array(0,0); + + // use a short piece of str to estimate bytes per character + // $i (& $j) -> byte indexes into $str + $i = utf8_locate_next_chr($str, 300); + + // $c -> character offset into $str + $c = strlen(utf8_decode(substr($str,0,$i))); + + // deal with arguments from lowest to highest + sort($args); + + foreach 
($args as $offset) { + // sanity checks FIXME + + // 0 is an easy check + if ($offset == 0) { $result[] = 0; continue; } + + // ensure no endless looping + $safety_valve = 50; + + do { + + if ( ($c - $prev[1]) == 0 ) { + // Hack: gone past end of string + $error = 0; + $i = strlen($str); + break; + } + + $j = $i + (int)(($offset-$c) * ($i - $prev[0]) / ($c - $prev[1])); + + // correct to utf8 character boundary + $j = utf8_locate_next_chr($str, $j); + + // save the index, offset for use next iteration + $prev = array($i,$c); + + if ($j > $i) { + // determine new character offset + $c += strlen(utf8_decode(substr($str,$i,$j-$i))); + } else { + // ditto + $c -= strlen(utf8_decode(substr($str,$j,$i-$j))); + } + + $error = abs($c-$offset); + + // ready for next time around + $i = $j; + + // from 7 it is faster to iterate over the string + } while ( ($error > 7) && --$safety_valve) ; + + if ($error && $error <= 7) { + + if ($c < $offset) { + // move up + while ($error--) { $i = utf8_locate_next_chr($str,++$i); } + } else { + // move down + while ($error--) { $i = utf8_locate_current_chr($str,--$i); } + } + + // ready for next arg + $c = $offset; + } + $result[] = $i; + } + + if ( count($result) == 1 ) { + return $result[0]; + } + + return $result; +} + +//-------------------------------------------------------------------- +/** +* Given a string and any byte index, returns the byte index +* of the start of the current UTF-8 character, relative to supplied +* position. If the current character begins at the same place as the +* supplied byte index, that byte index will be returned. 
Otherwise +* this function will step backwards, looking for the index where +* curent UTF-8 character begins +* @author Chris Smith +* @param string +* @param int byte index in the string +* @return int byte index of start of next UTF-8 character +* @package utf8 +* @subpackage position +*/ +function utf8_locate_current_chr( &$str, $idx ) { + + if ($idx <= 0) return 0; + + $limit = strlen($str); + if ($idx >= $limit) return $limit; + + // Binary value for any byte after the first in a multi-byte UTF-8 character + // will be like 10xxxxxx so & 0xC0 can be used to detect this kind + // of byte - assuming well formed UTF-8 + while ($idx && ((ord($str[$idx]) & 0xC0) == 0x80)) $idx--; + + return $idx; +} + +//-------------------------------------------------------------------- +/** +* Given a string and any byte index, returns the byte index +* of the start of the next UTF-8 character, relative to supplied +* position. If the next character begins at the same place as the +* supplied byte index, that byte index will be returned. 
+* @author Chris Smith +* @param string +* @param int byte index in the string +* @return int byte index of start of next UTF-8 character +* @package utf8 +* @subpackage position +*/ +function utf8_locate_next_chr( &$str, $idx ) { + + if ($idx <= 0) return 0; + + $limit = strlen($str); + if ($idx >= $limit) return $limit; + + // Binary value for any byte after the first in a multi-byte UTF-8 character + // will be like 10xxxxxx so & 0xC0 can be used to detect this kind + // of byte - assuming well formed UTF-8 + while (($idx < $limit) && ((ord($str[$idx]) & 0xC0) == 0x80)) $idx++; + + return $idx; +} + diff --git a/includes/utf8/utils/specials.php b/includes/utf8/utils/specials.php new file mode 100644 index 0000000..30613ee --- /dev/null +++ b/includes/utf8/utils/specials.php @@ -0,0 +1,131 @@ + +* @param string $string The UTF8 string to strip of special chars +* @param string (optional) $repl Replace special with this string +* @return string with common non-alphanumeric characters removed +* @see utf8_specials_pattern +*/ +function utf8_strip_specials($string, $repl=''){ + return preg_replace(utf8_specials_pattern(), $repl, $string); +} + + diff --git a/includes/utf8/utils/unicode.php b/includes/utf8/utils/unicode.php new file mode 100644 index 0000000..d234a2a --- /dev/null +++ b/includes/utf8/utils/unicode.php @@ -0,0 +1,269 @@ + 0xFFFF. Occurrances of the BOM are ignored. Surrogates +* are not allowed. 
+* Returns false if the input string isn't a valid UTF-8 octet sequence +* and raises a PHP error at level E_USER_WARNING +* Note: this function has been modified slightly in this library to +* trigger errors on encountering bad bytes +* @author +* @param string UTF-8 encoded string +* @return mixed array of unicode code points or FALSE if UTF-8 invalid +* @see utf8_from_unicode +* @see http://hsivonen.iki.fi/php-utf8/ +* @package utf8 +* @subpackage unicode +*/ +function utf8_to_unicode($str) { + $mState = 0; // cached expected number of octets after the current octet + // until the beginning of the next UTF8 character sequence + $mUcs4 = 0; // cached Unicode character + $mBytes = 1; // cached expected number of octets in the current sequence + + $out = array(); + + $len = strlen($str); + + for($i = 0; $i < $len; $i++) { + + $in = ord($str{$i}); + + if ( $mState == 0) { + + // When mState is zero we expect either a US-ASCII character or a + // multi-octet sequence. + if (0 == (0x80 & ($in))) { + // US-ASCII, pass straight through. + $out[] = $in; + $mBytes = 1; + + } else if (0xC0 == (0xE0 & ($in))) { + // First octet of 2 octet sequence + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x1F) << 6; + $mState = 1; + $mBytes = 2; + + } else if (0xE0 == (0xF0 & ($in))) { + // First octet of 3 octet sequence + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x0F) << 12; + $mState = 2; + $mBytes = 3; + + } else if (0xF0 == (0xF8 & ($in))) { + // First octet of 4 octet sequence + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x07) << 18; + $mState = 3; + $mBytes = 4; + + } else if (0xF8 == (0xFC & ($in))) { + /* First octet of 5 octet sequence. + * + * This is illegal because the encoded codepoint must be either + * (a) not the shortest form or + * (b) outside the Unicode range of 0-0x10FFFF. + * Rather than trying to resynchronize, we will carry on until the end + * of the sequence and let the later error handling code catch it. 
+ */ + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x03) << 24; + $mState = 4; + $mBytes = 5; + + } else if (0xFC == (0xFE & ($in))) { + // First octet of 6 octet sequence, see comments for 5 octet sequence. + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 1) << 30; + $mState = 5; + $mBytes = 6; + + } else { + /* Current octet is neither in the US-ASCII range nor a legal first + * octet of a multi-octet sequence. + */ + trigger_error( + 'utf8_to_unicode: Illegal sequence identifier '. + 'in UTF-8 at byte '.$i, + E_USER_WARNING + ); + return FALSE; + + } + + } else { + + // When mState is non-zero, we expect a continuation of the multi-octet + // sequence + if (0x80 == (0xC0 & ($in))) { + + // Legal continuation. + $shift = ($mState - 1) * 6; + $tmp = $in; + $tmp = ($tmp & 0x0000003F) << $shift; + $mUcs4 |= $tmp; + + /** + * End of the multi-octet sequence. mUcs4 now contains the final + * Unicode codepoint to be output + */ + if (0 == --$mState) { + + /* + * Check for illegal sequences and codepoints. + */ + // From Unicode 3.1, non-shortest form is illegal + if (((2 == $mBytes) && ($mUcs4 < 0x0080)) || + ((3 == $mBytes) && ($mUcs4 < 0x0800)) || + ((4 == $mBytes) && ($mUcs4 < 0x10000)) || + (4 < $mBytes) || + // From Unicode 3.2, surrogate characters are illegal + (($mUcs4 & 0xFFFFF800) == 0xD800) || + // Codepoints outside the Unicode range are illegal + ($mUcs4 > 0x10FFFF)) { + + trigger_error( + 'utf8_to_unicode: Illegal sequence or codepoint '. + 'in UTF-8 at byte '.$i, + E_USER_WARNING + ); + + return FALSE; + + } + + if (0xFEFF != $mUcs4) { + // BOM is legal but we don't want to output it + $out[] = $mUcs4; + } + + //initialize UTF8 cache + $mState = 0; + $mUcs4 = 0; + $mBytes = 1; + } + + } else { + /** + *((0xC0 & (*in) != 0x80) && (mState != 0)) + * Incomplete multi-octet sequence. + */ + trigger_error( + 'utf8_to_unicode: Incomplete multi-octet '. 
+ ' sequence in UTF-8 at byte '.$i, + E_USER_WARNING + ); + + return FALSE; + } + } + } + return $out; +} + +//-------------------------------------------------------------------- +/** +* Takes an array of ints representing the Unicode characters and returns +* a UTF-8 string. Astral planes are supported ie. the ints in the +* input can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates +* are not allowed. +* Returns false if the input array contains ints that represent +* surrogates or are outside the Unicode range +* and raises a PHP error at level E_USER_WARNING +* Note: this function has been modified slightly in this library to use +* output buffering to concatenate the UTF-8 string (faster) as well as +* reference the array by it's keys +* @param array of unicode code points representing a string +* @return mixed UTF-8 string or FALSE if array contains invalid code points +* @author +* @see utf8_to_unicode +* @see http://hsivonen.iki.fi/php-utf8/ +* @package utf8 +* @subpackage unicode +*/ +function utf8_from_unicode($arr) { + ob_start(); + + foreach (array_keys($arr) as $k) { + + # ASCII range (including control chars) + if ( ($arr[$k] >= 0) && ($arr[$k] <= 0x007f) ) { + + echo chr($arr[$k]); + + # 2 byte sequence + } else if ($arr[$k] <= 0x07ff) { + + echo chr(0xc0 | ($arr[$k] >> 6)); + echo chr(0x80 | ($arr[$k] & 0x003f)); + + # Byte order mark (skip) + } else if($arr[$k] == 0xFEFF) { + + // nop -- zap the BOM + + # Test for illegal surrogates + } else if ($arr[$k] >= 0xD800 && $arr[$k] <= 0xDFFF) { + + // found a surrogate + trigger_error( + 'utf8_from_unicode: Illegal surrogate '. 
+ 'at index: '.$k.', value: '.$arr[$k], + E_USER_WARNING + ); + + return FALSE; + + # 3 byte sequence + } else if ($arr[$k] <= 0xffff) { + + echo chr(0xe0 | ($arr[$k] >> 12)); + echo chr(0x80 | (($arr[$k] >> 6) & 0x003f)); + echo chr(0x80 | ($arr[$k] & 0x003f)); + + # 4 byte sequence + } else if ($arr[$k] <= 0x10ffff) { + + echo chr(0xf0 | ($arr[$k] >> 18)); + echo chr(0x80 | (($arr[$k] >> 12) & 0x3f)); + echo chr(0x80 | (($arr[$k] >> 6) & 0x3f)); + echo chr(0x80 | ($arr[$k] & 0x3f)); + + } else { + + trigger_error( + 'utf8_from_unicode: Codepoint out of Unicode range '. + 'at index: '.$k.', value: '.$arr[$k], + E_USER_WARNING + ); + + // out of range + return FALSE; + } + } + + $result = ob_get_contents(); + ob_end_clean(); + return $result; +} diff --git a/includes/utf8/utils/validation.php b/includes/utf8/utils/validation.php new file mode 100644 index 0000000..0f9fd37 --- /dev/null +++ b/includes/utf8/utils/validation.php @@ -0,0 +1,185 @@ + +* @param string UTF-8 encoded string +* @return boolean true if valid +* @see http://hsivonen.iki.fi/php-utf8/ +* @see utf8_compliant +* @package utf8 +* @subpackage validation +*/ +function utf8_is_valid($str) { + + $mState = 0; // cached expected number of octets after the current octet + // until the beginning of the next UTF8 character sequence + $mUcs4 = 0; // cached Unicode character + $mBytes = 1; // cached expected number of octets in the current sequence + + $len = strlen($str); + + for($i = 0; $i < $len; $i++) { + + $in = ord($str{$i}); + + if ( $mState == 0) { + + // When mState is zero we expect either a US-ASCII character or a + // multi-octet sequence. + if (0 == (0x80 & ($in))) { + // US-ASCII, pass straight through. 
+ $mBytes = 1; + + } else if (0xC0 == (0xE0 & ($in))) { + // First octet of 2 octet sequence + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x1F) << 6; + $mState = 1; + $mBytes = 2; + + } else if (0xE0 == (0xF0 & ($in))) { + // First octet of 3 octet sequence + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x0F) << 12; + $mState = 2; + $mBytes = 3; + + } else if (0xF0 == (0xF8 & ($in))) { + // First octet of 4 octet sequence + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x07) << 18; + $mState = 3; + $mBytes = 4; + + } else if (0xF8 == (0xFC & ($in))) { + /* First octet of 5 octet sequence. + * + * This is illegal because the encoded codepoint must be either + * (a) not the shortest form or + * (b) outside the Unicode range of 0-0x10FFFF. + * Rather than trying to resynchronize, we will carry on until the end + * of the sequence and let the later error handling code catch it. + */ + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x03) << 24; + $mState = 4; + $mBytes = 5; + + } else if (0xFC == (0xFE & ($in))) { + // First octet of 6 octet sequence, see comments for 5 octet sequence. + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 1) << 30; + $mState = 5; + $mBytes = 6; + + } else { + /* Current octet is neither in the US-ASCII range nor a legal first + * octet of a multi-octet sequence. + */ + return FALSE; + + } + + } else { + + // When mState is non-zero, we expect a continuation of the multi-octet + // sequence + if (0x80 == (0xC0 & ($in))) { + + // Legal continuation. + $shift = ($mState - 1) * 6; + $tmp = $in; + $tmp = ($tmp & 0x0000003F) << $shift; + $mUcs4 |= $tmp; + + /** + * End of the multi-octet sequence. mUcs4 now contains the final + * Unicode codepoint to be output + */ + if (0 == --$mState) { + + /* + * Check for illegal sequences and codepoints. 
+ */ + // From Unicode 3.1, non-shortest form is illegal + if (((2 == $mBytes) && ($mUcs4 < 0x0080)) || + ((3 == $mBytes) && ($mUcs4 < 0x0800)) || + ((4 == $mBytes) && ($mUcs4 < 0x10000)) || + (4 < $mBytes) || + // From Unicode 3.2, surrogate characters are illegal + (($mUcs4 & 0xFFFFF800) == 0xD800) || + // Codepoints outside the Unicode range are illegal + ($mUcs4 > 0x10FFFF)) { + + return FALSE; + + } + + //initialize UTF8 cache + $mState = 0; + $mUcs4 = 0; + $mBytes = 1; + } + + } else { + /** + *((0xC0 & (*in) != 0x80) && (mState != 0)) + * Incomplete multi-octet sequence. + */ + + return FALSE; + } + } + } + return TRUE; +} + +//-------------------------------------------------------------------- +/** +* Tests whether a string complies as UTF-8. This will be much +* faster than utf8_is_valid but will pass five and six octet +* UTF-8 sequences, which are not supported by Unicode and +* so cannot be displayed correctly in a browser. In other words +* it is not as strict as utf8_is_valid but it's faster. If you use +* is to validate user input, you place yourself at the risk that +* attackers will be able to inject 5 and 6 byte sequences (which +* may or may not be a significant risk, depending on what you are +* are doing) +* @see utf8_is_valid +* @see http://www.php.net/manual/en/reference.pcre.pattern.modifiers.php#54805 +* @param string UTF-8 string to check +* @return boolean TRUE if string is valid UTF-8 +* @package utf8 +* @subpackage validation +*/ +function utf8_compliant($str) { + if ( strlen($str) == 0 ) { + return TRUE; + } + // If even just the first character can be matched, when the /u + // modifier is used, then it's valid UTF-8. 
If the UTF-8 is somehow + // invalid, nothing at all will match, even if the string contains + // some valid sequences + return (preg_match('/^.{1}/us',$str,$ar) == 1); +} + diff --git a/index.php b/index.php index ad6f1cc..f89e64e 100644 --- a/index.php +++ b/index.php @@ -1,6 +1,6 @@ -function _playerAdd(anchor) { - var url = anchor.href; - var code = ''; - var code = code + ''; - var code = code + ''; - anchor.parentNode.innerHTML = code +' '+ anchor.parentNode.innerHTML; -} - -String.prototype.trim = function() { - return this.replace(/^\s+|\s+$/g, ''); -}; - var deleted = false; -function deleteBookmark(ele, input){ - var confirmDelete = " - "; - ele.style.display = 'none'; - ele.parentNode.innerHTML = ele.parentNode.innerHTML + confirmDelete; +function deleteBookmark(ele, input) { + $(ele).hide(); + $(ele).parent().append(" - "); + return false; } - function deleteCancelled(ele) { - var del = previousElement(ele.parentNode); - del.style.display = 'inline'; - ele.parentNode.parentNode.removeChild(ele.parentNode); - return false; -} - -function deleteConfirmed(ele, input, response) { - if (deleted == false) { - deleted = ele.parentNode.parentNode.parentNode; - } - var post = deleted; - post.className = 'xfolkentry deleted'; - if (response != '') { - post.style.display = 'none'; - deleted = false; - } else { - loadXMLDoc('ajaxDelete.php?id=' + input); - } + $(ele).parent().prev().show(); + $(ele).parent().remove(); + return false; } - -function previousElement(ele) { - ele = ele.previousSibling; - while (ele.nodeType != 1) { - ele = ele.previousSibling; - } - return ele; -} - -function isAvailable(input, response){ - var usernameField = document.getElementById("username"); - var username = usernameField.value; - username = username.toLowerCase(); - username = username.trim(); - var availability = document.getElementById("availability"); - if (username != '') { - usernameField.style.backgroundImage = 'url(loading.gif)'; - if (response != '') { - 
usernameField.style.backgroundImage = 'none'; - if (response == 'true') { - availability.className = 'available'; - availability.innerHTML = ''; - } else { - availability.className = 'not-available'; - availability.innerHTML = ''; - } - } else { - loadXMLDoc('ajaxIsAvailable.php?username=' + username); - } +function deleteConfirmed(ele, input) { + $.get("ajaxDelete.php?id=" + input, function(data) { + if (1 === parseInt(data)) { + $(ele).parents(".xfolkentry").slideUp(); } + }); + return false; } function useAddress(ele) { @@ -87,55 +36,29 @@ function useAddress(ele) { } } -function getTitle(input, response){ - var title = document.getElementById('titleField'); - if (title.value == '') { - title.style.backgroundImage = 'url(loading.gif)'; - if (response != null) { - title.style.backgroundImage = 'none'; - title.value = response; - } else if (input.indexOf('http') > -1) { - loadXMLDoc('ajaxGetTitle.php?url=' + input); - } else { - return false; - } - } -} - -var xmlhttp; -function loadXMLDoc(url) { - // Native - if (window.XMLHttpRequest) { - xmlhttp = new XMLHttpRequest(); - xmlhttp.onreadystatechange = processStateChange; - xmlhttp.open("GET", url, true); - xmlhttp.send(null); - // ActiveX - } else if (window.ActiveXObject) { - xmlhttp = new ActiveXObject("Microsoft.XMLHTTP"); - if (xmlhttp) { - xmlhttp.onreadystatechange = processStateChange; - xmlhttp.open("GET", url, true); - xmlhttp.send(); - } +function getTitle(input) { + var title = $("#titleField").val(); + if (title.length < 1) { + $("#titleField").css("background-image", "url(loading.gif)"); + if (input.indexOf("http") > -1) { + $.get("ajaxGetTitle.php?url=" + input, function(data) { + $("#titleField").css("background-image", "none") + .val(data); + }); } + } } -function processStateChange() { - if (xmlhttp.readyState == 4 && xmlhttp.status == 200) { - response = xmlhttp.responseXML.documentElement; - method = response.getElementsByTagName('method')[0].firstChild.data; - result = 
response.getElementsByTagName('result')[0].firstChild.data; - eval(method + '(\'\', result)'); - } -} - -function playerLoad() { - var anchors = document.getElementsByTagName('a'); - var anchors_length = anchors.length; - for (var i = 0; i < anchors_length; i++) { - if (anchors[i].className == 'taggedlink' && anchors[i].href.match(/\.mp3$/i)) { - _playerAdd(anchors[i]); - } - } -} \ No newline at end of file +/* Page load */ +$(function() { + /* Insert Flash player for MP3 links */ + if ($("#bookmarks").length > 0) { + $("a[href$=.mp3].taggedlink").each(function() { + var url = this.href; + var code = ''; + code = code + ''; + code = code + ' '; + $(this).prepend(code); + }); + } +}) diff --git a/login.php b/login.php index 41913f0..d361394 100644 --- a/login.php +++ b/login.php @@ -1,6 +1,6 @@ loadTemplate('register.tpl', $tplVars); -?> diff --git a/scuttle.css b/scuttle.css index ecd2ab7..b23fad8 100644 --- a/scuttle.css +++ b/scuttle.css @@ -374,23 +374,26 @@ p { margin: 1em; } p#sort { - color: #CCC; - font-size: small; - float: right; - margin: 0; - position: absolute; - right: 0; - top: 7em; + color: #CCC; + font-size: small; + float: right; + margin: 0; + position: absolute; + right: 0; + top: 7em; } html > body p#sort { - margin-right: 0.75em; + margin-right: 0.75em; } p#sort a { - background: #AAA; - color: #555; - font-weight: normal; - margin-right: 0.5em; - padding: 0 1em; + background: #AAA; + color: #555; + font-weight: normal; + margin-right: 0.5em; + padding: 0 1em; + border-radius: 0.25em; + -moz-border-radius: 0.25em; + -webkit-border-radius: 0.25em; } html > body p#sort a { margin-right: 0; diff --git a/search.php b/search.php index 2c88f91..f0919d8 100644 --- a/search.php +++ b/search.php @@ -1,6 +1,6 @@ db = & $db; - } - - function _getbookmark($fieldname, $value, $all = false) { - if (!$all) { - $userservice = & ServiceFactory :: getServiceInstance('UserService'); - $sId = $userservice->getCurrentUserId(); - $range = ' AND uId = '. 
$sId; - } - - $query = 'SELECT * FROM '. $GLOBALS['tableprefix'] .'bookmarks WHERE '. $fieldname .' = "'. $this->db->sql_escape($value) .'"'. $range; - - if (!($dbresult = & $this->db->sql_query_limit($query, 1, 0))) { - message_die(GENERAL_ERROR, 'Could not get bookmark', '', __LINE__, __FILE__, $query, $this->db); - return false; - } - - if ($row =& $this->db->sql_fetchrow($dbresult)) { - return $row; - } else { - return false; - } - } - - function & getBookmark($bid, $include_tags = false) { - if (!is_numeric($bid)) - return; - - $sql = 'SELECT * FROM '. $GLOBALS['tableprefix'] .'bookmarks WHERE bId = '. $this->db->sql_escape($bid); - - if (!($dbresult = & $this->db->sql_query($sql))) - message_die(GENERAL_ERROR, 'Could not get vars', '', __LINE__, __FILE__, $sql, $this->db); - - if ($row = & $this->db->sql_fetchrow($dbresult)) { - if ($include_tags) { - $tagservice = & ServiceFactory :: getServiceInstance('TagService'); - $row['tags'] = $tagservice->getTagsForBookmark($bid); - } - return $row; - } else { - return false; - } - } - - function getBookmarkByAddress($address) { - $hash = md5($address); - return $this->getBookmarkByHash($hash); - } - - function getBookmarkByHash($hash) { - return $this->_getbookmark('bHash', $hash, true); - } - - function editAllowed($bookmark) { - if (!is_numeric($bookmark) && (!is_array($bookmark) || !is_numeric($bookmark['bId']))) - return false; - - if (!is_array($bookmark)) - if (!($bookmark = $this->getBookmark($bookmark))) - return false; - - $userservice = & ServiceFactory :: getServiceInstance('UserService'); - $userid = $userservice->getCurrentUserId(); - if ($userservice->isAdmin($userid)) - return true; - else - return ($bookmark['uId'] == $userid); - } - - function bookmarkExists($address = false, $uid = NULL) { - if (!$address) { - return; - } - - // If address doesn't contain ":", add "http://" as the default protocol - if (strpos($address, ':') === false) { - $address = 'http://'. 
$address; - } - - $crit = array ('bHash' => md5($address)); - if (isset ($uid)) { - $crit['uId'] = $uid; - } - - $sql = 'SELECT COUNT(*) FROM '. $GLOBALS['tableprefix'] .'bookmarks WHERE '. $this->db->sql_build_array('SELECT', $crit); - if (!($dbresult = & $this->db->sql_query($sql))) { - message_die(GENERAL_ERROR, 'Could not get vars', '', __LINE__, __FILE__, $sql, $this->db); - } - return ($this->db->sql_fetchfield(0, 0) > 0); - } - - // Adds a bookmark to the database. - // Note that date is expected to be a string that's interpretable by strtotime(). - function addBookmark($address, $title, $description, $status, $categories, $date = NULL, $fromApi = false, $fromImport = false) { - $userservice = & ServiceFactory :: getServiceInstance('UserService'); - $sId = $userservice->getCurrentUserId(); - - // If bookmark address doesn't contain ":", add "http://" to the start as a default protocol - if (strpos($address, ':') === false) { - $address = 'http://'. $address; - } - - // Get the client's IP address and the date; note that the date is in GMT. - if (getenv('HTTP_CLIENT_IP')) - $ip = getenv('HTTP_CLIENT_IP'); - else - if (getenv('REMOTE_ADDR')) - $ip = getenv('REMOTE_ADDR'); - else - $ip = getenv('HTTP_X_FORWARDED_FOR'); - - // Note that if date is NULL, then it's added with a date and time of now, and if it's present, - // it's expected to be a string that's interpretable by strtotime(). - if (is_null($date)) - $time = time(); - else - $time = strtotime($date); - $datetime = gmdate('Y-m-d H:i:s', $time); - - // Set up the SQL insert statement and execute it. - $values = array('uId' => intval($sId), 'bIp' => $ip, 'bDatetime' => $datetime, 'bModified' => $datetime, 'bTitle' => $title, 'bAddress' => $address, 'bDescription' => $description, 'bStatus' => intval($status), 'bHash' => md5($address)); - $sql = 'INSERT INTO '. $GLOBALS['tableprefix'] .'bookmarks '. 
$this->db->sql_build_array('INSERT', $values); - $this->db->sql_transaction('begin'); - if (!($dbresult = & $this->db->sql_query($sql))) { - $this->db->sql_transaction('rollback'); - message_die(GENERAL_ERROR, 'Could not insert bookmark', '', __LINE__, __FILE__, $sql, $this->db); - return false; - } - // Get the resultant row ID for the bookmark. - $bId = $this->db->sql_nextid($dbresult); - if (!isset($bId) || !is_int($bId)) { - $this->db->sql_transaction('rollback'); - message_die(GENERAL_ERROR, 'Could not insert bookmark', '', __LINE__, __FILE__, $sql, $this->db); - return false; - } - - $uriparts = explode('.', $address); - $extension = end($uriparts); - unset($uriparts); - - $tagservice = & ServiceFactory :: getServiceInstance('TagService'); - if (!$tagservice->attachTags($bId, $categories, $fromApi, $extension, false, $fromImport)) { - $this->db->sql_transaction('rollback'); - message_die(GENERAL_ERROR, 'Could not insert bookmark', '', __LINE__, __FILE__, $sql, $this->db); - return false; - } - $this->db->sql_transaction('commit'); - // Everything worked out, so return the new bookmark's bId. - return $bId; - } - - function updateBookmark($bId, $address, $title, $description, $status, $categories, $date = NULL, $fromApi = false) { - if (!is_numeric($bId)) - return false; - - // Get the client's IP address and the date; note that the date is in GMT. - if (getenv('HTTP_CLIENT_IP')) - $ip = getenv('HTTP_CLIENT_IP'); - else - if (getenv('REMOTE_ADDR')) - $ip = getenv('REMOTE_ADDR'); - else - $ip = getenv('HTTP_X_FORWARDED_FOR'); - - $moddatetime = gmdate('Y-m-d H:i:s', time()); - - // Set up the SQL update statement and execute it. - $updates = array('bModified' => $moddatetime, 'bTitle' => $title, 'bAddress' => $address, 'bDescription' => $description, 'bStatus' => $status, 'bHash' => md5($address)); - - if (!is_null($date)) { - $updates['bDateTime'] = gmdate('Y-m-d H:i:s', strtotime($date)); - } - - $sql = 'UPDATE '. $GLOBALS['tableprefix'] .'bookmarks SET '. 
$this->db->sql_build_array('UPDATE', $updates) .' WHERE bId = '. intval($bId); - $this->db->sql_transaction('begin'); - - if (!($dbresult = & $this->db->sql_query($sql))) { - $this->db->sql_transaction('rollback'); - message_die(GENERAL_ERROR, 'Could not update bookmark', '', __LINE__, __FILE__, $sql, $this->db); - return false; - } - - $uriparts = explode('.', $address); - $extension = end($uriparts); - unset($uriparts); - - $tagservice = & ServiceFactory :: getServiceInstance('TagService'); - if (!$tagservice->attachTags($bId, $categories, $fromApi, $extension)) { - $this->db->sql_transaction('rollback'); - message_die(GENERAL_ERROR, 'Could not update bookmark', '', __LINE__, __FILE__, $sql, $this->db); - return false; - } - - $this->db->sql_transaction('commit'); - // Everything worked out, so return true. - return true; - } - - function & getBookmarks($start = 0, $perpage = NULL, $user = NULL, $tags = NULL, $terms = NULL, $sortOrder = NULL, $watched = NULL, $startdate = NULL, $enddate = NULL, $hash = NULL) { - // Only get the bookmarks that are visible to the current user. Our rules: - // - if the $user is NULL, that means get bookmarks from ALL users, so we need to make - // sure to check the logged-in user's watchlist and get the contacts-only bookmarks from - // those users. If the user isn't logged-in, just get the public bookmarks. - // - if the $user is set and isn't the logged-in user, then get that user's bookmarks, and - // if that user is on the logged-in user's watchlist, get the public AND contacts-only - // bookmarks; otherwise, just get the public bookmarks. - // - if the $user is set and IS the logged-in user, then get all bookmarks. 
- $userservice =& ServiceFactory::getServiceInstance('UserService'); - $tagservice =& ServiceFactory::getServiceInstance('TagService'); - $sId = $userservice->getCurrentUserId(); - - if ($userservice->isLoggedOn()) { - // All public bookmarks, user's own bookmarks and any shared with user - $privacy = ' AND ((B.bStatus = 0) OR (B.uId = '. $sId .')'; - $watchnames = $userservice->getWatchNames($sId, true); - foreach($watchnames as $watchuser) { - $privacy .= ' OR (U.username = "'. $watchuser .'" AND B.bStatus = 1)'; - } - $privacy .= ')'; - } else { - // Just public bookmarks - $privacy = ' AND B.bStatus = 0'; - } - - // Set up the tags, if need be. - if (!is_array($tags) && !is_null($tags)) { - $tags = explode('+', trim($tags)); - } - - $tagcount = count($tags); - for ($i = 0; $i < $tagcount; $i ++) { - $tags[$i] = trim($tags[$i]); - } - - // Set up the SQL query. - $query_1 = 'SELECT DISTINCT '; - if (SQL_LAYER == 'mysql4') { - $query_1 .= 'SQL_CALC_FOUND_ROWS '; - } - $query_1 .= 'B.*, U.'. $userservice->getFieldName('username'); - - $query_2 = ' FROM '. $userservice->getTableName() .' AS U, '. $GLOBALS['tableprefix'] .'bookmarks AS B'; - - $query_3 = ' WHERE B.uId = U.'. $userservice->getFieldName('primary') . $privacy; - if (is_null($watched)) { - if (!is_null($user)) { - $query_3 .= ' AND B.uId = '. $user; - } - } else { - $arrWatch = $userservice->getWatchlist($user); - if (count($arrWatch) > 0) { - foreach($arrWatch as $row) { - $query_3_1 .= 'B.uId = '. intval($row) .' OR '; - } - $query_3_1 = substr($query_3_1, 0, -3); - } else { - $query_3_1 = 'B.uId = -1'; - } - $query_3 .= ' AND ('. 
$query_3_1 .') AND B.bStatus IN (0, 1)'; - } - - switch($sortOrder) { - case 'date_asc': - $query_5 = ' ORDER BY B.bDatetime ASC '; - break; - case 'title_desc': - $query_5 = ' ORDER BY B.bTitle DESC '; - break; - case 'title_asc': - $query_5 = ' ORDER BY B.bTitle ASC '; - break; - case 'url_desc': - $query_5 = ' ORDER BY B.bAddress DESC '; - break; - case 'url_asc': - $query_5 = ' ORDER BY B.bAddress ASC '; - break; - default: - $query_5 = ' ORDER BY B.bDatetime DESC '; - } - - // Handle the parts of the query that depend on any tags that are present. - $query_4 = ''; - for ($i = 0; $i < $tagcount; $i ++) { - $query_2 .= ', '. $GLOBALS['tableprefix'] .'tags AS T'. $i; - $query_4 .= ' AND T'. $i .'.tag = "'. $this->db->sql_escape($tags[$i]) .'" AND T'. $i .'.bId = B.bId'; - } - - // Search terms - if ($terms) { - // Multiple search terms okay - $aTerms = explode(' ', $terms); - $aTerms = array_map('trim', $aTerms); - - // Search terms in tags as well when none given - if (!count($tags)) { - $query_2 .= ' LEFT JOIN '. $GLOBALS['tableprefix'] .'tags AS T ON B.bId = T.bId'; - $dotags = true; - } else { - $dotags = false; - } - - $query_4 = ''; - for ($i = 0; $i < count($aTerms); $i++) { - $query_4 .= ' AND (B.bTitle LIKE "%'. $this->db->sql_escape($aTerms[$i]) .'%"'; - $query_4 .= ' OR B.bDescription LIKE "%'. $this->db->sql_escape($aTerms[$i]) .'%"'; - if ($dotags) { - $query_4 .= ' OR T.tag = "'. $this->db->sql_escape($aTerms[$i]) .'"'; - } - $query_4 .= ')'; - } - } - - // Start and end dates - if ($startdate) { - $query_4 .= ' AND B.bDatetime > "'. $startdate .'"'; - } - if ($enddate) { - $query_4 .= ' AND B.bDatetime < "'. $enddate .'"'; - } - - // Hash - if ($hash) { - $query_4 .= ' AND B.bHash = "'. $hash .'"'; - } - - $query = $query_1 . $query_2 . $query_3 . $query_4 . 
$query_5; - if (!($dbresult = & $this->db->sql_query_limit($query, intval($perpage), intval($start)))) { - message_die(GENERAL_ERROR, 'Could not get bookmarks', '', __LINE__, __FILE__, $query, $this->db); - return false; - } - - if (SQL_LAYER == 'mysql4') { - $totalquery = 'SELECT FOUND_ROWS() AS total'; - } else { - $totalquery = 'SELECT COUNT(*) AS total'. $query_2 . $query_3 . $query_4; - } - - if (!($totalresult = & $this->db->sql_query($totalquery)) || (!($row = & $this->db->sql_fetchrow($totalresult)))) { - message_die(GENERAL_ERROR, 'Could not get total bookmarks', '', __LINE__, __FILE__, $totalquery, $this->db); - return false; - } - - $total = $row['total']; - - $bookmarks = array(); - while ($row = & $this->db->sql_fetchrow($dbresult)) { - $row['tags'] = $tagservice->getTagsForBookmark(intval($row['bId'])); - $bookmarks[] = $row; - } - return array ('bookmarks' => $bookmarks, 'total' => $total); - } - - function deleteBookmark($bookmarkid) { - $query = 'DELETE FROM '. $GLOBALS['tableprefix'] .'bookmarks WHERE bId = '. intval($bookmarkid); - $this->db->sql_transaction('begin'); - if (!($dbresult = & $this->db->sql_query($query))) { - $this->db->sql_transaction('rollback'); - message_die(GENERAL_ERROR, 'Could not delete bookmarks', '', __LINE__, __FILE__, $query, $this->db); - return false; - } - - $query = 'DELETE FROM '. $GLOBALS['tableprefix'] .'tags WHERE bId = '. 
intval($bookmarkid); - $this->db->sql_transaction('begin'); - if (!($dbresult = & $this->db->sql_query($query))) { - $this->db->sql_transaction('rollback'); - message_die(GENERAL_ERROR, 'Could not delete bookmarks', '', __LINE__, __FILE__, $query, $this->db); - return false; - } - - $this->db->sql_transaction('commit'); - return true; - } - - function countOthers($address) { - if (!$address) { - return false; - } - - $userservice = & ServiceFactory :: getServiceInstance('UserService'); - $sId = $userservice->getCurrentUserId(); - - if ($userservice->isLoggedOn()) { - // All public bookmarks, user's own bookmarks and any shared with user - $privacy = ' AND ((B.bStatus = 0) OR (B.uId = '. $sId .')'; - $watchnames = $userservice->getWatchNames($sId, true); - foreach($watchnames as $watchuser) { - $privacy .= ' OR (U.username = "'. $watchuser .'" AND B.bStatus = 1)'; - } - $privacy .= ')'; - } else { - // Just public bookmarks - $privacy = ' AND B.bStatus = 0'; - } - - $sql = 'SELECT COUNT(*) FROM '. $userservice->getTableName() .' AS U, '. $GLOBALS['tableprefix'] .'bookmarks AS B WHERE U.'. $userservice->getFieldName('primary') .' = B.uId AND B.bHash = "'. md5($address) .'"'. $privacy; - if (!($dbresult = & $this->db->sql_query($sql))) { - message_die(GENERAL_ERROR, 'Could not get vars', '', __LINE__, __FILE__, $sql, $this->db); - } - return $this->db->sql_fetchfield(0, 0) - 1; + function &getInstance(&$db) { + static $instance; + if (!isset($instance)) { + $instance = new BookmarkService($db); } + return $instance; + } + + function BookmarkService(&$db) { + $this->db =& $db; + } + + function _getbookmark($fieldname, $value, $all = false) { + if (!$all) { + $userservice = & ServiceFactory :: getServiceInstance('UserService'); + $sId = $userservice->getCurrentUserId(); + $range = ' AND uId = '. $sId; + } + + $query = 'SELECT * FROM '. $GLOBALS['tableprefix'] .'bookmarks WHERE '. $fieldname .' = "'. $this->db->sql_escape($value) .'"'. 
$range; + + if (!($dbresult = & $this->db->sql_query_limit($query, 1, 0))) { + message_die(GENERAL_ERROR, 'Could not get bookmark', '', __LINE__, __FILE__, $query, $this->db); + return false; + } + + if ($row =& $this->db->sql_fetchrow($dbresult)) { + return $row; + } else { + return false; + } + } + + function & getBookmark($bid, $include_tags = false) { + if (!is_numeric($bid)) + return; + + $sql = 'SELECT * FROM '. $GLOBALS['tableprefix'] .'bookmarks WHERE bId = '. $this->db->sql_escape($bid); + + if (!($dbresult = & $this->db->sql_query($sql))) + message_die(GENERAL_ERROR, 'Could not get vars', '', __LINE__, __FILE__, $sql, $this->db); + + if ($row = & $this->db->sql_fetchrow($dbresult)) { + if ($include_tags) { + $tagservice = & ServiceFactory :: getServiceInstance('TagService'); + $row['tags'] = $tagservice->getTagsForBookmark($bid); + } + return $row; + } else { + return false; + } + } + + function getBookmarkByAddress($address) { + $hash = md5($address); + return $this->getBookmarkByHash($hash); + } + + function getBookmarkByHash($hash) { + return $this->_getbookmark('bHash', $hash, true); + } + + function editAllowed($bookmark) { + if (!is_numeric($bookmark) && (!is_array($bookmark) || !is_numeric($bookmark['bId']))) + return false; + + if (!is_array($bookmark)) + if (!($bookmark = $this->getBookmark($bookmark))) + return false; + + $userservice = & ServiceFactory :: getServiceInstance('UserService'); + $userid = $userservice->getCurrentUserId(); + if ($userservice->isAdmin($userid)) + return true; + else + return ($bookmark['uId'] == $userid); + } + + function bookmarkExists($address = false, $uid = NULL) { + if (!$address) { + return; + } + + // If address doesn't contain ":", add "http://" as the default protocol + if (strpos($address, ':') === false) { + $address = 'http://'. $address; + } + + $crit = array ('bHash' => md5($address)); + if (isset ($uid)) { + $crit['uId'] = $uid; + } + + $sql = 'SELECT COUNT(*) FROM '. 
$GLOBALS['tableprefix'] .'bookmarks WHERE '. $this->db->sql_build_array('SELECT', $crit); + if (!($dbresult = & $this->db->sql_query($sql))) { + message_die(GENERAL_ERROR, 'Could not get vars', '', __LINE__, __FILE__, $sql, $this->db); + } + return ($this->db->sql_fetchfield(0, 0) > 0); + } + + // Adds a bookmark to the database. + // Note that date is expected to be a string that's interpretable by strtotime(). + function addBookmark($address, $title, $description, $status, $categories, $date = NULL, $fromApi = false, $fromImport = false) { + $userservice = & ServiceFactory :: getServiceInstance('UserService'); + $sId = $userservice->getCurrentUserId(); + + // If bookmark address doesn't contain ":", add "http://" to the start as a default protocol + if (strpos($address, ':') === false) { + $address = 'http://'. $address; + } + + // Get the client's IP address and the date; note that the date is in GMT. + if (getenv('HTTP_CLIENT_IP')) + $ip = getenv('HTTP_CLIENT_IP'); + else + if (getenv('REMOTE_ADDR')) + $ip = getenv('REMOTE_ADDR'); + else + $ip = getenv('HTTP_X_FORWARDED_FOR'); + + // Note that if date is NULL, then it's added with a date and time of now, and if it's present, + // it's expected to be a string that's interpretable by strtotime(). + if (is_null($date)) + $time = time(); + else + $time = strtotime($date); + $datetime = gmdate('Y-m-d H:i:s', $time); + + // Set up the SQL insert statement and execute it. + $values = array('uId' => intval($sId), 'bIp' => $ip, 'bDatetime' => $datetime, 'bModified' => $datetime, 'bTitle' => $title, 'bAddress' => $address, 'bDescription' => $description, 'bStatus' => intval($status), 'bHash' => md5($address)); + $sql = 'INSERT INTO '. $GLOBALS['tableprefix'] .'bookmarks '. 
$this->db->sql_build_array('INSERT', $values); + $this->db->sql_transaction('begin'); + if (!($dbresult = & $this->db->sql_query($sql))) { + $this->db->sql_transaction('rollback'); + message_die(GENERAL_ERROR, 'Could not insert bookmark', '', __LINE__, __FILE__, $sql, $this->db); + return false; + } + // Get the resultant row ID for the bookmark. + $bId = $this->db->sql_nextid($dbresult); + if (!isset($bId) || !is_int($bId)) { + $this->db->sql_transaction('rollback'); + message_die(GENERAL_ERROR, 'Could not insert bookmark', '', __LINE__, __FILE__, $sql, $this->db); + return false; + } + + $uriparts = explode('.', $address); + $extension = end($uriparts); + unset($uriparts); + + $tagservice = & ServiceFactory :: getServiceInstance('TagService'); + if (!$tagservice->attachTags($bId, $categories, $fromApi, $extension, false, $fromImport)) { + $this->db->sql_transaction('rollback'); + message_die(GENERAL_ERROR, 'Could not insert bookmark', '', __LINE__, __FILE__, $sql, $this->db); + return false; + } + $this->db->sql_transaction('commit'); + // Everything worked out, so return the new bookmark's bId. + return $bId; + } + + function updateBookmark($bId, $address, $title, $description, $status, $categories, $date = NULL, $fromApi = false) { + if (!is_numeric($bId)) + return false; + + // Get the client's IP address and the date; note that the date is in GMT. + if (getenv('HTTP_CLIENT_IP')) + $ip = getenv('HTTP_CLIENT_IP'); + else + if (getenv('REMOTE_ADDR')) + $ip = getenv('REMOTE_ADDR'); + else + $ip = getenv('HTTP_X_FORWARDED_FOR'); + + $moddatetime = gmdate('Y-m-d H:i:s', time()); + + // Set up the SQL update statement and execute it. + $updates = array('bModified' => $moddatetime, 'bTitle' => $title, 'bAddress' => $address, 'bDescription' => $description, 'bStatus' => $status, 'bHash' => md5($address)); + + if (!is_null($date)) { + $updates['bDateTime'] = gmdate('Y-m-d H:i:s', strtotime($date)); + } + + $sql = 'UPDATE '. $GLOBALS['tableprefix'] .'bookmarks SET '. 
$this->db->sql_build_array('UPDATE', $updates) .' WHERE bId = '. intval($bId); + $this->db->sql_transaction('begin'); + + if (!($dbresult = & $this->db->sql_query($sql))) { + $this->db->sql_transaction('rollback'); + message_die(GENERAL_ERROR, 'Could not update bookmark', '', __LINE__, __FILE__, $sql, $this->db); + return false; + } + + $uriparts = explode('.', $address); + $extension = end($uriparts); + unset($uriparts); + + $tagservice = & ServiceFactory :: getServiceInstance('TagService'); + if (!$tagservice->attachTags($bId, $categories, $fromApi, $extension)) { + $this->db->sql_transaction('rollback'); + message_die(GENERAL_ERROR, 'Could not update bookmark', '', __LINE__, __FILE__, $sql, $this->db); + return false; + } + + $this->db->sql_transaction('commit'); + // Everything worked out, so return true. + return true; + } + + function & getBookmarks($start = 0, $perpage = NULL, $user = NULL, $tags = NULL, $terms = NULL, $sortOrder = NULL, $watched = NULL, $startdate = NULL, $enddate = NULL, $hash = NULL) { + // Only get the bookmarks that are visible to the current user. Our rules: + // - if the $user is NULL, that means get bookmarks from ALL users, so we need to make + // sure to check the logged-in user's watchlist and get the contacts-only bookmarks from + // those users. If the user isn't logged-in, just get the public bookmarks. + // - if the $user is set and isn't the logged-in user, then get that user's bookmarks, and + // if that user is on the logged-in user's watchlist, get the public AND contacts-only + // bookmarks; otherwise, just get the public bookmarks. + // - if the $user is set and IS the logged-in user, then get all bookmarks. 
+ $userservice =& ServiceFactory::getServiceInstance('UserService'); + $tagservice =& ServiceFactory::getServiceInstance('TagService'); + $sId = $userservice->getCurrentUserId(); + + if ($userservice->isLoggedOn()) { + // All public bookmarks, user's own bookmarks and any shared with user + $privacy = ' AND ((B.bStatus = 0) OR (B.uId = '. $sId .')'; + $watchnames = $userservice->getWatchNames($sId, true); + foreach($watchnames as $watchuser) { + $privacy .= ' OR (U.username = "'. $watchuser .'" AND B.bStatus = 1)'; + } + $privacy .= ')'; + } else { + // Just public bookmarks + $privacy = ' AND B.bStatus = 0'; + } + + // Set up the tags, if need be. + if (!is_array($tags) && !is_null($tags)) { + $tags = explode('+', trim($tags)); + } + + $tagcount = count($tags); + for ($i = 0; $i < $tagcount; $i ++) { + $tags[$i] = trim($tags[$i]); + } + + // Set up the SQL query. + $query_1 = 'SELECT DISTINCT '; + if (SQL_LAYER == 'mysql4') { + $query_1 .= 'SQL_CALC_FOUND_ROWS '; + } + $query_1 .= 'B.*, U.'. $userservice->getFieldName('username'); + + $query_2 = ' FROM '. $userservice->getTableName() .' AS U, '. $GLOBALS['tableprefix'] .'bookmarks AS B'; + + $query_3 = ' WHERE B.uId = U.'. $userservice->getFieldName('primary') . $privacy; + if (is_null($watched)) { + if (!is_null($user)) { + $query_3 .= ' AND B.uId = '. $user; + } + } else { + $arrWatch = $userservice->getWatchlist($user); + if (count($arrWatch) > 0) { + foreach($arrWatch as $row) { + $query_3_1 .= 'B.uId = '. intval($row) .' OR '; + } + $query_3_1 = substr($query_3_1, 0, -3); + } else { + $query_3_1 = 'B.uId = -1'; + } + $query_3 .= ' AND ('. 
$query_3_1 .') AND B.bStatus IN (0, 1)'; + } + + switch($sortOrder) { + case 'date_asc': + $query_5 = ' ORDER BY B.bDatetime ASC '; + break; + case 'title_desc': + $query_5 = ' ORDER BY B.bTitle DESC '; + break; + case 'title_asc': + $query_5 = ' ORDER BY B.bTitle ASC '; + break; + case 'url_desc': + $query_5 = ' ORDER BY B.bAddress DESC '; + break; + case 'url_asc': + $query_5 = ' ORDER BY B.bAddress ASC '; + break; + default: + $query_5 = ' ORDER BY B.bDatetime DESC '; + } + + // Handle the parts of the query that depend on any tags that are present. + $query_4 = ''; + for ($i = 0; $i < $tagcount; $i ++) { + $query_2 .= ', '. $GLOBALS['tableprefix'] .'tags AS T'. $i; + $query_4 .= ' AND T'. $i .'.tag = "'. $this->db->sql_escape($tags[$i]) .'" AND T'. $i .'.bId = B.bId'; + } + + // Search terms + if ($terms) { + // Multiple search terms okay + $aTerms = explode(' ', $terms); + $aTerms = array_map('trim', $aTerms); + + // Search terms in tags as well when none given + if (!count($tags)) { + $query_2 .= ' LEFT JOIN '. $GLOBALS['tableprefix'] .'tags AS T ON B.bId = T.bId'; + $dotags = true; + } else { + $dotags = false; + } + + $query_4 = ''; + for ($i = 0; $i < count($aTerms); $i++) { + $query_4 .= ' AND (B.bTitle LIKE "%'. $this->db->sql_escape($aTerms[$i]) .'%"'; + $query_4 .= ' OR B.bDescription LIKE "%'. $this->db->sql_escape($aTerms[$i]) .'%"'; + if ($dotags) { + $query_4 .= ' OR T.tag = "'. $this->db->sql_escape($aTerms[$i]) .'"'; + } + $query_4 .= ')'; + } + } + + // Start and end dates + if ($startdate) { + $query_4 .= ' AND B.bDatetime > "'. $startdate .'"'; + } + if ($enddate) { + $query_4 .= ' AND B.bDatetime < "'. $enddate .'"'; + } + + // Hash + if ($hash) { + $query_4 .= ' AND B.bHash = "'. $hash .'"'; + } + + $query = $query_1 . $query_2 . $query_3 . $query_4 . 
$query_5; + if (!($dbresult = & $this->db->sql_query_limit($query, intval($perpage), intval($start)))) { + message_die(GENERAL_ERROR, 'Could not get bookmarks', '', __LINE__, __FILE__, $query, $this->db); + return false; + } + + if (SQL_LAYER == 'mysql4') { + $totalquery = 'SELECT FOUND_ROWS() AS total'; + } else { + $totalquery = 'SELECT COUNT(*) AS total'. $query_2 . $query_3 . $query_4; + } + + if (!($totalresult = & $this->db->sql_query($totalquery)) || (!($row = & $this->db->sql_fetchrow($totalresult)))) { + message_die(GENERAL_ERROR, 'Could not get total bookmarks', '', __LINE__, __FILE__, $totalquery, $this->db); + return false; + } + + $total = $row['total']; + + $bookmarks = array(); + while ($row = & $this->db->sql_fetchrow($dbresult)) { + $row['tags'] = $tagservice->getTagsForBookmark(intval($row['bId'])); + $bookmarks[] = $row; + } + return array ('bookmarks' => $bookmarks, 'total' => $total); + } + + function deleteBookmark($bookmarkid) { + $query = 'DELETE FROM '. $GLOBALS['tableprefix'] .'bookmarks WHERE bId = '. intval($bookmarkid); + $this->db->sql_transaction('begin'); + if (!($dbresult = & $this->db->sql_query($query))) { + $this->db->sql_transaction('rollback'); + message_die(GENERAL_ERROR, 'Could not delete bookmarks', '', __LINE__, __FILE__, $query, $this->db); + return false; + } + + $query = 'DELETE FROM '. $GLOBALS['tableprefix'] .'tags WHERE bId = '. 
intval($bookmarkid); + $this->db->sql_transaction('begin'); + if (!($dbresult = & $this->db->sql_query($query))) { + $this->db->sql_transaction('rollback'); + message_die(GENERAL_ERROR, 'Could not delete bookmarks', '', __LINE__, __FILE__, $query, $this->db); + return false; + } + + $this->db->sql_transaction('commit'); + return true; + } + + function countOthers($address) { + if (!$address) { + return false; + } + + $userservice = & ServiceFactory :: getServiceInstance('UserService'); + $sId = $userservice->getCurrentUserId(); + + if ($userservice->isLoggedOn()) { + // All public bookmarks, user's own bookmarks and any shared with user + $privacy = ' AND ((B.bStatus = 0) OR (B.uId = '. $sId .')'; + $watchnames = $userservice->getWatchNames($sId, true); + foreach($watchnames as $watchuser) { + $privacy .= ' OR (U.username = "'. $watchuser .'" AND B.bStatus = 1)'; + } + $privacy .= ')'; + } else { + // Just public bookmarks + $privacy = ' AND B.bStatus = 0'; + } + + $sql = 'SELECT COUNT(*) FROM '. $userservice->getTableName() .' AS U, '. $GLOBALS['tableprefix'] .'bookmarks AS B WHERE U.'. $userservice->getFieldName('primary') .' = B.uId AND B.bHash = "'. md5($address) .'"'. $privacy; + if (!($dbresult = & $this->db->sql_query($sql))) { + message_die(GENERAL_ERROR, 'Could not get vars', '', __LINE__, __FILE__, $sql, $this->db); + } + return $this->db->sql_fetchfield(0, 0) - 1; + } } -?> diff --git a/services/cacheservice.php b/services/cacheservice.php index fe66d38..2169e2d 100644 --- a/services/cacheservice.php +++ b/services/cacheservice.php @@ -1,38 +1,36 @@ basedir = $GLOBALS['dir_cache']; - } + function CacheService() { + $this->basedir = $GLOBALS['dir_cache']; + } - function Start($hash, $time = 300) { - $cachefile = $this->basedir .'/'. $hash . 
$this->fileextension; - if (file_exists($cachefile) && time() < filemtime($cachefile) + $time) { - @readfile($cachefile); - echo "\n\n"; - unset($cachefile); - exit; - } - ob_start("ob_gzhandler"); + function Start($hash, $time = 300) { + $cachefile = $this->basedir .'/'. $hash . $this->fileextension; + if (file_exists($cachefile) && time() < filemtime($cachefile) + $time) { + @readfile($cachefile); + echo "\n\n"; + unset($cachefile); + exit; } + ob_start("ob_gzhandler"); + } - function End($hash) { - $cachefile = $this->basedir .'/'. $hash . $this->fileextension; - $handle = fopen($cachefile, 'w'); - fwrite($handle, ob_get_contents()); - fclose($handle); - ob_flush(); - } + function End($hash) { + $cachefile = $this->basedir .'/'. $hash . $this->fileextension; + $handle = fopen($cachefile, 'w'); + fwrite($handle, ob_get_contents()); + fclose($handle); + ob_flush(); + } } -?> \ No newline at end of file diff --git a/services/servicefactory.php b/services/servicefactory.php index ba2d6d7..5f7635a 100644 --- a/services/servicefactory.php +++ b/services/servicefactory.php @@ -1,14 +1,14 @@ sql_connect($dbhost, $dbuser, $dbpass, $dbname, $dbport, $dbpersist); if(!$db->db_connect_id) { @@ -23,11 +23,10 @@ class ServiceFactory { if (!isset($servicedir)) { $servicedir = dirname(__FILE__) .'/'; } - require_once($servicedir . strtolower($name) . '.php'); + require_once $servicedir . 
strtolower($name) .'.php'; } $instances[$name] = call_user_func(array($name, 'getInstance'), $db); - } + } return $instances[$name]; } } -?> \ No newline at end of file diff --git a/services/tagservice.php b/services/tagservice.php index 6bfbf15..9c6c75f 100644 --- a/services/tagservice.php +++ b/services/tagservice.php @@ -1,26 +1,24 @@ db =& $db; - $this->tablename = $GLOBALS['tableprefix'] .'tags'; + function &getInstance(&$db) { + static $instance; + if (!isset($instance)) { + $instance = new TagService($db); } + return $instance; + } - function isNotSystemTag($var) { - if (utf8_substr($var, 0, 7) == 'system:') - return false; - else - return true; - } + function TagService(&$db) { + $this->db =& $db; + $this->tablename = $GLOBALS['tableprefix'] .'tags'; + } + + function isNotSystemTag($var) { + return !(utf8_substr($var, 0, 7) == 'system:'); + } function attachTags($bookmarkid, $tags, $fromApi = false, $extension = NULL, $replace = true, $fromImport = false) { // Make sure that categories is an array of trimmed strings, and that if the categories are @@ -45,7 +43,7 @@ class TagService { for ($i = 0; $i < $tags_count; $i++) { $tags[$i] = trim(strtolower($tags[$i])); if ($fromApi) { - include_once(dirname(__FILE__) .'/../functions.inc.php'); + include_once dirname(__FILE__) .'/../functions.inc.php'; $tags[$i] = convertTag($tags[$i], 'in'); } } @@ -64,7 +62,7 @@ class TagService { // Media and file types if (!is_null($extension)) { - include_once(dirname(__FILE__) .'/../functions.inc.php'); + include_once dirname(__FILE__) .'/../functions.inc.php'; if ($keys = multi_array_search($extension, $GLOBALS['filetypes'])) { $tags[] = 'system:filetype:'. $extension; $tags[] = 'system:media:'. 
array_shift($keys); @@ -350,7 +348,7 @@ class TagService { } if ($sortOrder == 'alphabet_asc') { - usort($output, create_function('$a,$b','return strcasecmp(utf8_deaccent($a["tag"]), utf8_deaccent($b["tag"]));')); + usort($output, create_function('$a,$b','return strcmp(utf8_strtolower($a["tag"]), utf8_strtolower($b["tag"]));')); } return $output; @@ -360,4 +358,3 @@ class TagService { function getTableName() { return $this->tablename; } function setTableName($value) { $this->tablename = $value; } } -?> \ No newline at end of file diff --git a/services/templateservice.php b/services/templateservice.php index 191ab8d..1a1719c 100644 --- a/services/templateservice.php +++ b/services/templateservice.php @@ -1,46 +1,48 @@ basedir = $GLOBALS['TEMPLATES_DIR']; + var $basedir; + + function &getInstance() { + static $instance; + if (!isset($instance)) { + $instance = new TemplateService(); } - - function loadTemplate($template, $vars = NULL) { - if (substr($template, -4) != '.php') - $template .= '.php'; - $tpl =& new Template($this->basedir .'/'. $template, $vars, $this); - $tpl->parse(); - return $tpl; + return $instance; + } + + function TemplateService() { + $this->basedir = $GLOBALS['TEMPLATES_DIR']; + } + + function loadTemplate($template, $vars = NULL) { + if (substr($template, -4) != '.php') { + $template .= '.php'; } + $tpl = new Template($this->basedir .'/'. 
$template, $vars, $this); + $tpl->parse(); + return $tpl; + } } class Template { - var $vars = array(); - var $file = ''; - var $templateservice; - - function Template($file, $vars = NULL, &$templateservice) { - $this->vars = $vars; - $this->file = $file; - $this->templateservice = $templateservice; - } - - function parse() { - if (isset($this->vars)) - extract($this->vars); - include($this->file); - } - - function includeTemplate($name) { - return $this->templateservice->loadTemplate($name, $this->vars); + var $vars = array(); + var $file = ''; + var $templateservice; + + function Template($file, $vars = NULL, &$templateservice) { + $this->vars = $vars; + $this->file = $file; + $this->templateservice = $templateservice; + } + + function parse() { + if (isset($this->vars)) { + extract($this->vars); } + include $this->file; + } + + function includeTemplate($name) { + return $this->templateservice->loadTemplate($name, $this->vars); + } } -?> \ No newline at end of file diff --git a/services/userservice.php b/services/userservice.php index e0b7ba9..a2709fb 100644 --- a/services/userservice.php +++ b/services/userservice.php @@ -1,24 +1,25 @@ 'uId', - 'username' => 'username', - 'password' => 'password' - ); - var $profileurl; - var $tablename; - var $sessionkey; - var $cookiekey; - var $cookietime = 1209600; // 2 weeks + return $instance; + } + + var $fields = array( + 'primary' => 'uId', + 'username' => 'username', + 'password' => 'password' + ); + var $profileurl; + var $tablename; + var $sessionkey; + var $cookiekey; + var $cookietime = 1209600; // 2 weeks function UserService(&$db) { $this->db =& $db; @@ -334,7 +335,7 @@ class UserService { function isValidEmail($email) { if (preg_match("/^((?:(?:(?:\w[\.\-\+_]?)*)\w)+)\@((?:(?:(?:\w[\.\-_]?){0,62})\w)+)\.(\w{2,6})$/i", $email) > 0) { - list($emailUser, $emailDomain) = split("@", $email); + list($emailUser, $emailDomain) = explode("@", $email); // Check if the email domain has a DNS record if 
($this->_checkdns($emailDomain)) { @@ -357,4 +358,3 @@ class UserService { function getCookieKey() { return $this->cookiekey; } function setCookieKey($value) { $this->cookiekey = $value; } } -?> diff --git a/tagdelete.php b/tagdelete.php index 9f11bdd..2d39219 100644 --- a/tagdelete.php +++ b/tagdelete.php @@ -1,6 +1,6 @@ includeTemplate($GLOBALS['top_include']); -?> +includeTemplate($GLOBALS['top_include']); ?>
    -
  • Store all your favourite links in one place, accessible from anywhere.'); ?>
  • -
  • Share your bookmarks with everyone, with friends on your watchlist or just keep them private.') ;?>
  • -
  • Tag your bookmarks with as many labels as you want, instead of wrestling with folders.'); ?>
  • -
  • Register now to start using %s!'), $GLOBALS['sitename']); ?>
  • +
  • Store all your favourite links in one place, accessible from anywhere.'); ?>
  • +
  • Share your bookmarks with everyone, with friends on your watchlist or just keep them private.') ;?>
  • +
  • Tag your bookmarks with as many labels as you want, instead of wrestling with folders.'); ?>
  • +
  • Register now to start using %s!'), $GLOBALS['sitename']); ?>

    -
  • an open-source project licensed under the GNU General Public License. This means you can host it on your own web server for free, whether it is on the Internet, a private network or just your own computer.'), $GLOBALS['sitename']); ?>
  • -
  • del.icio.us API. Almost all of the neat tools made for that system can be modified to work with %1$s instead. If you find a tool that won\'t let you change the API address, ask the creator to add this setting. You never know, they might just do it.'), $GLOBALS['sitename']); ?>
  • +
  • an open-source project licensed under the GNU General Public License. This means you can host it on your own web server for free, whether it is on the Internet, a private network or just your own computer.'), $GLOBALS['sitename']); ?>
  • +
  • delicious API. Almost all of the neat tools made for that system can be modified to work with %1$s instead. If you find a tool that won\'t let you change the API address, ask the creator to add this setting. You never know, they might just do it.'), $GLOBALS['sitename']); ?>
-includeTemplate($GLOBALS['bottom_include']); -?> \ No newline at end of file +includeTemplate($GLOBALS['bottom_include']); ?> \ No newline at end of file diff --git a/templates/bookmarks.tpl.php b/templates/bookmarks.tpl.php index 368ec8b..ab35787 100644 --- a/templates/bookmarks.tpl.php +++ b/templates/bookmarks.tpl.php @@ -1,18 +1,14 @@ getCurrentUserId(); $this->includeTemplate($GLOBALS['top_include']); -include('search.inc.php'); +include 'search.inc.php'; if (count($bookmarks) > 0) { ?> - -

/ @@ -25,7 +21,6 @@ window.onload = playerLoad; } ?>

- 0 ? ' start="'. ++$start .'"' : ''); ?> id="bookmarks"> getCurrentUserId(); @@ -115,6 +115,4 @@ document.write('<\/p>'); document.write('<\/div>'); - \ No newline at end of file + \ No newline at end of file diff --git a/templates/editbookmark.tpl.php b/templates/editbookmark.tpl.php index a590d81..16b56c7 100644 --- a/templates/editbookmark.tpl.php +++ b/templates/editbookmark.tpl.php @@ -17,12 +17,6 @@ switch ($row['bStatus']) { } ?> - -
@@ -60,27 +54,24 @@ window.onload = function() {
- - - - - - - + + + + + + + +
+ includeTemplate($GLOBALS['top_include']); -?> - - +includeTemplate($GLOBALS['top_include']); ?>
- + - + @@ -29,7 +21,10 @@ window.onload = function() {
.

»

+ -includeTemplate($GLOBALS['bottom_include']); -?> \ No newline at end of file +includeTemplate($GLOBALS['bottom_include']); ?> \ No newline at end of file diff --git a/templates/register.tpl.php b/templates/register.tpl.php index 7160535..8629ec2 100644 --- a/templates/register.tpl.php +++ b/templates/register.tpl.php @@ -1,12 +1,4 @@ -includeTemplate($GLOBALS['top_include']); -?> - - +includeTemplate($GLOBALS['top_include']); ?>

.

@@ -14,7 +6,7 @@ window.onload = function() { - + @@ -34,7 +26,26 @@ window.onload = function() {
+ -includeTemplate($GLOBALS['bottom_include']); -?> \ No newline at end of file +includeTemplate($GLOBALS['bottom_include']); ?> \ No newline at end of file diff --git a/templates/top.inc.php b/templates/top.inc.php index 64213fd..44b31df 100644 --- a/templates/top.inc.php +++ b/templates/top.inc.php @@ -11,10 +11,11 @@ for ($i = 0; $i < $size; $i++) { echo ''; } - if ($loadjs) { - echo ''; - } ?> + + + + diff --git a/watch.php b/watch.php index 6a01e9a..61981bb 100644 --- a/watch.php +++ b/watch.php @@ -1,6 +1,6 @@