ソースを参照

allow proper MB handling (utf8) for preg match

euromark 12 年 前
コミット
67028e5ce9
3 ファイル変更118 行追加6 行削除
  1. 60 6
      Lib/Utility/Utility.php
  2. 1 0
      README.md
  3. 57 0
      Test/Case/Lib/Utility/UtilityTest.php

+ 60 - 6
Lib/Utility/Utility.php

@@ -1,4 +1,5 @@
 <?php
 <?php
+
 App::uses('Sanitize', 'Utility');
 App::uses('Sanitize', 'Utility');
 App::uses('Router', 'Routing');
 App::uses('Router', 'Routing');
 
 
@@ -22,7 +23,60 @@ class Utility {
 	 */
 	 */
 	public static function inArray($needle, $haystack) {
 	public static function inArray($needle, $haystack) {
 		$strict = !is_numeric($needle);
 		$strict = !is_numeric($needle);
-		return in_array((string)$needle, $haystack, $strict);
+		return in_array((string )$needle, $haystack, $strict);
+	}
+
+	/**
+	 * Multibyte analogue of preg_match_all() function.
+	 * Injects (*UTF8) right after the opening delimiter of the pattern.
+	 *
+	 * @param string $pattern The pattern to use; its first character must be the delimiter.
+	 * @param string $subject The string to match.
+	 * @param array $matches Filled by reference with all matches, ordered according to $flags.
+	 * @param int $flags PREG_* flags as for preg_match_all(); null is passed on as 0.
+	 * @param int $offset Start offset as for preg_match_all(); null is passed on as 0.
+	 * @return int|bool Whatever preg_match_all() returns (match count, or false on error).
+	 */
+	public static function pregMatchAll($pattern, $subject, &$matches = null, $flags = null, $offset = null) {
+		$pattern = substr($pattern, 0, 1) . '(*UTF8)' . substr($pattern, 1);
+		return preg_match_all($pattern, $subject, $matches, (int)$flags, (int)$offset);
+	}
+
+	/**
+	 * Multibyte analogue of preg_match() function.
+	 * Injects (*UTF8) right after the opening delimiter of the pattern.
+	 *
+	 * @param string $pattern The pattern to use; its first character must be the delimiter.
+	 * @param string $subject The string to match.
+	 * @param array $matches Filled by reference: index 0 is the full match, 1..n the groups.
+	 * @param int $flags PREG_* flags as for preg_match(); null is passed on as 0.
+	 * @param int $offset Start offset as for preg_match(); null is passed on as 0.
+	 * @return int|bool Whatever preg_match() returns (1, 0, or false on error).
+	 */
+	public static function pregMatch($pattern, $subject, &$matches = null, $flags = null, $offset = null) {
+		$pattern = substr($pattern, 0, 1) . '(*UTF8)' . substr($pattern, 1);
+		return preg_match($pattern, $subject, $matches, (int)$flags, (int)$offset);
+	}
+
+	/**
+	 * Multibyte analogue of str_split() function.
+	 * Splits by characters (as counted by the mb_* functions), not by bytes.
+	 *
+	 * @param string $str The string to split.
+	 * @param int $length Maximum number of characters per chunk.
+	 * @return array|bool List of chunks, or false if $length is less than 1.
+	 */
+	public static function strSplit($str, $length = 1) {
+		if ($length < 1) {
+			return false;
+		}
+		$result = array();
+		// Character count depends on the current mbstring internal encoding.
+		$c = mb_strlen($str);
+		for ($i = 0; $i < $c; $i += $length) {
+			$result[] = mb_substr($str, $i, $length);
+		}
+		return $result;
 	}
 	}
 
 
 	/**
 	/**
@@ -159,7 +213,7 @@ class Utility {
 
 
 		if (($pos = strpos($url, '.')) !== false) {
 		if (($pos = strpos($url, '.')) !== false) {
 			if (strpos(substr($url, 0, $pos), '//') === false) {
 			if (strpos(substr($url, 0, $pos), '//') === false) {
-				$url = $prefix.$url;
+				$url = $prefix . $url;
 			}
 			}
 		}
 		}
 		return $url;
 		return $url;
@@ -368,11 +422,11 @@ class Utility {
 		if (!$a) {
 		if (!$a) {
 			return array();
 			return array();
 		}
 		}
-		foreach($a as $k=>$v){
-			if(is_array($v)) {
-				$f= self::_arrayFlatten($v, $f);
+		foreach ($a as $k => $v) {
+			if (is_array($v)) {
+				$f = self::_arrayFlatten($v, $f);
 			} else {
 			} else {
-				$f[$k]=$v;
+				$f[$k] = $v;
 			}
 			}
 		}
 		}
 		return $f;
 		return $f;

+ 1 - 0
README.md

@@ -83,6 +83,7 @@ For details on how to contribute please read the [CONTRIBUTING page](CONTRIBUTIN
 
 
 ### License
 ### License
 Licensed under [The MIT License](http://www.opensource.org/licenses/mit-license.php)
 Licensed under [The MIT License](http://www.opensource.org/licenses/mit-license.php)
+unless specified otherwise (in the classes).
 
 
 ### Recent changes (possibly BC breaking)
 ### Recent changes (possibly BC breaking)
 
 

+ 57 - 0
Test/Case/Lib/Utility/UtilityTest.php

@@ -30,6 +30,63 @@ class UtilityTest extends MyCakeTestCase {
 		$this->assertFalse($res);
 		$this->assertFalse($res);
 	}
 	}
 
 
+	public function testPregMatch() {
+		$subject = '<abc>';
+		preg_match('/\<(\w+)\>/', $subject, $matches);
+		$this->assertSame(array($subject, 'abc'), $matches);
+		// NOTE(review): $matches is reused, so the wrapper checks in this test only
+		// prove something if Utility::pregMatch() fills $matches by reference.
+		Utility::pregMatch('/\<(\w+)\>/', $subject, $matches);
+		$this->assertSame(array($subject, 'abc'), $matches);
+
+		$subject = '<äöü>';
+		preg_match('/\<(.+)\>/', $subject, $matches);
+		$this->assertSame(array($subject, 'äöü'), $matches);
+		Utility::pregMatch('/\<(.+)\>/', $subject, $matches);
+		$this->assertSame(array($subject, 'äöü'), $matches);
+
+		$subject = 'D-81245 München';
+		$expected = array($subject, 'D', '81245', 'München');
+
+		// Native call: the (*UTF8) prefix has to be written into the pattern.
+		preg_match('/(*UTF8)([\w+])-([a-z0-9]+)\s+\b([\w\s]+)\b/i', $subject, $matches);
+		$this->assertSame($expected, $matches);
+
+		// Wrapper call: the same pattern without the prefix.
+		Utility::pregMatch('/([\w+])-([a-z0-9]+)\s+\b([\w\s]+)\b/i', $subject, $matches);
+		$this->assertSame($expected, $matches);
+
+		// Keep the reused variable from leaking into later edits of this test.
+		unset($matches);
+	}
+
+	public function testPregMatchAll() {
+		$string = 'D-81245 München';
+		preg_match_all('/(*UTF8)([\w+])-([a-z0-9]+)\s+\b([\w\s]+)\b/i', $string, $matches, PREG_SET_ORDER);
+		$expected = array(
+			array(
+				$string,
+				'D',
+				'81245',
+				'München'
+			)
+		);
+		$this->assertSame($expected, $matches);
+
+		// we dont need the utf8 hack, but the same flag as above for a set-ordered result:
+		Utility::pregMatchAll('/([\w+])-([a-z0-9]+)\s+\b([\w\s]+)\b/i', $string, $matches, PREG_SET_ORDER);
+		$this->assertSame($expected, $matches);
+	}
+
+	public function testStrSplit() {
+		$text = 'some äöü string';
+		$expected = array('some äö', 'ü strin', 'g');
+
+		// str_split() counts bytes, so its chunks must differ from the expected ones.
+		$this->assertNotSame($expected, str_split($text, 7));
+		$this->assertSame($expected, Utility::strSplit($text, 7));
+	}
+
 	public function testTypeCast() {
 	public function testTypeCast() {
 		$res = Utility::typeCast(2, 'string');
 		$res = Utility::typeCast(2, 'string');
 		$this->assertNotSame(2, $res);
 		$this->assertNotSame(2, $res);