Browse Source

Add ascii() and utf8() validators.

These methods let you confirm the byte ranges in an input. This is
helpful when dealing with MySQL's older utf8 encoding, which does not
support 4 byte characters.
Mark Story 10 years ago
parent
commit
ca93925245
2 changed files with 125 additions and 1 deletions
  1. 42 0
      src/Validation/Validation.php
  2. 83 1
      tests/TestCase/Validation/ValidationTest.php

+ 42 - 0
src/Validation/Validation.php

@@ -1098,6 +1098,48 @@ class Validation
     }
 
     /**
+     * Check that the input value contains only bytes in the ASCII range
+     * (0x00 - 0x7F).
+     *
+     * This method will reject all non-string values. The check is done on
+     * raw bytes, so stray high bytes that do not form well-formed UTF-8
+     * (for example a lone "\xff") are rejected as well.
+     *
+     * @param string $value The value to check
+     * @return bool
+     */
+    public static function ascii($value)
+    {
+        if (!is_string($value)) {
+            return false;
+        }
+        // Deliberately no `u` modifier: the pattern is matched byte-wise.
+        // Comparing strlen() with mb_strlen() is not sufficient because
+        // mb_strlen() counts each malformed byte as one character.
+        return preg_match('/[^\x00-\x7F]/', $value) === 0;
+    }
+
+    /**
+     * Check that the input value is a utf8 string.
+     *
+     * This method will reject all non-string values, as well as strings
+     * that are not well-formed UTF-8.
+     *
+     * # Options
+     *
+     * - `extended` - Allow characters above the basic multilingual plane
+     *   (4 byte sequences). MySQL's older utf8 encoding type does not allow
+     *   characters above the basic multilingual plane, so they are rejected
+     *   unless this option is enabled. Defaults to false.
+     *
+     * @param string $value The value to check
+     * @param array $options An array of options. See above for the supported options.
+     * @return bool
+     */
+    public static function utf8($value, array $options = [])
+    {
+        if (!is_string($value)) {
+            return false;
+        }
+        $options += ['extended' => false];
+        if ($options['extended']) {
+            // An empty pattern with the `u` modifier matches only when the
+            // subject is well-formed UTF-8; malformed input yields false.
+            return preg_match('//u', $value) === 1;
+        }
+        // preg_match() returns false (not 0) for malformed UTF-8, so the
+        // strict comparison rejects invalid input here as well.
+        return preg_match('/[\x{10000}-\x{10FFFF}]/u', $value) === 0;
+    }
+
+    /**
      * Check that the input value is an integer
      *
      * This method will accept strings that contain only integer data

+ 83 - 1
tests/TestCase/Validation/ValidationTest.php

@@ -2566,7 +2566,7 @@ class ValidationTest extends TestCase
     }
 
     /**
-     * Test is_integer
+     * Test isInteger
      *
      * @return void
      */
@@ -2584,4 +2584,86 @@ class ValidationTest extends TestCase
         $this->assertFalse(Validation::isInteger(new \StdClass));
         $this->assertFalse(Validation::isInteger('2 bears'));
     }
+
+    /**
+     * Verify that ascii() accepts plain ASCII strings and rejects
+     * non-string values and strings containing multi-byte characters.
+     *
+     * @return void
+     */
+    public function testAscii()
+    {
+        $accepted = [
+            '1 big blue bus.',
+            ',.<>[]{;/?\)()',
+        ];
+        foreach ($accepted as $input) {
+            $this->assertTrue(Validation::ascii($input));
+        }
+
+        $rejected = [
+            // Non-string values
+            [],
+            1001,
+            3.14,
+            new \StdClass,
+            // Latin-1 supplement
+            'some' . "\xc2\x82" . 'value',
+            'some' . "\xc3\xbf" . 'value',
+            // End of BMP
+            'some' . "\xef\xbf\xbd" . 'value',
+            // Start of supplementary multilingual plane
+            'some' . "\xf0\x90\x80\x80" . 'value',
+        ];
+        foreach ($rejected as $input) {
+            $this->assertFalse(Validation::ascii($input));
+        }
+    }
+
+    /**
+     * Verify utf8() in its default mode: characters inside the basic
+     * multilingual plane pass, 4 byte characters and non-strings fail.
+     *
+     * @return void
+     */
+    public function testUtf8Basic()
+    {
+        $nonStrings = [[], 1001, 3.14, new \StdClass];
+        foreach ($nonStrings as $input) {
+            $this->assertFalse(Validation::utf8($input));
+        }
+
+        $accepted = [
+            '1 big blue bus.',
+            ',.<>[]{;/?\)()',
+            // Latin-1 supplement
+            'some' . "\xc2\x82" . 'value',
+            'some' . "\xc3\xbf" . 'value',
+            // End of BMP
+            'some' . "\xef\xbf\xbd" . 'value',
+        ];
+        foreach ($accepted as $input) {
+            $this->assertTrue(Validation::utf8($input));
+        }
+
+        $rejected = [
+            // Start of supplementary multilingual plane
+            'some' . "\xf0\x90\x80\x80" . 'value',
+            // Grinning face
+            'some' . "\xf0\x9f\x98\x80" . 'value',
+        ];
+        foreach ($rejected as $input) {
+            $this->assertFalse(Validation::utf8($input));
+        }
+    }
+
+    /**
+     * Verify utf8() with the `extended` option enabled: non-strings still
+     * fail, while strings including 4 byte characters pass.
+     *
+     * @return void
+     */
+    public function testUtf8Extended()
+    {
+        $options = ['extended' => true];
+
+        $nonStrings = [[], 1001, 3.14, new \StdClass];
+        foreach ($nonStrings as $input) {
+            $this->assertFalse(Validation::utf8($input, $options));
+        }
+
+        $accepted = [
+            '1 big blue bus.',
+            ',.<>[]{;/?\)()',
+            // Latin-1 supplement
+            'some' . "\xc2\x82" . 'value',
+            'some' . "\xc3\xbf" . 'value',
+            // End of BMP
+            'some' . "\xef\xbf\xbd" . 'value',
+            // Start of supplementary multilingual plane
+            'some' . "\xf0\x90\x80\x80" . 'value',
+            // Grinning face
+            'some' . "\xf0\x9f\x98\x80" . 'value',
+        ];
+        foreach ($accepted as $input) {
+            $this->assertTrue(Validation::utf8($input, $options));
+        }
+    }
 }