Browse Source

Merge pull request #8524 from ADmad/slug-transliterator

Add Text::slug and Text::transliterate.

This deprecates Inflector::slug() as the new method leverages intl, and has customizable rules
that are higher level than those provided by Inflector::slug()
Mark Story 10 years ago
parent
commit
7beb1b4290
4 changed files with 270 additions and 3 deletions
  1. 1 0
      src/Utility/Inflector.php
  2. 91 0
      src/Utility/Text.php
  3. 6 3
      src/Utility/composer.json
  4. 172 0
      tests/TestCase/Utility/TextTest.php

+ 1 - 0
src/Utility/Inflector.php

@@ -731,6 +731,7 @@ class Inflector
      * Returns a string with all spaces converted to dashes (by default), accented
      * characters converted to non-accented characters, and non word characters removed.
      *
+     * @deprecated 3.2.7 Use Text::slug() instead.
      * @param string $string the string you want to slug
      * @param string $replacement will replace keys in map
      * @return string

+ 91 - 0
src/Utility/Text.php

@@ -24,6 +24,13 @@ class Text
 {
 
     /**
+     * Default transliterator id string.
+     *
+     * @var string Transliterator identifier string.
+     */
+    protected static $_defaultTransliteratorId = 'Any-Latin; Latin-ASCII; [\u0080-\u7fff] remove';
+
+    /**
      * Generate a random UUID version 4
      *
      * Warning: This method should not be used as a random seed for any cryptographic operations.
@@ -852,4 +859,88 @@ class Text
         }
         throw new InvalidArgumentException('No unit type.');
     }
+
+    /**
+     * Return the transliterator identifier that is used when a caller
+     * does not pass one explicitly.
+     *
+     * @return string The identifier currently stored in Text::$_defaultTransliteratorId.
+     */
+    public static function getTransliteratorId()
+    {
+        $transliteratorId = static::$_defaultTransliteratorId;
+
+        return $transliteratorId;
+    }
+
+    /**
+     * Replace the transliterator identifier that is used when a caller
+     * does not pass one explicitly.
+     *
+     * The value is kept in a static property, so it stays in effect for
+     * every later call until it is changed again.
+     *
+     * @param string $transliteratorId Transliterator identifier.
+     * @return void
+     */
+    public static function setTransliteratorId($transliteratorId)
+    {
+        static::$_defaultTransliteratorId = $transliteratorId;
+    }
+
+    /**
+     * Transliterate a string using the intl transliterator.
+     *
+     * @param string $string String to transliterate.
+     * @param string|null $transliteratorId Transliterator identifier. Any falsy
+     *   value (such as the default `null`) selects Text::$_defaultTransliteratorId.
+     * @return string|false Transliterated string, or `false` when
+     *   transliterator_transliterate() reports a failure.
+     * @see http://php.net/manual/en/transliterator.transliterate.php
+     */
+    public static function transliterate($string, $transliteratorId = null)
+    {
+        if (!$transliteratorId) {
+            $transliteratorId = static::$_defaultTransliteratorId;
+        }
+
+        return transliterator_transliterate($transliteratorId, $string);
+    }
+
+    /**
+     * Returns a string with all spaces converted to dashes (by default),
+     * characters transliterated to ASCII characters, and non word characters removed.
+     *
+     * ### Options:
+     *
+     * - `replacement`: Replacement string. Default '-'.
+     * - `transliteratorId`: A valid transliterator id string.
+     *   If `null` (default) Text::$_defaultTransliteratorId will be used.
+     *   If `false` no transliteration will be done, only non words will be removed.
+     * - `preserve`: Specific non-word character to preserve. Default `null`.
+     *   For e.g. this option can be set to '.' to generate clean file names.
+     *
+     * @param string $string the string you want to slug
+     * @param string|array $options If string it will be used as replacement character
+     *   or an array of options.
+     * @return string
+     */
+    public static function slug($string, $options = [])
+    {
+        if (is_string($options)) {
+            $options = ['replacement' => $options];
+        }
+        $options += [
+            'replacement' => '-',
+            'transliteratorId' => null,
+            'preserve' => null
+        ];
+
+        if ($options['transliteratorId'] !== false) {
+            $string = static::transliterate($string, $options['transliteratorId']);
+        }
+
+        // Body of a negated character class: anything that is not whitespace,
+        // a letter or a decimal digit gets turned into a space first.
+        $regex = '^\s\p{Ll}\p{Lm}\p{Lo}\p{Lt}\p{Lu}\p{Nd}';
+        if ($options['preserve']) {
+            // This is appended inside [...], where parentheses are literal
+            // characters, so it must not be wrapped in a group; otherwise
+            // '(' and ')' would be preserved as well.
+            $regex .= preg_quote($options['preserve'], '/');
+        }
+
+        $patterns = [
+            '/[' . $regex . ']/mu',
+            '/[\s]+/mu',
+        ];
+        $replacements = [
+            ' ',
+            $options['replacement'],
+        ];
+
+        // Trim leading/trailing replacement characters. Skipped for an empty
+        // replacement, which would otherwise yield the accidental class "[]+|[]".
+        // The `u` modifier keeps multibyte replacement characters intact.
+        $quotedReplacement = preg_quote($options['replacement'], '/');
+        if ($quotedReplacement !== '') {
+            $patterns[] = sprintf('/^[%s]+|[%s]+$/u', $quotedReplacement, $quotedReplacement);
+            $replacements[] = '';
+        }
+
+        return preg_replace($patterns, $replacements, $string);
+    }
 }

+ 6 - 3
src/Utility/composer.json

@@ -4,10 +4,13 @@
     "license": "MIT",
     "authors": [
         {
-        "name": "CakePHP Community",
-        "homepage": "http://cakephp.org"
-    }
+            "name": "CakePHP Community",
+            "homepage": "http://cakephp.org"
+        }
     ],
+    "suggest": {
+      "ext-intl": "To use Text::transliterate() or Text::slug()"
+    },
     "autoload": {
         "psr-4": {
             "Cake\\Utility\\": "."

+ 172 - 0
tests/TestCase/Utility/TextTest.php

@@ -1588,4 +1588,176 @@ podeís adquirirla.</span></p>
             [['size' => '2VB', 'default' => 'Unknown type'], 'Unknown type']
         ];
     }
+
+    /**
+     * Test getting/setting default transliterator id.
+     *
+     * The id is static state shared by every test, so it is restored before
+     * the final assertion runs; a failing assertion then cannot leak the
+     * temporary id into later tests.
+     *
+     * @return void
+     */
+    public function testGetSetTransliteratorId()
+    {
+        $defaultTransliteratorId = 'Any-Latin; Latin-ASCII; [\u0080-\u7fff] remove';
+        // Capture the id actually in effect so exactly that value is restored.
+        $original = Text::getTransliteratorId();
+        $this->assertEquals($defaultTransliteratorId, $original);
+
+        $expected = 'Latin-ASCII; [\u0080-\u7fff] remove';
+        Text::setTransliteratorId($expected);
+        $result = Text::getTransliteratorId();
+        Text::setTransliteratorId($original);
+
+        $this->assertEquals($expected, $result);
+    }
+
+    /**
+     * Data provider for testTransliterate()
+     *
+     * Each data set is a list of: input string, transliterator id
+     * (always `null` here) and the expected transliterated string.
+     *
+     * @return array
+     */
+    public function transliterateInputProvider()
+    {
+        return [
+            [
+                'Foo Bar: Not just for breakfast any-more', null,
+                'Foo Bar: Not just for breakfast any-more'
+            ],
+            [
+                'A æ Übérmensch på høyeste nivå! И я люблю PHP! есть. fi ¦', null,
+                'A ae Ubermensch pa hoyeste niva! I a lublu PHP! est. fi '
+            ],
+            [
+                'Äpfel Über Öl grün ärgert groß öko', null,
+                'Apfel Uber Ol grun argert gross oko'
+            ],
+            [
+                'La langue française est un attribut de souveraineté en France', null,
+                'La langue francaise est un attribut de souverainete en France'
+            ],
+            [
+                '!@$#exciting stuff! - what !@-# was that?', null,
+                '!@$#exciting stuff! - what !@-# was that?'
+            ],
+            [
+                'controller/action/りんご/1', null,
+                'controller/action/ringo/1'
+            ],
+            [
+                'の話が出たので大丈夫かなあと', null,
+                'no huaga chutanode da zhang fukanaato'
+            ],
+            [
+                'posts/view/한국어/page:1/sort:asc', null,
+                'posts/view/hangug-eo/page:1/sort:asc'
+            ],
+            [
+                "non\xc2\xa0breaking\xc2\xa0space", null,
+                'non breaking space'
+            ]
+        ];
+    }
+
+    /**
+     * Text::transliterate() returns the expected string for each data set.
+     *
+     * @param string $string Input string
+     * @param string|null $transliteratorId Transliterator id
+     * @param string $expected Expected string
+     * @return void
+     * @dataProvider transliterateInputProvider
+     */
+    public function testTransliterate($string, $transliteratorId, $expected)
+    {
+        $this->assertEquals($expected, Text::transliterate($string, $transliteratorId));
+    }
+
+    /**
+     * Data provider for testSlug()
+     *
+     * Each data set is a list of: input string, options passed to
+     * Text::slug() and the expected slug.
+     *
+     * @return array
+     */
+    public function slugInputProvider()
+    {
+        return [
+            [
+                'Foo Bar: Not just for breakfast any-more', [],
+                'Foo-Bar-Not-just-for-breakfast-any-more'
+            ],
+            [
+                'Foo Bar: Not just for breakfast any-more', ['replacement' => '_'],
+                'Foo_Bar_Not_just_for_breakfast_any_more'
+            ],
+            [
+                'Foo Bar: Not just for breakfast any-more', ['replacement' => '+'],
+                'Foo+Bar+Not+just+for+breakfast+any+more'
+            ],
+            [
+                'A æ Übérmensch på høyeste nivå! И я люблю PHP! есть. fi ¦', [],
+                'A-ae-Ubermensch-pa-hoyeste-niva-I-a-lublu-PHP-est-fi'
+            ],
+            [
+                'A æ Übérmensch på høyeste nivå! И я люблю PHP! есть. fi ¦', ['transliteratorId' => 'Latin-ASCII'],
+                'A-ae-Ubermensch-pa-hoyeste-niva-И-я-люблю-PHP-есть-fi'
+            ],
+            [
+                'Äpfel Über Öl grün ärgert groß öko', [],
+                'Apfel-Uber-Ol-grun-argert-gross-oko'
+            ],
+            [
+                'The truth - and- more- news', [],
+                'The-truth-and-more-news'
+            ],
+            [
+                'The truth: and more news', [],
+                'The-truth-and-more-news'
+            ],
+            [
+                'La langue française est un attribut de souveraineté en France', [],
+                'La-langue-francaise-est-un-attribut-de-souverainete-en-France'
+            ],
+            [
+                '!@$#exciting stuff! - what !@-# was that?', [],
+                'exciting-stuff-what-was-that'
+            ],
+            [
+                '20% of profits went to me!', [],
+                '20-of-profits-went-to-me'
+            ],
+            [
+                '#this melts your face1#2#3', [],
+                'this-melts-your-face1-2-3'
+            ],
+            [
+                'controller/action/りんご/1', ['transliteratorId' => false],
+                'controller-action-りんご-1'
+            ],
+            [
+                'の話が出たので大丈夫かなあと', ['transliteratorId' => false],
+                'の話が出たので大丈夫かなあと'
+            ],
+            [
+                'posts/view/한국어/page:1/sort:asc', ['transliteratorId' => false],
+                'posts-view-한국어-page-1-sort-asc'
+            ],
+            [
+                "non\xc2\xa0breaking\xc2\xa0space", [],
+                'non-breaking-space'
+            ],
+            [
+                'Foo Bar: Not just for breakfast any-more', ['replacement' => ''],
+                'FooBarNotjustforbreakfastanymore'
+            ],
+            [
+                'clean!_me.tar.gz', ['preserve' => '.'],
+                'clean-me.tar.gz'
+            ]
+        ];
+    }
+
+    /**
+     * Text::slug() returns the expected slug for each data set.
+     *
+     * @param string $string Input string
+     * @param array $options Options passed to Text::slug()
+     * @param string $expected Expected string
+     * @return void
+     * @dataProvider slugInputProvider
+     */
+    public function testSlug($string, $options, $expected)
+    {
+        $this->assertEquals($expected, Text::slug($string, $options));
+    }
 }