Browse Source

Fix Text::truncate() on single words.

Fixing this issue required making a small behavior change around how
single unbreakable words are handled. Instead of being entirely omitted
as before, if a text fragment is not breakable, we do an exact slice.
This favors including *some* content over just the ellipsis. While this
is a behavior change, I don't think it's very intuitive that an inexact
truncation will result in no text. This also changes how chains of
HTML entities are handled, as shown in the modified test case.

This method is in pretty rough shape and at some point in the future,
building a more robust HTML munger/tokenizer might be in order if we
continue to get issues reported for how the HTML option works.

Refs #8673
Mark Story 10 years ago
parent
commit
eb9ff41b9b
2 changed files with 76 additions and 74 deletions
  1. 52 38
      src/Utility/Text.php
  2. 24 36
      tests/TestCase/Utility/TextTest.php

+ 52 - 38
src/Utility/Text.php

@@ -608,49 +608,70 @@ class Text
                         }
                     }
 
-                    $truncate .= mb_substr($tag[3], 0, $left + $entitiesLength);
+                    if (!$options['exact']) {
+                        $words = explode(' ', $tag[3]);
+                        // Keep at least one word.
+                        if (count($words) === 1) {
+                            $truncate .= mb_substr($tag[3], 0, $left + $entitiesLength);
+                        } else {
+                            $wordLength = 0;
+                            $addWords = [];
+                            // Append words until the length is crossed.
+                            foreach ($words as $word) {
+                                // Add words until we have enough letters.
+                                if ($wordLength < $left + $entitiesLength) {
+                                    $addWords[] = $word;
+                                }
+                                // Include inter-word space.
+                                $wordLength += mb_strlen($word) + 1;
+                            }
+                            $truncate .= implode(' ', $addWords);
+
+                            // If the string is longer than requested, find the last space and cut there.
+                            $lastSpace = mb_strrpos($truncate, ' ');
+                            if (mb_strlen($truncate) > $totalLength && $lastSpace !== false) {
+                                $remainder = mb_substr($truncate, $lastSpace);
+                                $truncate = mb_substr($truncate, 0, $lastSpace);
+
+                                // Re-add close tags that were cut off.
+                                preg_match_all('/<\/([a-z]+)>/', $remainder, $droppedTags, PREG_SET_ORDER);
+                                if ($droppedTags) {
+                                    foreach ($droppedTags as $closingTag) {
+                                        if (!in_array($closingTag[1], $openTags)) {
+                                            array_unshift($openTags, $closingTag[1]);
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    } else {
+                        $truncate .= mb_substr($tag[3], 0, $left + $entitiesLength);
+                    }
                     break;
                 }
-
                 $truncate .= $tag[3];
+
                 $totalLength += $contentLength;
                 if ($totalLength >= $length) {
                     break;
                 }
             }
-        } else {
-            if (mb_strlen($text) <= $length) {
-                return $text;
+
+            $truncate .= $options['ellipsis'];
+
+            foreach ($openTags as $tag) {
+                $truncate .= '</' . $tag . '>';
             }
-            $truncate = mb_substr($text, 0, $length - mb_strlen($options['ellipsis']));
+            return $truncate;
+        }
+
+        if (mb_strlen($text) <= $length) {
+            return $text;
         }
+        $truncate = mb_substr($text, 0, $length - mb_strlen($options['ellipsis']));
+
         if (!$options['exact']) {
             $spacepos = mb_strrpos($truncate, ' ');
-            if ($options['html']) {
-                $truncateCheck = mb_substr($truncate, 0, $spacepos);
-                $lastOpenTag = mb_strrpos($truncateCheck, '<');
-                $lastCloseTag = mb_strrpos($truncateCheck, '>');
-                if ($lastOpenTag > $lastCloseTag) {
-                    preg_match_all('/<[\w]+[^>]*>/s', $truncate, $lastTagMatches);
-                    $lastTag = array_pop($lastTagMatches[0]);
-                    $spacepos = mb_strrpos($truncate, $lastTag) + mb_strlen($lastTag);
-                }
-                $bits = mb_substr($truncate, $spacepos);
-                preg_match_all('/<\/([a-z]+)>/', $bits, $droppedTags, PREG_SET_ORDER);
-                if (!empty($droppedTags)) {
-                    if (!empty($openTags)) {
-                        foreach ($droppedTags as $closingTag) {
-                            if (!in_array($closingTag[1], $openTags)) {
-                                array_unshift($openTags, $closingTag[1]);
-                            }
-                        }
-                    } else {
-                        foreach ($droppedTags as $closingTag) {
-                            $openTags[] = $closingTag[1];
-                        }
-                    }
-                }
-            }
             $truncate = mb_substr($truncate, 0, $spacepos);
 
             // If truncate still empty, then we don't need to count ellipsis in the cut.
@@ -660,13 +681,6 @@ class Text
         }
 
         $truncate .= $options['ellipsis'];
-
-        if ($options['html']) {
-            foreach ($openTags as $tag) {
-                $truncate .= '</' . $tag . '>';
-            }
-        }
-
         return $truncate;
     }
 

+ 24 - 36
tests/TestCase/Utility/TextTest.php

@@ -556,7 +556,7 @@ TEXT;
         $this->assertSame($this->Text->truncate($text1, 15, ['html' => true]), "The quick brow\xe2\x80\xa6");
         $this->assertSame($this->Text->truncate($text1, 15, ['exact' => false, 'html' => true]), "The quick\xe2\x80\xa6");
         $this->assertSame($this->Text->truncate($text2, 10, ['html' => true]), "Heiz&ouml;lr&uuml;c\xe2\x80\xa6");
-        $this->assertSame($this->Text->truncate($text2, 10, ['exact' => false, 'html' => true]), "Heiz&ouml;\xe2\x80\xa6");
+        $this->assertSame($this->Text->truncate($text2, 10, ['exact' => false, 'html' => true]), "Heiz&ouml;lr&uuml;c\xe2\x80\xa6");
         $this->assertSame($this->Text->truncate($text3, 20, ['html' => true]), "<b>&copy; 2005-2007, Cake S\xe2\x80\xa6</b>");
         $this->assertSame($this->Text->truncate($text4, 15, ['html' => true]), "<img src=\"mypic.jpg\"> This image ta\xe2\x80\xa6");
         $this->assertSame($this->Text->truncate($text4, 45, ['html' => true]), "<img src=\"mypic.jpg\"> This image tag is not XHTML conform!<br><hr/><b>But the\xe2\x80\xa6</b>");
@@ -576,43 +576,31 @@ TEXT;
             'exact' => false,
             'html' => true
         ]);
-        $expected = '<p><span style="font-size: medium;"><a>...</a></span></p>';
-        $this->assertEquals($expected, $result);
-
-        $text = '<p><span style="font-size: medium;">El biógrafo de Steve Jobs, Walter
-Isaacson, explica porqué Jobs le pidió que le hiciera su biografía en
-este artículo de El País.</span></p>
-<p><span style="font-size: medium;"><span style="font-size:
-large;">Por qué Steve era distinto.</span></span></p>
-<p><span style="font-size: medium;"><a href="http://www.elpais.com/
-articulo/primer/plano/Steve/era/distinto/elpepueconeg/
-20111009elpneglse_4/Tes">http://www.elpais.com/articulo/primer/plano/
-Steve/era/distinto/elpepueconeg/20111009elpneglse_4/Tes</a></span></p>
-<p><span style="font-size: medium;">Ya se ha publicado la biografía de
-Steve Jobs escrita por Walter Isaacson  "<strong>Steve Jobs by Walter
-Isaacson</strong>", aquí os dejamos la dirección de amazon donde
-podeís adquirirla.</span></p>
-<p><span style="font-size: medium;"><a>http://www.amazon.com/Steve-
-Jobs-Walter-Isaacson/dp/1451648537</a></span></p>';
-        $result = $this->Text->truncate($text, 500, [
-            'ellipsis' => '... ',
+        $expected = '<p><span style="font-size: medium;"><a>Iamates...</a></span></p>';
+        $this->assertEquals($expected, $result);
+    }
+
+    /**
+     * Test truncate() method with both exact and html.
+     * @return void
+     */
+    public function testTruncateExactHtml()
+    {
+        $text = '<a href="http://example.org">hello</a> world';
+        $expected = '<a href="http://example.org">hell..</a>';
+        $result = Text::truncate($text, 6, array(
+            'ellipsis' => '..',
+            'exact' => true,
+            'html' => true
+        ));
+        $this->assertEquals($expected, $result);
+
+        $expected = '<a href="http://example.org">hell..</a>';
+        $result = Text::truncate($text, 6, array(
+            'ellipsis' => '..',
             'exact' => false,
             'html' => true
-        ]);
-        $expected = '<p><span style="font-size: medium;">El biógrafo de Steve Jobs, Walter
-Isaacson, explica porqué Jobs le pidió que le hiciera su biografía en
-este artículo de El País.</span></p>
-<p><span style="font-size: medium;"><span style="font-size:
-large;">Por qué Steve era distinto.</span></span></p>
-<p><span style="font-size: medium;"><a href="http://www.elpais.com/
-articulo/primer/plano/Steve/era/distinto/elpepueconeg/
-20111009elpneglse_4/Tes">http://www.elpais.com/articulo/primer/plano/
-Steve/era/distinto/elpepueconeg/20111009elpneglse_4/Tes</a></span></p>
-<p><span style="font-size: medium;">Ya se ha publicado la biografía de
-Steve Jobs escrita por Walter Isaacson  "<strong>Steve Jobs by Walter
-Isaacson</strong>", aquí os dejamos la dirección de amazon donde
-podeís adquirirla.</span></p>
-<p><span style="font-size: medium;"><a>... </a></span></p>';
+        ));
         $this->assertEquals($expected, $result);
     }