Text.php 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120
  1. <?php
  2. /**
  3. * CakePHP(tm) : Rapid Development Framework (https://cakephp.org)
  4. * Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
  5. *
  6. * Licensed under The MIT License
  7. * For full copyright and license information, please see the LICENSE.txt
  8. * Redistributions of files must retain the above copyright notice.
  9. *
  10. * @copyright Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
  11. * @link https://cakephp.org CakePHP(tm) Project
  12. * @since 1.2.0
  13. * @license https://opensource.org/licenses/mit-license.php MIT License
  14. */
  15. namespace Cake\Utility;
  16. use InvalidArgumentException;
  17. /**
  18. * Text handling methods.
  19. */
  20. class Text
  21. {
  /**
   * Default transliterator id string.
   *
   * @var string Transliterator identifier string.
   */
  protected static $_defaultTransliteratorId = 'Any-Latin; Latin-ASCII; [\u0080-\u7fff] remove';
  /**
   * Generate a random UUID version 4.
   *
   * The random bits are drawn from `random_int()` when that function exists
   * and from `mt_rand()` otherwise, so the method keeps working on runtimes
   * that predate `random_int()`.
   *
   * Warning: This method should not be used as a random seed for any cryptographic operations,
   * nor to create identifiers that must be unguessable. Use `random_bytes()` for that.
   *
   * @see https://www.ietf.org/rfc/rfc4122.txt
   * @return string RFC 4122 UUID
   * @copyright Matt Farina MIT License https://github.com/lootils/uuid/blob/master/LICENSE
   */
  public static function uuid()
  {
      // Prefer the CSPRNG-backed generator; fall back to the Mersenne Twister.
      $random = function_exists('random_int') ? 'random_int' : 'mt_rand';

      return sprintf(
          '%04x%04x-%04x-%04x-%04x-%04x%04x%04x',
          // 32 bits for "time_low"
          $random(0, 65535),
          $random(0, 65535),
          // 16 bits for "time_mid"
          $random(0, 65535),
          // 12 bits before the 0100 of (version) 4 for "time_hi_and_version"
          $random(0, 4095) | 0x4000,
          // 16 bits, 8 bits for "clk_seq_hi_res",
          // 8 bits for "clk_seq_low",
          // two most significant bits holds zero and one for variant DCE1.1
          $random(0, 0x3fff) | 0x8000,
          // 48 bits for "node"
          $random(0, 65535),
          $random(0, 65535),
          $random(0, 65535)
      );
  }
  /**
   * Splits $data on $separator, leaving alone every separator that sits between
   * $leftBound and $rightBound.
   *
   * When both bounds are the same character (e.g. a quote) the bound toggles
   * between "open" and "closed"; otherwise bounds nest. Every returned token
   * is trimmed.
   *
   * @param string $data The data to tokenize.
   * @param string $separator The token to split the data on.
   * @param string $leftBound The left boundary to ignore separators in.
   * @param string $rightBound The right boundary to ignore separators in.
   * @return array Array of tokens in $data, or an empty array when $data is empty.
   */
  public static function tokenize($data, $separator = ',', $leftBound = '(', $rightBound = ')')
  {
      if (empty($data)) {
          return [];
      }

      $tokens = [];
      $current = '';
      $nesting = 0;
      $toggled = false;
      $cursor = 0;
      $total = mb_strlen($data);
      $sameBounds = ($leftBound === $rightBound);

      while ($cursor <= $total) {
          // Positions of the next separator, left bound and right bound (those that exist).
          $candidates = array_filter(
              [
                  mb_strpos($data, $separator, $cursor),
                  mb_strpos($data, $leftBound, $cursor),
                  mb_strpos($data, $rightBound, $cursor),
              ],
              function ($position) {
                  return $position !== false;
              }
          );

          if (!$candidates) {
              // Nothing of interest remains: the rest of the input closes the last token.
              $tokens[] = $current . mb_substr($data, $cursor);
              $cursor = $total + 1;
              continue;
          }

          $next = min($candidates);
          $current .= mb_substr($data, $cursor, $next - $cursor);
          $char = mb_substr($data, $next, 1);

          if ($nesting === 0 && $char === $separator) {
              $tokens[] = $current;
              $current = '';
          } else {
              $current .= $char;
          }

          if ($sameBounds) {
              if ($char === $leftBound) {
                  // Identical bounds cannot nest, so each occurrence flips the state.
                  $nesting += $toggled ? -1 : 1;
                  $toggled = !$toggled;
              }
          } elseif ($char === $leftBound) {
              $nesting++;
          } elseif ($char === $rightBound) {
              $nesting--;
          }

          $cursor = $next + 1;
      }

      if (empty($tokens) && !empty($current)) {
          $tokens[] = $current;
      }

      return empty($tokens) ? [] : array_map('trim', $tokens);
  }
  /**
   * Replaces variable placeholders inside a $str with any given $data. Each key in the $data array
   * corresponds to a variable placeholder name in $str.
   * Example:
   * ```
   * Text::insert(':name is :age years old.', ['name' => 'Bob', 'age' => '65']);
   * ```
   * Returns: Bob is 65 years old.
   *
   * When $str contains `?` and the first key of $data is numeric, the `?` marks are
   * replaced positionally instead; marks left over once $data is exhausted are
   * replaced with an empty string.
   *
   * Available $options are:
   *
   * - before: The character or string in front of the name of the variable placeholder (Defaults to `:`)
   * - after: The character or string after the name of the variable placeholder (Defaults to null)
   * - escape: The character or string used to escape the before character / string (Defaults to `\`)
   * - format: A regex to use for matching variable placeholders. Default is: `/(?<!\\)\:%s/`
   *   (Overwrites before, after, breaks escape / clean)
   * - clean: A boolean or array with instructions for Text::cleanInsert
   *
   * @param string $str A string containing variable placeholders
   * @param array $data A key => val array where each key stands for a placeholder variable name
   *   to be replaced with val
   * @param array $options An array of options, see description above
   * @return string
   */
  public static function insert($str, $data, array $options = [])
  {
      $defaults = [
          'before' => ':', 'after' => null, 'escape' => '\\', 'format' => null, 'clean' => false
      ];
      $options += $defaults;
      $format = $options['format'];
      $data = (array)$data;
      if (empty($data)) {
          return $options['clean'] ? static::cleanInsert($str, $options) : $str;
      }

      if (!isset($format)) {
          // Quote for the regex, then double every `%` because the result is itself
          // used as an sprintf() format below. The cast keeps a null option (the
          // default for `after`) away from preg_quote().
          $quote = function ($value) {
              return str_replace('%', '%%', preg_quote((string)$value, '/'));
          };
          $format = sprintf(
              '/(?<!%s)%s%%s%s/',
              $quote($options['escape']),
              $quote($options['before']),
              $quote($options['after'])
          );
      }

      if (strpos($str, '?') !== false && is_numeric(key($data))) {
          $offset = 0;
          while (($pos = strpos($str, '?', $offset)) !== false) {
              $val = array_shift($data);
              if ($val === null) {
                  // More `?` marks than values: substitute nothing.
                  $val = '';
              }
              // Continue searching after the inserted value so a `?` inside it is not re-expanded.
              $offset = $pos + strlen($val);
              $str = substr_replace($str, $val, $pos, 1);
          }

          return $options['clean'] ? static::cleanInsert($str, $options) : $str;
      }

      // Two-pass replacement: placeholders are first swapped for a crc32 hash of
      // their key, then the hashes are swapped for the values. This prevents a
      // value that itself looks like a placeholder from being expanded again.
      asort($data);

      $dataKeys = array_keys($data);
      $hashKeys = array_map('crc32', $dataKeys);
      $tempData = array_combine($dataKeys, $hashKeys);
      // Reverse key order so that longer keys sharing a prefix are handled first.
      krsort($tempData);

      foreach ($tempData as $key => $hashVal) {
          $key = sprintf($format, preg_quote($key, '/'));
          $str = preg_replace($key, $hashVal, $str);
      }
      $dataReplacements = array_combine($hashKeys, array_values($data));
      foreach ($dataReplacements as $tmpHash => $tmpValue) {
          // Array values cannot be rendered; they become an empty string.
          $tmpValue = is_array($tmpValue) ? '' : $tmpValue;
          $str = str_replace($tmpHash, $tmpValue, $str);
      }

      if (!isset($options['format']) && isset($options['before'])) {
          // Unescape placeholders that were protected with the escape string.
          $str = str_replace($options['escape'] . $options['before'], $options['before'], $str);
      }

      return $options['clean'] ? static::cleanInsert($str, $options) : $str;
  }
  /**
   * Cleans up a Text::insert() formatted string with given $options depending on the 'clean' key in
   * $options. The default method used is text but html is also available. The goal of this function
   * is to replace all whitespace and unneeded markup around placeholders that did not get replaced
   * by Text::insert().
   *
   * Missing or null `before` / `after` options are treated as empty strings, and a
   * missing or falsy `clean` option returns $str untouched.
   *
   * @param string $str String to clean.
   * @param array $options Options list.
   * @return string
   * @see \Cake\Utility\Text::insert()
   */
  public static function cleanInsert($str, array $options)
  {
      $clean = isset($options['clean']) ? $options['clean'] : false;
      if (!$clean) {
          return $str;
      }
      if ($clean === true) {
          $clean = ['method' => 'text'];
      }
      if (!is_array($clean)) {
          $clean = ['method' => $options['clean']];
      }

      // `after` defaults to null in insert(); cast so preg_quote() always receives a string.
      $before = preg_quote(isset($options['before']) ? (string)$options['before'] : '', '/');
      $after = preg_quote(isset($options['after']) ? (string)$options['after'] : '', '/');
      $method = isset($clean['method']) ? $clean['method'] : null;

      switch ($method) {
          case 'html':
              $clean += [
                  'word' => '[\w,.]+',
                  'andText' => true,
                  'replacement' => '',
              ];
              // Drops whole attributes whose quoted value consists only of leftover placeholders.
              $kleenex = sprintf(
                  '/[\s]*[a-z]+=(")(%s%s%s[\s]*)+\\1/i',
                  $before,
                  $clean['word'],
                  $after
              );
              $str = preg_replace($kleenex, $clean['replacement'], $str);
              if ($clean['andText']) {
                  // Follow up with a text pass for placeholders outside of attributes.
                  $options['clean'] = ['method' => 'text'];
                  $str = static::cleanInsert($str, $options);
              }
              break;
          case 'text':
              $clean += [
                  'word' => '[\w,.]+',
                  'gap' => '[\s]*(?:(?:and|or)[\s]*)?',
                  'replacement' => '',
              ];
              // Matches a leftover placeholder together with the gap after it, or before it.
              $kleenex = sprintf(
                  '/(%s%s%s%s|%s%s%s%s)/',
                  $before,
                  $clean['word'],
                  $after,
                  $clean['gap'],
                  $clean['gap'],
                  $before,
                  $clean['word'],
                  $after
              );
              $str = preg_replace($kleenex, $clean['replacement'], $str);
              break;
      }

      return $str;
  }
  /**
   * Wraps text to a given width, either at word boundaries or at a fixed chunk size,
   * and optionally prefixes lines with an indent string.
   *
   * ### Options
   *
   * - `width` The width to wrap to. Defaults to 72.
   * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
   * - `indent` String to indent with. Defaults to null.
   * - `indentAt` 0 based index to start indenting at. Defaults to 0.
   *
   * @param string $text The text to format.
   * @param array|int $options Array of options to use, or an integer to wrap the text to.
   * @return string Formatted text.
   */
  public static function wrap($text, $options = [])
  {
      if (is_numeric($options)) {
          $options = ['width' => $options];
      }
      $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];

      $wrapped = $options['wordWrap']
          ? self::wordWrap($text, $options['width'], "\n")
          : trim(chunk_split($text, $options['width'] - 1, "\n"));

      if (empty($options['indent'])) {
          return $wrapped;
      }

      // Prefix every line from `indentAt` onwards.
      $lines = explode("\n", $wrapped);
      $lineCount = count($lines);
      $index = $options['indentAt'];
      while ($index < $lineCount) {
          $lines[$index] = $options['indent'] . $lines[$index];
          $index++;
      }

      return implode("\n", $lines);
  }
  /**
   * Wraps a complete block of text to a specific width, can optionally wrap
   * at word breaks.
   *
   * Unlike wrap(), the indent counts towards the width: the text is wrapped once,
   * then every line from `indentAt` onwards is stripped of its indent, joined with
   * spaces and wrapped again at `width` minus the indent length.
   *
   * ### Options
   *
   * - `width` The width to wrap to. Defaults to 72.
   * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
   * - `indent` String to indent with. Defaults to null.
   * - `indentAt` 0 based index to start indenting at. Defaults to 0.
   *
   * @param string $text The text to format.
   * @param array|int $options Array of options to use, or an integer to wrap the text to.
   * @return string Formatted text.
   */
  public static function wrapBlock($text, $options = [])
  {
      if (is_numeric($options)) {
          $options = ['width' => $options];
      }
      $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];

      // NOTE: a former shortcut guarded by `!empty($options['indentAt']) && $options['indentAt'] === 0`
      // could never execute (the two conditions contradict each other) and has been
      // removed; `indentAt` 0 has always been handled by the general path below.
      $wrapped = self::wrap($text, $options);
      if (empty($options['indent'])) {
          return $wrapped;
      }

      $indentationLength = mb_strlen($options['indent']);
      $chunks = explode("\n", $wrapped);
      $count = count($chunks);
      if ($count < 2) {
          return $wrapped;
      }

      // Collect the indented lines (minus their indent) into one string to re-wrap.
      $toRewrap = '';
      for ($i = $options['indentAt']; $i < $count; $i++) {
          $toRewrap .= mb_substr($chunks[$i], $indentationLength) . ' ';
          unset($chunks[$i]);
      }

      // Re-wrap with the indent accounted for in the width; all re-wrapped lines are indented.
      $options['width'] -= $indentationLength;
      $options['indentAt'] = 0;
      $rewrapped = self::wrap($toRewrap, $options);
      $newChunks = explode("\n", $rewrapped);

      $chunks = array_merge($chunks, $newChunks);

      return implode("\n", $chunks);
  }
  /**
   * Unicode and newline aware version of wordwrap.
   *
   * The text is split on $break and every resulting paragraph is wrapped on its
   * own, so existing breaks in the input are preserved.
   *
   * @param string $text The text to format.
   * @param int $width The width to wrap to. Defaults to 72.
   * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
   * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
   * @return string Formatted text.
   */
  public static function wordWrap($text, $width = 72, $break = "\n", $cut = false)
  {
      $wrapped = [];
      foreach (explode($break, $text) as $paragraph) {
          $wrapped[] = static::_wordWrap($paragraph, $width, $break, $cut);
      }

      return implode($break, $wrapped);
  }
  /**
   * Unicode aware version of wordwrap as helper method.
   *
   * Without $cut, lines are broken at the last space that fits within $width; a word
   * longer than $width is kept whole and broken at the next space after it. With
   * $cut, lines are sliced at exactly $width characters. Each produced line is trimmed.
   *
   * Note: with $cut enabled, $width must be at least 1, otherwise the loop cannot
   * consume any input.
   *
   * @param string $text The text to format.
   * @param int $width The width to wrap to. Defaults to 72.
   * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
   * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
   * @return string Formatted text.
   */
  protected static function _wordWrap($text, $width = 72, $break = "\n", $cut = false)
  {
      $parts = [];

      if ($cut) {
          while (mb_strlen($text) > 0) {
              $part = mb_substr($text, 0, $width);
              $parts[] = trim($part);
              $text = trim(mb_substr($text, mb_strlen($part)));
          }

          return implode($break, $parts);
      }

      while (mb_strlen($text) > 0) {
          if ($width >= mb_strlen($text)) {
              $parts[] = trim($text);
              break;
          }

          $part = mb_substr($text, 0, $width);
          $nextChar = mb_substr($text, $width, 1);
          if ($nextChar !== ' ') {
              // The slice ends mid-word: back up to the last space inside it, or, if the
              // word is longer than the width, run forward to the first space after it.
              $breakAt = mb_strrpos($part, ' ');
              if ($breakAt === false) {
                  $breakAt = mb_strpos($text, ' ', $width);
              }
              if ($breakAt === false) {
                  $parts[] = trim($text);
                  break;
              }
              $part = mb_substr($text, 0, $breakAt);
          }

          // Advance by the length of the slice *before* trimming. Using the trimmed
          // length would re-read characters whenever the slice starts with whitespace.
          $consumed = mb_strlen($part);
          $parts[] = trim($part);
          $text = trim(mb_substr($text, $consumed));
      }

      return implode($break, $parts);
  }
  /**
   * Highlights a given phrase in a text. You can specify any expression in highlighter that
   * may include the \1 expression to include the $phrase found.
   *
   * ### Options:
   *
   * - `format` The piece of HTML with that the phrase will be highlighted. May be an
   *   array (indexed like $phrase) when $phrase is an array.
   * - `html` If true, will ignore any HTML tags, ensuring that only the correct text is highlighted
   * - `regex` A custom regex rule that is used to match words, default is '|$tag|iu'
   * - `limit` A limit, optional, defaults to -1 (none)
   *
   * Options are read by key only; unknown keys are ignored and can never override
   * the $text or $phrase arguments.
   *
   * @param string $text Text to search the phrase in.
   * @param string|array $phrase The phrase or phrases that will be searched.
   * @param array $options An array of HTML attributes and options.
   * @return string The highlighted text
   * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#highlighting-substrings
   */
  public static function highlight($text, $phrase, array $options = [])
  {
      if (empty($phrase)) {
          return $text;
      }

      $options += [
          'format' => '<span class="highlight">\1</span>',
          'html' => false,
          'regex' => '|%s|iu',
          'limit' => -1,
      ];
      $html = $options['html'];
      $format = $options['format'];
      $limit = $options['limit'];
      $regex = $options['regex'];

      // Turns one phrase into a complete pattern with the phrase as capture group 1.
      $buildPattern = function ($segment) use ($html, $regex) {
          $segment = '(' . preg_quote($segment, '|') . ')';
          if ($html) {
              // Reject matches that are located inside of an HTML tag.
              $segment = "(?![^<]+>)$segment(?![^<]+>)";
          }

          return sprintf($regex, $segment);
      };

      if (is_array($phrase)) {
          $replace = [];
          $with = [];
          foreach ($phrase as $key => $segment) {
              $with[] = is_array($format) ? $format[$key] : $format;
              $replace[] = $buildPattern($segment);
          }

          return preg_replace($replace, $with, $text, $limit);
      }

      return preg_replace($buildPattern($phrase), $format, $text, $limit);
  }
  /**
   * Removes every opening and closing anchor tag (<a href=....) from the given text,
   * keeping the link text itself.
   *
   * The removal is repeated until no tag is left, so tags that only appear once
   * an inner tag has been removed are stripped as well.
   *
   * *Warning* This method is not a robust solution in preventing XSS
   * or malicious HTML.
   *
   * @param string $text Text
   * @return string The text without links
   * @deprecated 3.2.12 This method will be removed in 4.0.0
   */
  public static function stripLinks($text)
  {
      $anchorTag = '#</?a([/\s][^>]*)?(>|$)#i';
      $removed = 0;

      for (;;) {
          $text = preg_replace($anchorTag, '', $text, -1, $removed);
          if (!$removed) {
              break;
          }
      }

      return $text;
  }
  /**
   * Truncates text starting from the end.
   *
   * Cuts a string to the length of $length and replaces the first characters
   * with the ellipsis if the text is longer than length.
   *
   * ### Options:
   *
   * - `ellipsis` Will be used as Beginning and prepended to the trimmed string
   * - `exact` If false, $text will not be cut mid-word
   *
   * Options are read by key only; unknown keys are ignored and can never override
   * the $text or $length arguments.
   *
   * @param string $text String to truncate.
   * @param int $length Length of returned string, including ellipsis.
   * @param array $options An array of options.
   * @return string Trimmed string.
   */
  public static function tail($text, $length = 100, array $options = [])
  {
      $options += ['ellipsis' => '...', 'exact' => true];
      $ellipsis = $options['ellipsis'];

      $textLength = mb_strlen($text);
      if ($textLength <= $length) {
          return $text;
      }

      // Keep the last ($length - ellipsis length) characters.
      $truncate = mb_substr($text, $textLength - $length + mb_strlen($ellipsis));
      if (!$options['exact']) {
          // Drop the (possibly partial) first word; nothing is kept if there is no space.
          $spacepos = mb_strpos($truncate, ' ');
          $truncate = $spacepos === false ? '' : trim(mb_substr($truncate, $spacepos));
      }

      return $ellipsis . $truncate;
  }
  /**
   * Truncates text.
   *
   * Cuts a string to the length of $length and replaces the last characters
   * with the ellipsis if the text is longer than length.
   *
   * In `html` mode tags do not count towards the length, elements still open at the
   * cut are closed after the ellipsis, and the default ellipsis becomes the single
   * character U+2026 when the internal encoding is UTF-8.
   *
   * ### Options:
   *
   * - `ellipsis` Will be used as ending and appended to the trimmed string
   * - `exact` If false, $text will not be cut mid-word
   * - `html` If true, HTML tags would be handled correctly
   * - `trimWidth` If true, $text will be truncated with the width
   *
   * @param string $text String to truncate.
   * @param int $length Length of returned string, including ellipsis.
   * @param array $options An array of HTML attributes and options.
   * @return string Trimmed string.
   * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#truncating-text
   */
  public static function truncate($text, $length = 100, array $options = [])
  {
      $default = [
          'ellipsis' => '...', 'exact' => true, 'html' => false, 'trimWidth' => false,
      ];
      if (!empty($options['html']) && strtolower(mb_internal_encoding()) === 'utf-8') {
          $default['ellipsis'] = "\xe2\x80\xa6";
      }
      $options += $default;

      $prefix = '';
      $suffix = $options['ellipsis'];

      if ($options['html']) {
          $ellipsisLength = self::_strlen(strip_tags($options['ellipsis']), $options);

          $truncateLength = 0;
          $totalLength = 0;
          $openTags = [];
          $truncate = '';

          // Elements that never take a closing tag. The pattern is anchored so that
          // names merely containing one of these (abbr, colgroup, iframe, ...) are
          // still tracked as open elements and get closed after truncation.
          $voidElements = '/^(?:img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)$/i';

          // Each match is an optional tag ([1], name in [2]) followed by the text after it ([3]).
          preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $text, $tags, PREG_SET_ORDER);
          foreach ($tags as $tag) {
              $contentLength = self::_strlen($tag[3], $options);

              if ($truncate === '') {
                  if (!preg_match($voidElements, $tag[2])) {
                      if (preg_match('/<[\w]+[^>]*>/', $tag[0])) {
                          // Opening tag: most recently opened element goes first.
                          array_unshift($openTags, $tag[2]);
                      } elseif (preg_match('/<\/([\w]+)[^>]*>/', $tag[0], $closeTag)) {
                          // Closing tag: forget the matching open element.
                          $pos = array_search($closeTag[1], $openTags, true);
                          if ($pos !== false) {
                              array_splice($openTags, $pos, 1);
                          }
                      }
                  }

                  $prefix .= $tag[1];

                  if ($totalLength + $contentLength + $ellipsisLength > $length) {
                      // This text node is where the cut happens.
                      $truncate = $tag[3];
                      $truncateLength = $length - $totalLength;
                  } else {
                      $prefix .= $tag[3];
                  }
              }

              $totalLength += $contentLength;
              if ($totalLength > $length) {
                  break;
              }
          }

          if ($totalLength <= $length) {
              return $text;
          }

          // From here on only the text node being cut is processed.
          $text = $truncate;
          $length = $truncateLength;

          foreach ($openTags as $tag) {
              $suffix .= '</' . $tag . '>';
          }
      } else {
          if (self::_strlen($text, $options) <= $length) {
              return $text;
          }
          $ellipsisLength = self::_strlen($options['ellipsis'], $options);
      }

      $result = self::_substr($text, 0, $length - $ellipsisLength, $options);

      if (!$options['exact']) {
          if (self::_substr($text, $length - $ellipsisLength, 1, $options) !== ' ') {
              $result = self::_removeLastWord($result);
          }

          // If result is empty, then we don't need to count ellipsis in the cut.
          if (!strlen($result)) {
              $result = self::_substr($text, 0, $length, $options);
          }
      }

      return $prefix . $result . $suffix;
  }
  /**
   * Truncates text by display width instead of character count.
   *
   * Delegates to truncate() with the `trimWidth` option forced to true.
   *
   * @param string $text String to truncate.
   * @param int $length Length of returned string, including ellipsis.
   * @param array $options An array of HTML attributes and options.
   * @return string Trimmed string.
   * @see \Cake\Utility\Text::truncate()
   */
  public static function truncateByWidth($text, $length = 100, array $options = [])
  {
      // Left operand wins in an array union, so a caller-supplied `trimWidth` is overridden.
      $widthOptions = ['trimWidth' => true] + $options;

      return static::truncate($text, $length, $widthOptions);
  }
  /**
   * Measures a string, optionally by display width and optionally counting
   * HTML entities as the characters they decode to.
   *
   * ### Options:
   *
   * - `html` If true, HTML entities will be handled as decoded characters.
   * - `trimWidth` If true, the width will return.
   *
   * @param string $text The string being checked for length
   * @param array $options An array of options.
   * @return int
   */
  protected static function _strlen($text, array $options)
  {
      $measure = empty($options['trimWidth']) ? 'mb_strlen' : 'mb_strwidth';

      if (empty($options['html'])) {
          return $measure($text);
      }

      // Swap each entity for as many spaces as its decoded form measures, then
      // measure the resulting string.
      $entityPattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';
      $padded = preg_replace_callback(
          $entityPattern,
          function ($found) use ($measure) {
              $decoded = html_entity_decode($found[0], ENT_HTML5 | ENT_QUOTES, 'UTF-8');

              return str_repeat(' ', $measure($decoded, 'UTF-8'));
          },
          $text
      );

      return $measure($padded);
  }
  /**
   * Return part of a string.
   *
   * Behaves like a substring call that can optionally measure by display width
   * (`trimWidth`) and treat each HTML entity as one indivisible unit that counts
   * as its decoded form (`html`).
   *
   * ### Options:
   *
   * - `html` If true, HTML entities will be handled as decoded characters.
   * - `trimWidth` If true, will be truncated with specified width.
   *
   * @param string $text The input string.
   * @param int $start The position to begin extracting. Negative values count from the end.
   * @param int $length The desired length. Null means "to the end"; negative values
   *   leave that many units off the end.
   * @param array $options An array of options.
   * @return string
   */
  protected static function _substr($text, $start, $length, array $options)
  {
      // Pick the extraction function: by character count or by display width.
      if (empty($options['trimWidth'])) {
          $substr = 'mb_substr';
      } else {
          $substr = 'mb_strimwidth';
      }

      // Positions are always counted in characters, so `trimWidth` is forced off here
      // (the left operand of the array union takes precedence).
      $maxPosition = self::_strlen($text, ['trimWidth' => false] + $options);
      if ($start < 0) {
          // Negative start counts back from the end, clamped to the beginning.
          $start += $maxPosition;
          if ($start < 0) {
              $start = 0;
          }
      }
      if ($start >= $maxPosition) {
          return '';
      }

      if ($length === null) {
          $length = self::_strlen($text, $options);
      }

      if ($length < 0) {
          // Negative length: take everything from $start, then shorten the requested
          // length by that many units measured against the remaining text.
          $text = self::_substr($text, $start, null, $options);
          $start = 0;
          $length += self::_strlen($text, $options);
      }

      if ($length <= 0) {
          return '';
      }

      if (empty($options['html'])) {
          return (string)$substr($text, $start, $length);
      }

      // HTML mode: walk the text as alternating plain-text and entity pieces.
      $totalOffset = 0;
      $totalLength = 0;
      $result = '';

      $pattern = '/(&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};)/i';
      $parts = preg_split($pattern, $text, -1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY);
      foreach ($parts as $part) {
          $offset = 0;

          if ($totalOffset < $start) {
              // Still before the start position: skip whole pieces, or compute the
              // offset into the piece that contains the start.
              $len = self::_strlen($part, ['trimWidth' => false] + $options);
              if ($totalOffset + $len <= $start) {
                  $totalOffset += $len;
                  continue;
              }

              $offset = $start - $totalOffset;
              $totalOffset = $start;
          }

          $len = self::_strlen($part, $options);
          if ($offset !== 0 || $totalLength + $len > $length) {
              // This piece has to be cut. A piece that is a real entity (it decodes to
              // something other than itself) cannot be cut and is skipped instead.
              if (strpos($part, '&') === 0 && preg_match($pattern, $part)
                  && $part !== html_entity_decode($part, ENT_HTML5 | ENT_QUOTES, 'UTF-8')
              ) {
                  // Entities cannot be passed substr.
                  continue;
              }

              $part = $substr($part, $offset, $length - $totalLength);
              $len = self::_strlen($part, $options);
          }

          $result .= $part;
          $totalLength += $len;
          if ($totalLength >= $length) {
              break;
          }
      }

      return $result;
  }
  /**
   * Removes the last word from the input text.
   *
   * The text is cut at its last space. If the trailing segment contains full-width
   * characters it is not treated as a single word and the text is returned
   * unchanged. Text without any space yields an empty string.
   *
   * @param string $text The input text
   * @return string
   */
  protected static function _removeLastWord($text)
  {
      $spacepos = mb_strrpos($text, ' ');
      if ($spacepos === false) {
          return '';
      }

      // Everything from the last space onwards is the candidate "last word".
      $lastWord = mb_substr($text, $spacepos);

      // Some languages are written without word separation.
      // We recognize a string as a word if it doesn't contain any full-width characters.
      if (mb_strwidth($lastWord) === mb_strlen($lastWord)) {
          $text = mb_substr($text, 0, $spacepos);
      }

      return $text;
  }
  /**
   * Extracts the part of $text surrounding the first case-insensitive occurrence of
   * $phrase, keeping up to $radius characters on either side.
   *
   * The ellipsis is added on each side where text was cut off. If the phrase is not
   * found, the first $radius characters followed by the ellipsis are returned; if
   * $text or $phrase is empty, the text is truncated to twice the radius.
   *
   * @param string $text String to search the phrase in
   * @param string $phrase Phrase that will be searched for
   * @param int $radius The amount of characters that will be returned on each side of the founded phrase
   * @param string $ellipsis Ending that will be appended
   * @return string Modified string
   * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#extracting-an-excerpt
   */
  public static function excerpt($text, $phrase, $radius = 100, $ellipsis = '...')
  {
      if (empty($text) || empty($phrase)) {
          return static::truncate($text, $radius * 2, ['ellipsis' => $ellipsis]);
      }

      $matchAt = mb_strpos(mb_strtolower($text), mb_strtolower($phrase));
      if ($matchAt === false) {
          return mb_substr($text, 0, $radius) . $ellipsis;
      }

      $textLength = mb_strlen($text);

      // Left edge of the window; no leading ellipsis when it reaches the start.
      $leading = $ellipsis;
      $from = $matchAt - $radius;
      if ($from <= 0) {
          $from = 0;
          $leading = '';
      }

      // Right edge of the window; no trailing ellipsis when it reaches the end.
      $trailing = $ellipsis;
      $to = $matchAt + mb_strlen($phrase) + $radius;
      if ($to >= $textLength) {
          $to = $textLength;
          $trailing = '';
      }

      return $leading . mb_substr($text, $from, $to - $from) . $trailing;
  }
  /**
   * Creates a comma separated list where the last two items are joined with 'and', forming natural language.
   *
   * A single-item list yields that item cast to string; an empty list yields
   * an empty string.
   *
   * @param array $list The list to be joined.
   * @param string|null $and The word used to join the last and second last items together with. Defaults to 'and'.
   * @param string $separator The separator used to join all the other items together. Defaults to ', '.
   * @return string The glued together string.
   * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#converting-an-array-to-sentence-form
   */
  public static function toList(array $list, $and = null, $separator = ', ')
  {
      if ($and === null) {
          $and = __d('cake', 'and');
      }
      if (count($list) > 1) {
          // Detach the final item first; what remains is joined with the separator.
          // This avoids passing null as the array_slice() offset, which is
          // deprecated for internal functions since PHP 8.1.
          $last = array_pop($list);

          return implode($separator, $list) . ' ' . $and . ' ' . $last;
      }

      // array_pop() returns null for an empty array; cast to honour the string return contract.
      return (string)array_pop($list);
  }
  /**
   * Tells whether the string contains at least one byte whose value is above 128.
   *
   * The string is scanned byte by byte and the scan stops at the first hit.
   *
   * @param string $string value to test
   * @return bool True when such a byte is present, false otherwise
   */
  public static function isMultibyte($string)
  {
      $byteCount = strlen($string);
      $offset = 0;
      while ($offset < $byteCount) {
          if (ord($string[$offset]) > 128) {
              return true;
          }
          $offset++;
      }

      return false;
  }
  /**
   * Converts a UTF-8 encoded string into the list of decimal code point
   * values of its characters.
   *
   * Sequences of one to four bytes are decoded. The input is not validated:
   * any byte of 128 or above that starts a sequence decides the sequence
   * length from its value alone, and an incomplete trailing sequence is dropped.
   *
   * @param string $string String to convert.
   * @return array List of integer code points, in input order.
   */
  public static function utf8($string)
  {
      $map = [];
      $values = [];
      $find = 1;
      $length = strlen($string);
      for ($i = 0; $i < $length; $i++) {
          $value = ord($string[$i]);
          if ($value < 128) {
              // Single-byte (ASCII) character maps to itself.
              $map[] = $value;
              continue;
          }

          if (empty($values)) {
              // First byte of a sequence: its value tells how many bytes belong together.
              if ($value < 224) {
                  $find = 2;
              } elseif ($value < 240) {
                  $find = 3;
              } else {
                  // Lead bytes from 240 upward start 4-byte sequences (code points above 65535).
                  $find = 4;
              }
          }
          $values[] = $value;
          if (count($values) !== $find) {
              continue;
          }

          // Strip the marker bits of every byte and combine the 6-bit payloads.
          if ($find === 4) {
              $map[] = (($values[0] % 8) * 262144)
                  + (($values[1] % 64) * 4096)
                  + (($values[2] % 64) * 64)
                  + ($values[3] % 64);
          } elseif ($find === 3) {
              $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
          } else {
              $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
          }
          $values = [];
          $find = 1;
      }

      return $map;
  }
  /**
   * Converts a list of decimal code point values into the corresponding
   * UTF-8 encoded string.
   *
   * Values below 128 become one byte, below 2048 two bytes, below 65536
   * three bytes, and anything larger four bytes.
   *
   * @param array $array List of integer code points
   * @return string
   */
  public static function ascii(array $array)
  {
      $ascii = '';
      foreach ($array as $utf8) {
          if ($utf8 < 128) {
              $ascii .= chr($utf8);
          } elseif ($utf8 < 2048) {
              $ascii .= chr(0xC0 | ($utf8 >> 6));
              $ascii .= chr(0x80 | ($utf8 & 0x3F));
          } elseif ($utf8 < 65536) {
              $ascii .= chr(0xE0 | ($utf8 >> 12));
              $ascii .= chr(0x80 | (($utf8 >> 6) & 0x3F));
              $ascii .= chr(0x80 | ($utf8 & 0x3F));
          } else {
              // Code points of 65536 and above do not fit into three bytes;
              // encode them as a 4-byte sequence.
              $ascii .= chr(0xF0 | ($utf8 >> 18));
              $ascii .= chr(0x80 | (($utf8 >> 12) & 0x3F));
              $ascii .= chr(0x80 | (($utf8 >> 6) & 0x3F));
              $ascii .= chr(0x80 | ($utf8 & 0x3F));
          }
      }

      return $ascii;
  }
  /**
   * Converts filesize from human readable string to bytes
   *
   * Accepted forms are a plain digit string, digits followed by 'B', or a
   * numeric value followed by a unit (K, M, G, T, P, optionally with a
   * trailing B). Units are case-insensitive and are powers of 1024. A value
   * whose numeric part is not numeric is treated as invalid.
   *
   * @param string $size Size in human readable string like '5MB', '5M', '500B', '50kb' etc.
   * @param mixed $default Value to be returned when invalid size was used, for example 'Unknown type'
   * @return mixed Number of bytes on success, `$default` on failure if not false
   * @throws \InvalidArgumentException On invalid size when `$default` is false.
   * @link https://book.cakephp.org/3.0/en/core-libraries/text.html#Cake\Utility\Text::parseFileSize
   */
  public static function parseFileSize($size, $default = false)
  {
      // ctype_digit() interprets small integers as ASCII codes, so always test a string.
      $size = (string)$size;
      if (ctype_digit($size)) {
          return (int)$size;
      }
      $size = strtoupper($size);

      // Try two-letter units first, then fall back to the one-letter forms.
      $l = -2;
      $i = array_search(substr($size, -2), ['KB', 'MB', 'GB', 'TB', 'PB'], true);
      if ($i === false) {
          $l = -1;
          $i = array_search(substr($size, -1), ['K', 'M', 'G', 'T', 'P'], true);
      }
      if ($i !== false) {
          $number = trim(substr($size, 0, $l));
          // Only multiply a real number; otherwise fall through to the failure handling below.
          if (is_numeric($number)) {
              return $number * pow(1024, $i + 1);
          }
      }

      if (substr($size, -1) === 'B' && ctype_digit(substr($size, 0, -1))) {
          $size = substr($size, 0, -1);

          return (int)$size;
      }

      if ($default !== false) {
          return $default;
      }
      throw new InvalidArgumentException('No unit type.');
  }
  /**
   * Returns the transliterator identifier currently stored as the class default.
   *
   * @return string Transliterator identifier.
   */
  public static function getTransliteratorId()
  {
      $currentId = static::$_defaultTransliteratorId;

      return $currentId;
  }
  /**
   * Stores the given transliterator identifier as the class default.
   *
   * @param string $transliteratorId Transliterator identifier.
   * @return void
   */
  public static function setTransliteratorId($transliteratorId)
  {
      // Overwrites the previous default; the value is stored as given.
      static::$_defaultTransliteratorId = $transliteratorId;
  }
  /**
   * Runs the string through transliterator_transliterate().
   *
   * @param string $string String to transliterate.
   * @param string|null $transliteratorId Transliterator identifier. When a
   *   falsy value is given, Text::$_defaultTransliteratorId is used instead.
   * @return string
   * @see https://secure.php.net/manual/en/transliterator.transliterate.php
   */
  public static function transliterate($string, $transliteratorId = null)
  {
      if (!$transliteratorId) {
          // No identifier supplied: fall back to the class-wide default.
          $transliteratorId = static::$_defaultTransliteratorId;
      }

      return transliterator_transliterate($transliteratorId, $string);
  }
  /**
   * Returns a string with all spaces converted to dashes (by default),
   * characters transliterated to ASCII characters, and non word characters removed.
   *
   * ### Options:
   *
   * - `replacement`: Replacement string. Default '-'.
   * - `transliteratorId`: A valid transliterator id string.
   *   If default `null` Text::$_defaultTransliteratorId to be used.
   *   If `false` no transliteration will be done, only non words will be removed.
   * - `preserve`: Specific non-word character to preserve. Default `null`.
   *   For e.g. this option can be set to '.' to generate clean file names.
   *
   * @param string $string the string you want to slug
   * @param array|string $options If string it will be used as replacement character
   *   or an array of options.
   * @return string
   */
  public static function slug($string, $options = [])
  {
      if (is_string($options)) {
          $options = ['replacement' => $options];
      }
      $options += [
          'replacement' => '-',
          'transliteratorId' => null,
          'preserve' => null
      ];

      if ($options['transliteratorId'] !== false) {
          $string = static::transliterate($string, $options['transliteratorId']);
      }

      // Body of a negated character class: everything that is NOT whitespace,
      // a letter or a decimal digit gets blanked out.
      $regex = '^\s\p{Ll}\p{Lm}\p{Lo}\p{Lt}\p{Lu}\p{Nd}';
      if ($options['preserve']) {
          // Append the preserved characters directly. Wrapping them in "(...)"
          // inside a character class would also whitelist literal parentheses.
          $regex .= preg_quote($options['preserve'], '/');
      }
      $quotedReplacement = preg_quote($options['replacement'], '/');

      // Applied in order: blank out unwanted characters, collapse whitespace
      // runs into the replacement, then trim the replacement from both ends.
      // The "u" modifier on the trim pattern keeps a multibyte replacement from
      // being matched byte by byte, which could cut a neighbouring character in half.
      $map = [
          '/[' . $regex . ']/mu' => ' ',
          '/[\s]+/mu' => $options['replacement'],
          sprintf('/^[%s]+|[%s]+$/u', $quotedReplacement, $quotedReplacement) => '',
      ];

      return preg_replace(array_keys($map), $map, $string);
  }
  991. }