Text.php 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855
  1. <?php
  2. /**
  3. * CakePHP(tm) : Rapid Development Framework (http://cakephp.org)
  4. * Copyright (c) Cake Software Foundation, Inc. (http://cakefoundation.org)
  5. *
  6. * Licensed under The MIT License
  7. * For full copyright and license information, please see the LICENSE.txt
  8. * Redistributions of files must retain the above copyright notice.
  9. *
  10. * @copyright Copyright (c) Cake Software Foundation, Inc. (http://cakefoundation.org)
  11. * @link http://cakephp.org CakePHP(tm) Project
  12. * @since 1.2.0
  13. * @license http://www.opensource.org/licenses/mit-license.php MIT License
  14. */
  15. namespace Cake\Utility;
  16. use InvalidArgumentException;
  17. /**
  18. * Text handling methods.
  19. *
  20. */
  21. class Text
  22. {
  23. /**
  24. * Generate a random UUID version 4
  25. *
  26. * Warning: This method should not be used as a random seed for any cryptographic operations.
  27. * Instead you should use the openssl or mcrypt extensions.
  28. *
  29. * @see http://www.ietf.org/rfc/rfc4122.txt
  30. * @return string RFC 4122 UUID
  31. * @copyright Matt Farina MIT License https://github.com/lootils/uuid/blob/master/LICENSE
  32. */
  33. public static function uuid()
  34. {
  35. return sprintf(
  36. '%04x%04x-%04x-%04x-%04x-%04x%04x%04x',
  37. // 32 bits for "time_low"
  38. mt_rand(0, 65535),
  39. mt_rand(0, 65535),
  40. // 16 bits for "time_mid"
  41. mt_rand(0, 65535),
  42. // 12 bits before the 0100 of (version) 4 for "time_hi_and_version"
  43. mt_rand(0, 4095) | 0x4000,
  44. // 16 bits, 8 bits for "clk_seq_hi_res",
  45. // 8 bits for "clk_seq_low",
  46. // two most significant bits holds zero and one for variant DCE1.1
  47. mt_rand(0, 0x3fff) | 0x8000,
  48. // 48 bits for "node"
  49. mt_rand(0, 65535),
  50. mt_rand(0, 65535),
  51. mt_rand(0, 65535)
  52. );
  53. }
  54. /**
  55. * Tokenizes a string using $separator, ignoring any instance of $separator that appears between
  56. * $leftBound and $rightBound.
  57. *
  58. * @param string $data The data to tokenize.
  59. * @param string $separator The token to split the data on.
  60. * @param string $leftBound The left boundary to ignore separators in.
  61. * @param string $rightBound The right boundary to ignore separators in.
  62. * @return array|string Array of tokens in $data or original input if empty.
  63. */
  64. public static function tokenize($data, $separator = ',', $leftBound = '(', $rightBound = ')')
  65. {
  66. if (empty($data)) {
  67. return [];
  68. }
  69. $depth = 0;
  70. $offset = 0;
  71. $buffer = '';
  72. $results = [];
  73. $length = mb_strlen($data);
  74. $open = false;
  75. while ($offset <= $length) {
  76. $tmpOffset = -1;
  77. $offsets = [
  78. mb_strpos($data, $separator, $offset),
  79. mb_strpos($data, $leftBound, $offset),
  80. mb_strpos($data, $rightBound, $offset)
  81. ];
  82. for ($i = 0; $i < 3; $i++) {
  83. if ($offsets[$i] !== false && ($offsets[$i] < $tmpOffset || $tmpOffset == -1)) {
  84. $tmpOffset = $offsets[$i];
  85. }
  86. }
  87. if ($tmpOffset !== -1) {
  88. $buffer .= mb_substr($data, $offset, ($tmpOffset - $offset));
  89. $char = mb_substr($data, $tmpOffset, 1);
  90. if (!$depth && $char === $separator) {
  91. $results[] = $buffer;
  92. $buffer = '';
  93. } else {
  94. $buffer .= $char;
  95. }
  96. if ($leftBound !== $rightBound) {
  97. if ($char === $leftBound) {
  98. $depth++;
  99. }
  100. if ($char === $rightBound) {
  101. $depth--;
  102. }
  103. } else {
  104. if ($char === $leftBound) {
  105. if (!$open) {
  106. $depth++;
  107. $open = true;
  108. } else {
  109. $depth--;
  110. }
  111. }
  112. }
  113. $offset = ++$tmpOffset;
  114. } else {
  115. $results[] = $buffer . mb_substr($data, $offset);
  116. $offset = $length + 1;
  117. }
  118. }
  119. if (empty($results) && !empty($buffer)) {
  120. $results[] = $buffer;
  121. }
  122. if (!empty($results)) {
  123. return array_map('trim', $results);
  124. }
  125. return [];
  126. }
  127. /**
  128. * Replaces variable placeholders inside a $str with any given $data. Each key in the $data array
  129. * corresponds to a variable placeholder name in $str.
  130. * Example:
  131. * ```
  132. * Text::insert(':name is :age years old.', ['name' => 'Bob', '65']);
  133. * ```
  134. * Returns: Bob is 65 years old.
  135. *
  136. * Available $options are:
  137. *
  138. * - before: The character or string in front of the name of the variable placeholder (Defaults to `:`)
  139. * - after: The character or string after the name of the variable placeholder (Defaults to null)
  140. * - escape: The character or string used to escape the before character / string (Defaults to `\`)
  141. * - format: A regex to use for matching variable placeholders. Default is: `/(?<!\\)\:%s/`
  142. * (Overwrites before, after, breaks escape / clean)
  143. * - clean: A boolean or array with instructions for Text::cleanInsert
  144. *
  145. * @param string $str A string containing variable placeholders
  146. * @param array $data A key => val array where each key stands for a placeholder variable name
  147. * to be replaced with val
  148. * @param array $options An array of options, see description above
  149. * @return string
  150. */
  151. public static function insert($str, $data, array $options = [])
  152. {
  153. $defaults = [
  154. 'before' => ':', 'after' => null, 'escape' => '\\', 'format' => null, 'clean' => false
  155. ];
  156. $options += $defaults;
  157. $format = $options['format'];
  158. $data = (array)$data;
  159. if (empty($data)) {
  160. return ($options['clean']) ? static::cleanInsert($str, $options) : $str;
  161. }
  162. if (!isset($format)) {
  163. $format = sprintf(
  164. '/(?<!%s)%s%%s%s/',
  165. preg_quote($options['escape'], '/'),
  166. str_replace('%', '%%', preg_quote($options['before'], '/')),
  167. str_replace('%', '%%', preg_quote($options['after'], '/'))
  168. );
  169. }
  170. if (strpos($str, '?') !== false && is_numeric(key($data))) {
  171. $offset = 0;
  172. while (($pos = strpos($str, '?', $offset)) !== false) {
  173. $val = array_shift($data);
  174. $offset = $pos + strlen($val);
  175. $str = substr_replace($str, $val, $pos, 1);
  176. }
  177. return ($options['clean']) ? static::cleanInsert($str, $options) : $str;
  178. }
  179. asort($data);
  180. $dataKeys = array_keys($data);
  181. $hashKeys = array_map('crc32', $dataKeys);
  182. $tempData = array_combine($dataKeys, $hashKeys);
  183. krsort($tempData);
  184. foreach ($tempData as $key => $hashVal) {
  185. $key = sprintf($format, preg_quote($key, '/'));
  186. $str = preg_replace($key, $hashVal, $str);
  187. }
  188. $dataReplacements = array_combine($hashKeys, array_values($data));
  189. foreach ($dataReplacements as $tmpHash => $tmpValue) {
  190. $tmpValue = (is_array($tmpValue)) ? '' : $tmpValue;
  191. $str = str_replace($tmpHash, $tmpValue, $str);
  192. }
  193. if (!isset($options['format']) && isset($options['before'])) {
  194. $str = str_replace($options['escape'] . $options['before'], $options['before'], $str);
  195. }
  196. return ($options['clean']) ? static::cleanInsert($str, $options) : $str;
  197. }
  198. /**
  199. * Cleans up a Text::insert() formatted string with given $options depending on the 'clean' key in
  200. * $options. The default method used is text but html is also available. The goal of this function
  201. * is to replace all whitespace and unneeded markup around placeholders that did not get replaced
  202. * by Text::insert().
  203. *
  204. * @param string $str String to clean.
  205. * @param array $options Options list.
  206. * @return string
  207. * @see \Cake\Utility\Text::insert()
  208. */
  209. public static function cleanInsert($str, array $options)
  210. {
  211. $clean = $options['clean'];
  212. if (!$clean) {
  213. return $str;
  214. }
  215. if ($clean === true) {
  216. $clean = ['method' => 'text'];
  217. }
  218. if (!is_array($clean)) {
  219. $clean = ['method' => $options['clean']];
  220. }
  221. switch ($clean['method']) {
  222. case 'html':
  223. $clean += [
  224. 'word' => '[\w,.]+',
  225. 'andText' => true,
  226. 'replacement' => '',
  227. ];
  228. $kleenex = sprintf(
  229. '/[\s]*[a-z]+=(")(%s%s%s[\s]*)+\\1/i',
  230. preg_quote($options['before'], '/'),
  231. $clean['word'],
  232. preg_quote($options['after'], '/')
  233. );
  234. $str = preg_replace($kleenex, $clean['replacement'], $str);
  235. if ($clean['andText']) {
  236. $options['clean'] = ['method' => 'text'];
  237. $str = static::cleanInsert($str, $options);
  238. }
  239. break;
  240. case 'text':
  241. $clean += [
  242. 'word' => '[\w,.]+',
  243. 'gap' => '[\s]*(?:(?:and|or)[\s]*)?',
  244. 'replacement' => '',
  245. ];
  246. $kleenex = sprintf(
  247. '/(%s%s%s%s|%s%s%s%s)/',
  248. preg_quote($options['before'], '/'),
  249. $clean['word'],
  250. preg_quote($options['after'], '/'),
  251. $clean['gap'],
  252. $clean['gap'],
  253. preg_quote($options['before'], '/'),
  254. $clean['word'],
  255. preg_quote($options['after'], '/')
  256. );
  257. $str = preg_replace($kleenex, $clean['replacement'], $str);
  258. break;
  259. }
  260. return $str;
  261. }
  262. /**
  263. * Wraps text to a specific width, can optionally wrap at word breaks.
  264. *
  265. * ### Options
  266. *
  267. * - `width` The width to wrap to. Defaults to 72.
  268. * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
  269. * - `indent` String to indent with. Defaults to null.
  270. * - `indentAt` 0 based index to start indenting at. Defaults to 0.
  271. *
  272. * @param string $text The text to format.
  273. * @param array|int $options Array of options to use, or an integer to wrap the text to.
  274. * @return string Formatted text.
  275. */
  276. public static function wrap($text, $options = [])
  277. {
  278. if (is_numeric($options)) {
  279. $options = ['width' => $options];
  280. }
  281. $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];
  282. if ($options['wordWrap']) {
  283. $wrapped = self::wordWrap($text, $options['width'], "\n");
  284. } else {
  285. $wrapped = trim(chunk_split($text, $options['width'] - 1, "\n"));
  286. }
  287. if (!empty($options['indent'])) {
  288. $chunks = explode("\n", $wrapped);
  289. for ($i = $options['indentAt'], $len = count($chunks); $i < $len; $i++) {
  290. $chunks[$i] = $options['indent'] . $chunks[$i];
  291. }
  292. $wrapped = implode("\n", $chunks);
  293. }
  294. return $wrapped;
  295. }
  296. /**
  297. * Wraps a complete block of text to a specific width, can optionally wrap
  298. * at word breaks.
  299. *
  300. * ### Options
  301. *
  302. * - `width` The width to wrap to. Defaults to 72.
  303. * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
  304. * - `indent` String to indent with. Defaults to null.
  305. * - `indentAt` 0 based index to start indenting at. Defaults to 0.
  306. *
  307. * @param string $text The text to format.
  308. * @param array|int $options Array of options to use, or an integer to wrap the text to.
  309. * @return string Formatted text.
  310. */
  311. public static function wrapBlock($text, $options = [])
  312. {
  313. if (is_numeric($options)) {
  314. $options = ['width' => $options];
  315. }
  316. $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];
  317. if (!empty($options['indentAt']) && $options['indentAt'] === 0) {
  318. $indentLength = !empty($options['indent']) ? strlen($options['indent']) : 0;
  319. $options['width'] = $options['width'] - $indentLength;
  320. return self::wrap($text, $options);
  321. }
  322. $wrapped = self::wrap($text, $options);
  323. if (!empty($options['indent'])) {
  324. $indentationLength = mb_strlen($options['indent']);
  325. $chunks = explode("\n", $wrapped);
  326. $count = count($chunks);
  327. if ($count < 2) {
  328. return $wrapped;
  329. }
  330. $toRewrap = '';
  331. for ($i = $options['indentAt']; $i < $count; $i++) {
  332. $toRewrap .= mb_substr($chunks[$i], $indentationLength) . ' ';
  333. unset($chunks[$i]);
  334. }
  335. $options['width'] -= $indentationLength;
  336. $options['indentAt'] = 0;
  337. $rewrapped = self::wrap($toRewrap, $options);
  338. $newChunks = explode("\n", $rewrapped);
  339. $chunks = array_merge($chunks, $newChunks);
  340. $wrapped = implode("\n", $chunks);
  341. }
  342. return $wrapped;
  343. }
  344. /**
  345. * Unicode and newline aware version of wordwrap.
  346. *
  347. * @param string $text The text to format.
  348. * @param int $width The width to wrap to. Defaults to 72.
  349. * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
  350. * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
  351. * @return string Formatted text.
  352. */
  353. public static function wordWrap($text, $width = 72, $break = "\n", $cut = false)
  354. {
  355. $paragraphs = explode($break, $text);
  356. foreach ($paragraphs as &$paragraph) {
  357. $paragraph = static::_wordWrap($paragraph, $width, $break, $cut);
  358. }
  359. return implode($break, $paragraphs);
  360. }
  361. /**
  362. * Unicode aware version of wordwrap as helper method.
  363. *
  364. * @param string $text The text to format.
  365. * @param int $width The width to wrap to. Defaults to 72.
  366. * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
  367. * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
  368. * @return string Formatted text.
  369. */
  370. protected static function _wordWrap($text, $width = 72, $break = "\n", $cut = false)
  371. {
  372. if ($cut) {
  373. $parts = [];
  374. while (mb_strlen($text) > 0) {
  375. $part = mb_substr($text, 0, $width);
  376. $parts[] = trim($part);
  377. $text = trim(mb_substr($text, mb_strlen($part)));
  378. }
  379. return implode($break, $parts);
  380. }
  381. $parts = [];
  382. while (mb_strlen($text) > 0) {
  383. if ($width >= mb_strlen($text)) {
  384. $parts[] = trim($text);
  385. break;
  386. }
  387. $part = mb_substr($text, 0, $width);
  388. $nextChar = mb_substr($text, $width, 1);
  389. if ($nextChar !== ' ') {
  390. $breakAt = mb_strrpos($part, ' ');
  391. if ($breakAt === false) {
  392. $breakAt = mb_strpos($text, ' ', $width);
  393. }
  394. if ($breakAt === false) {
  395. $parts[] = trim($text);
  396. break;
  397. }
  398. $part = mb_substr($text, 0, $breakAt);
  399. }
  400. $part = trim($part);
  401. $parts[] = $part;
  402. $text = trim(mb_substr($text, mb_strlen($part)));
  403. }
  404. return implode($break, $parts);
  405. }
  406. /**
  407. * Highlights a given phrase in a text. You can specify any expression in highlighter that
  408. * may include the \1 expression to include the $phrase found.
  409. *
  410. * ### Options:
  411. *
  412. * - `format` The piece of HTML with that the phrase will be highlighted
  413. * - `html` If true, will ignore any HTML tags, ensuring that only the correct text is highlighted
  414. * - `regex` a custom regex rule that is used to match words, default is '|$tag|iu'
  415. *
  416. * @param string $text Text to search the phrase in.
  417. * @param string|array $phrase The phrase or phrases that will be searched.
  418. * @param array $options An array of HTML attributes and options.
  419. * @return string The highlighted text
  420. * @link http://book.cakephp.org/3.0/en/core-libraries/string.html#highlighting-substrings
  421. */
  422. public static function highlight($text, $phrase, array $options = [])
  423. {
  424. if (empty($phrase)) {
  425. return $text;
  426. }
  427. $defaults = [
  428. 'format' => '<span class="highlight">\1</span>',
  429. 'html' => false,
  430. 'regex' => "|%s|iu"
  431. ];
  432. $options += $defaults;
  433. extract($options);
  434. if (is_array($phrase)) {
  435. $replace = [];
  436. $with = [];
  437. foreach ($phrase as $key => $segment) {
  438. $segment = '(' . preg_quote($segment, '|') . ')';
  439. if ($html) {
  440. $segment = "(?![^<]+>)$segment(?![^<]+>)";
  441. }
  442. $with[] = (is_array($format)) ? $format[$key] : $format;
  443. $replace[] = sprintf($options['regex'], $segment);
  444. }
  445. return preg_replace($replace, $with, $text);
  446. }
  447. $phrase = '(' . preg_quote($phrase, '|') . ')';
  448. if ($html) {
  449. $phrase = "(?![^<]+>)$phrase(?![^<]+>)";
  450. }
  451. return preg_replace(sprintf($options['regex'], $phrase), $format, $text);
  452. }
  453. /**
  454. * Strips given text of all links (<a href=....).
  455. *
  456. * @param string $text Text
  457. * @return string The text without links
  458. */
  459. public static function stripLinks($text)
  460. {
  461. return preg_replace('|<a\s+[^>]+>|im', '', preg_replace('|<\/a>|im', '', $text));
  462. }
  463. /**
  464. * Truncates text starting from the end.
  465. *
  466. * Cuts a string to the length of $length and replaces the first characters
  467. * with the ellipsis if the text is longer than length.
  468. *
  469. * ### Options:
  470. *
  471. * - `ellipsis` Will be used as Beginning and prepended to the trimmed string
  472. * - `exact` If false, $text will not be cut mid-word
  473. *
  474. * @param string $text String to truncate.
  475. * @param int $length Length of returned string, including ellipsis.
  476. * @param array $options An array of options.
  477. * @return string Trimmed string.
  478. */
  479. public static function tail($text, $length = 100, array $options = [])
  480. {
  481. $default = [
  482. 'ellipsis' => '...', 'exact' => true
  483. ];
  484. $options += $default;
  485. extract($options);
  486. if (mb_strlen($text) <= $length) {
  487. return $text;
  488. }
  489. $truncate = mb_substr($text, mb_strlen($text) - $length + mb_strlen($ellipsis));
  490. if (!$exact) {
  491. $spacepos = mb_strpos($truncate, ' ');
  492. $truncate = $spacepos === false ? '' : trim(mb_substr($truncate, $spacepos));
  493. }
  494. return $ellipsis . $truncate;
  495. }
  496. /**
  497. * Truncates text.
  498. *
  499. * Cuts a string to the length of $length and replaces the last characters
  500. * with the ellipsis if the text is longer than length.
  501. *
  502. * ### Options:
  503. *
  504. * - `ellipsis` Will be used as ending and appended to the trimmed string
  505. * - `exact` If false, $text will not be cut mid-word
  506. * - `html` If true, HTML tags would be handled correctly
  507. *
  508. * @param string $text String to truncate.
  509. * @param int $length Length of returned string, including ellipsis.
  510. * @param array $options An array of HTML attributes and options.
  511. * @return string Trimmed string.
  512. * @link http://book.cakephp.org/3.0/en/core-libraries/string.html#truncating-text
  513. */
  514. public static function truncate($text, $length = 100, array $options = [])
  515. {
  516. $default = [
  517. 'ellipsis' => '...', 'exact' => true, 'html' => false
  518. ];
  519. if (!empty($options['html']) && strtolower(mb_internal_encoding()) === 'utf-8') {
  520. $default['ellipsis'] = "\xe2\x80\xa6";
  521. }
  522. $options += $default;
  523. extract($options);
  524. if ($html) {
  525. if (mb_strlen(preg_replace('/<.*?>/', '', $text)) <= $length) {
  526. return $text;
  527. }
  528. $totalLength = mb_strlen(strip_tags($ellipsis));
  529. $openTags = [];
  530. $truncate = '';
  531. preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $text, $tags, PREG_SET_ORDER);
  532. foreach ($tags as $tag) {
  533. if (!preg_match('/img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param/s', $tag[2])) {
  534. if (preg_match('/<[\w]+[^>]*>/s', $tag[0])) {
  535. array_unshift($openTags, $tag[2]);
  536. } elseif (preg_match('/<\/([\w]+)[^>]*>/s', $tag[0], $closeTag)) {
  537. $pos = array_search($closeTag[1], $openTags);
  538. if ($pos !== false) {
  539. array_splice($openTags, $pos, 1);
  540. }
  541. }
  542. }
  543. $truncate .= $tag[1];
  544. $contentLength = mb_strlen(preg_replace('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', ' ', $tag[3]));
  545. if ($contentLength + $totalLength > $length) {
  546. $left = $length - $totalLength;
  547. $entitiesLength = 0;
  548. if (preg_match_all('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', $tag[3], $entities, PREG_OFFSET_CAPTURE)) {
  549. foreach ($entities[0] as $entity) {
  550. if ($entity[1] + 1 - $entitiesLength <= $left) {
  551. $left--;
  552. $entitiesLength += mb_strlen($entity[0]);
  553. } else {
  554. break;
  555. }
  556. }
  557. }
  558. $truncate .= mb_substr($tag[3], 0, $left + $entitiesLength);
  559. break;
  560. }
  561. $truncate .= $tag[3];
  562. $totalLength += $contentLength;
  563. if ($totalLength >= $length) {
  564. break;
  565. }
  566. }
  567. } else {
  568. if (mb_strlen($text) <= $length) {
  569. return $text;
  570. }
  571. $truncate = mb_substr($text, 0, $length - mb_strlen($ellipsis));
  572. }
  573. if (!$exact) {
  574. $spacepos = mb_strrpos($truncate, ' ');
  575. if ($html) {
  576. $truncateCheck = mb_substr($truncate, 0, $spacepos);
  577. $lastOpenTag = mb_strrpos($truncateCheck, '<');
  578. $lastCloseTag = mb_strrpos($truncateCheck, '>');
  579. if ($lastOpenTag > $lastCloseTag) {
  580. preg_match_all('/<[\w]+[^>]*>/s', $truncate, $lastTagMatches);
  581. $lastTag = array_pop($lastTagMatches[0]);
  582. $spacepos = mb_strrpos($truncate, $lastTag) + mb_strlen($lastTag);
  583. }
  584. $bits = mb_substr($truncate, $spacepos);
  585. preg_match_all('/<\/([a-z]+)>/', $bits, $droppedTags, PREG_SET_ORDER);
  586. if (!empty($droppedTags)) {
  587. if (!empty($openTags)) {
  588. foreach ($droppedTags as $closingTag) {
  589. if (!in_array($closingTag[1], $openTags)) {
  590. array_unshift($openTags, $closingTag[1]);
  591. }
  592. }
  593. } else {
  594. foreach ($droppedTags as $closingTag) {
  595. $openTags[] = $closingTag[1];
  596. }
  597. }
  598. }
  599. }
  600. $truncate = mb_substr($truncate, 0, $spacepos);
  601. // If truncate still empty, then we don't need to count ellipsis in the cut.
  602. if (mb_strlen($truncate) === 0) {
  603. $truncate = mb_substr($text, 0, $length);
  604. }
  605. }
  606. $truncate .= $ellipsis;
  607. if ($html) {
  608. foreach ($openTags as $tag) {
  609. $truncate .= '</' . $tag . '>';
  610. }
  611. }
  612. return $truncate;
  613. }
  614. /**
  615. * Extracts an excerpt from the text surrounding the phrase with a number of characters on each side
  616. * determined by radius.
  617. *
  618. * @param string $text String to search the phrase in
  619. * @param string $phrase Phrase that will be searched for
  620. * @param int $radius The amount of characters that will be returned on each side of the founded phrase
  621. * @param string $ellipsis Ending that will be appended
  622. * @return string Modified string
  623. * @link http://book.cakephp.org/3.0/en/core-libraries/string.html#extracting-an-excerpt
  624. */
  625. public static function excerpt($text, $phrase, $radius = 100, $ellipsis = '...')
  626. {
  627. if (empty($text) || empty($phrase)) {
  628. return static::truncate($text, $radius * 2, ['ellipsis' => $ellipsis]);
  629. }
  630. $append = $prepend = $ellipsis;
  631. $phraseLen = mb_strlen($phrase);
  632. $textLen = mb_strlen($text);
  633. $pos = mb_strpos(mb_strtolower($text), mb_strtolower($phrase));
  634. if ($pos === false) {
  635. return mb_substr($text, 0, $radius) . $ellipsis;
  636. }
  637. $startPos = $pos - $radius;
  638. if ($startPos <= 0) {
  639. $startPos = 0;
  640. $prepend = '';
  641. }
  642. $endPos = $pos + $phraseLen + $radius;
  643. if ($endPos >= $textLen) {
  644. $endPos = $textLen;
  645. $append = '';
  646. }
  647. $excerpt = mb_substr($text, $startPos, $endPos - $startPos);
  648. $excerpt = $prepend . $excerpt . $append;
  649. return $excerpt;
  650. }
  651. /**
  652. * Creates a comma separated list where the last two items are joined with 'and', forming natural language.
  653. *
  654. * @param array $list The list to be joined.
  655. * @param string $and The word used to join the last and second last items together with. Defaults to 'and'.
  656. * @param string $separator The separator used to join all the other items together. Defaults to ', '.
  657. * @return string The glued together string.
  658. * @link http://book.cakephp.org/3.0/en/core-libraries/string.html#converting-an-array-to-sentence-form
  659. */
  660. public static function toList(array $list, $and = null, $separator = ', ')
  661. {
  662. if ($and === null) {
  663. $and = __d('cake', 'and');
  664. }
  665. if (count($list) > 1) {
  666. return implode($separator, array_slice($list, null, -1)) . ' ' . $and . ' ' . array_pop($list);
  667. }
  668. return array_pop($list);
  669. }
  670. /**
  671. * Check if the string contain multibyte characters
  672. *
  673. * @param string $string value to test
  674. * @return bool
  675. */
  676. public static function isMultibyte($string)
  677. {
  678. $length = strlen($string);
  679. for ($i = 0; $i < $length; $i++) {
  680. $value = ord(($string[$i]));
  681. if ($value > 128) {
  682. return true;
  683. }
  684. }
  685. return false;
  686. }
  687. /**
  688. * Converts a multibyte character string
  689. * to the decimal value of the character
  690. *
  691. * @param string $string String to convert.
  692. * @return array
  693. */
  694. public static function utf8($string)
  695. {
  696. $map = [];
  697. $values = [];
  698. $find = 1;
  699. $length = strlen($string);
  700. for ($i = 0; $i < $length; $i++) {
  701. $value = ord($string[$i]);
  702. if ($value < 128) {
  703. $map[] = $value;
  704. } else {
  705. if (empty($values)) {
  706. $find = ($value < 224) ? 2 : 3;
  707. }
  708. $values[] = $value;
  709. if (count($values) === $find) {
  710. if ($find == 3) {
  711. $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
  712. } else {
  713. $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
  714. }
  715. $values = [];
  716. $find = 1;
  717. }
  718. }
  719. }
  720. return $map;
  721. }
  722. /**
  723. * Converts the decimal value of a multibyte character string
  724. * to a string
  725. *
  726. * @param array $array Array
  727. * @return string
  728. */
  729. public static function ascii(array $array)
  730. {
  731. $ascii = '';
  732. foreach ($array as $utf8) {
  733. if ($utf8 < 128) {
  734. $ascii .= chr($utf8);
  735. } elseif ($utf8 < 2048) {
  736. $ascii .= chr(192 + (($utf8 - ($utf8 % 64)) / 64));
  737. $ascii .= chr(128 + ($utf8 % 64));
  738. } else {
  739. $ascii .= chr(224 + (($utf8 - ($utf8 % 4096)) / 4096));
  740. $ascii .= chr(128 + ((($utf8 % 4096) - ($utf8 % 64)) / 64));
  741. $ascii .= chr(128 + ($utf8 % 64));
  742. }
  743. }
  744. return $ascii;
  745. }
  746. /**
  747. * Converts filesize from human readable string to bytes
  748. *
  749. * @param string $size Size in human readable string like '5MB', '5M', '500B', '50kb' etc.
  750. * @param mixed $default Value to be returned when invalid size was used, for example 'Unknown type'
  751. * @return mixed Number of bytes as integer on success, `$default` on failure if not false
  752. * @throws \InvalidArgumentException On invalid Unit type.
  753. * @link http://book.cakephp.org/3.0/en/core-libraries/helpers/text.html
  754. */
  755. public static function parseFileSize($size, $default = false)
  756. {
  757. if (ctype_digit($size)) {
  758. return (int)$size;
  759. }
  760. $size = strtoupper($size);
  761. $l = -2;
  762. $i = array_search(substr($size, -2), ['KB', 'MB', 'GB', 'TB', 'PB']);
  763. if ($i === false) {
  764. $l = -1;
  765. $i = array_search(substr($size, -1), ['K', 'M', 'G', 'T', 'P']);
  766. }
  767. if ($i !== false) {
  768. $size = substr($size, 0, $l);
  769. return $size * pow(1024, $i + 1);
  770. }
  771. if (substr($size, -1) === 'B' && ctype_digit(substr($size, 0, -1))) {
  772. $size = substr($size, 0, -1);
  773. return (int)$size;
  774. }
  775. if ($default !== false) {
  776. return $default;
  777. }
  778. throw new InvalidArgumentException('No unit type.');
  779. }
  780. }