Text.php 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792
  1. <?php
  2. /**
  3. * CakePHP(tm) : Rapid Development Framework (http://cakephp.org)
  4. * Copyright (c) Cake Software Foundation, Inc. (http://cakefoundation.org)
  5. *
  6. * Licensed under The MIT License
  7. * For full copyright and license information, please see the LICENSE.txt
  8. * Redistributions of files must retain the above copyright notice.
  9. *
  10. * @copyright Copyright (c) Cake Software Foundation, Inc. (http://cakefoundation.org)
  11. * @link http://cakephp.org CakePHP(tm) Project
  12. * @since 1.2.0
  13. * @license http://www.opensource.org/licenses/mit-license.php MIT License
  14. */
  15. namespace Cake\Utility;
  16. use InvalidArgumentException;
  17. /**
  18. * Text handling methods.
  19. *
  20. */
  21. class Text
  22. {
  23. /**
  24. * Generate a random UUID version 4
  25. *
  26. * Warning: This method should not be used as a random seed for any cryptographic operations.
  27. * Instead you should use the openssl or mcrypt extensions.
  28. *
  29. * @see http://www.ietf.org/rfc/rfc4122.txt
  30. * @return string RFC 4122 UUID
  31. * @copyright Matt Farina MIT License https://github.com/lootils/uuid/blob/master/LICENSE
  32. */
  33. public static function uuid()
  34. {
  35. return sprintf(
  36. '%04x%04x-%04x-%04x-%04x-%04x%04x%04x',
  37. // 32 bits for "time_low"
  38. mt_rand(0, 65535),
  39. mt_rand(0, 65535),
  40. // 16 bits for "time_mid"
  41. mt_rand(0, 65535),
  42. // 12 bits before the 0100 of (version) 4 for "time_hi_and_version"
  43. mt_rand(0, 4095) | 0x4000,
  44. // 16 bits, 8 bits for "clk_seq_hi_res",
  45. // 8 bits for "clk_seq_low",
  46. // two most significant bits holds zero and one for variant DCE1.1
  47. mt_rand(0, 0x3fff) | 0x8000,
  48. // 48 bits for "node"
  49. mt_rand(0, 65535),
  50. mt_rand(0, 65535),
  51. mt_rand(0, 65535)
  52. );
  53. }
  54. /**
  55. * Tokenizes a string using $separator, ignoring any instance of $separator that appears between
  56. * $leftBound and $rightBound.
  57. *
  58. * @param string $data The data to tokenize.
  59. * @param string $separator The token to split the data on.
  60. * @param string $leftBound The left boundary to ignore separators in.
  61. * @param string $rightBound The right boundary to ignore separators in.
  62. * @return mixed Array of tokens in $data or original input if empty.
  63. */
  64. public static function tokenize($data, $separator = ',', $leftBound = '(', $rightBound = ')')
  65. {
  66. if (empty($data)) {
  67. return [];
  68. }
  69. $depth = 0;
  70. $offset = 0;
  71. $buffer = '';
  72. $results = [];
  73. $length = strlen($data);
  74. $open = false;
  75. while ($offset <= $length) {
  76. $tmpOffset = -1;
  77. $offsets = [
  78. strpos($data, $separator, $offset),
  79. strpos($data, $leftBound, $offset),
  80. strpos($data, $rightBound, $offset)
  81. ];
  82. for ($i = 0; $i < 3; $i++) {
  83. if ($offsets[$i] !== false && ($offsets[$i] < $tmpOffset || $tmpOffset == -1)) {
  84. $tmpOffset = $offsets[$i];
  85. }
  86. }
  87. if ($tmpOffset !== -1) {
  88. $buffer .= substr($data, $offset, ($tmpOffset - $offset));
  89. if (!$depth && $data{$tmpOffset} === $separator) {
  90. $results[] = $buffer;
  91. $buffer = '';
  92. } else {
  93. $buffer .= $data{$tmpOffset};
  94. }
  95. if ($leftBound !== $rightBound) {
  96. if ($data{$tmpOffset} === $leftBound) {
  97. $depth++;
  98. }
  99. if ($data{$tmpOffset} === $rightBound) {
  100. $depth--;
  101. }
  102. } else {
  103. if ($data{$tmpOffset} === $leftBound) {
  104. if (!$open) {
  105. $depth++;
  106. $open = true;
  107. } else {
  108. $depth--;
  109. }
  110. }
  111. }
  112. $offset = ++$tmpOffset;
  113. } else {
  114. $results[] = $buffer . substr($data, $offset);
  115. $offset = $length + 1;
  116. }
  117. }
  118. if (empty($results) && !empty($buffer)) {
  119. $results[] = $buffer;
  120. }
  121. if (!empty($results)) {
  122. return array_map('trim', $results);
  123. }
  124. return [];
  125. }
  126. /**
  127. * Replaces variable placeholders inside a $str with any given $data. Each key in the $data array
  128. * corresponds to a variable placeholder name in $str.
  129. * Example: `Text::insert(':name is :age years old.', ['name' => 'Bob', '65']);`
  130. * Returns: Bob is 65 years old.
  131. *
  132. * Available $options are:
  133. *
  134. * - before: The character or string in front of the name of the variable placeholder (Defaults to `:`)
  135. * - after: The character or string after the name of the variable placeholder (Defaults to null)
  136. * - escape: The character or string used to escape the before character / string (Defaults to `\`)
  137. * - format: A regex to use for matching variable placeholders. Default is: `/(?<!\\)\:%s/`
  138. * (Overwrites before, after, breaks escape / clean)
  139. * - clean: A boolean or array with instructions for Text::cleanInsert
  140. *
  141. * @param string $str A string containing variable placeholders
  142. * @param array $data A key => val array where each key stands for a placeholder variable name
  143. * to be replaced with val
  144. * @param array $options An array of options, see description above
  145. * @return string
  146. */
  147. public static function insert($str, $data, array $options = [])
  148. {
  149. $defaults = [
  150. 'before' => ':', 'after' => null, 'escape' => '\\', 'format' => null, 'clean' => false
  151. ];
  152. $options += $defaults;
  153. $format = $options['format'];
  154. $data = (array)$data;
  155. if (empty($data)) {
  156. return ($options['clean']) ? static::cleanInsert($str, $options) : $str;
  157. }
  158. if (!isset($format)) {
  159. $format = sprintf(
  160. '/(?<!%s)%s%%s%s/',
  161. preg_quote($options['escape'], '/'),
  162. str_replace('%', '%%', preg_quote($options['before'], '/')),
  163. str_replace('%', '%%', preg_quote($options['after'], '/'))
  164. );
  165. }
  166. if (strpos($str, '?') !== false && is_numeric(key($data))) {
  167. $offset = 0;
  168. while (($pos = strpos($str, '?', $offset)) !== false) {
  169. $val = array_shift($data);
  170. $offset = $pos + strlen($val);
  171. $str = substr_replace($str, $val, $pos, 1);
  172. }
  173. return ($options['clean']) ? static::cleanInsert($str, $options) : $str;
  174. }
  175. asort($data);
  176. $dataKeys = array_keys($data);
  177. $hashKeys = array_map('crc32', $dataKeys);
  178. $tempData = array_combine($dataKeys, $hashKeys);
  179. krsort($tempData);
  180. foreach ($tempData as $key => $hashVal) {
  181. $key = sprintf($format, preg_quote($key, '/'));
  182. $str = preg_replace($key, $hashVal, $str);
  183. }
  184. $dataReplacements = array_combine($hashKeys, array_values($data));
  185. foreach ($dataReplacements as $tmpHash => $tmpValue) {
  186. $tmpValue = (is_array($tmpValue)) ? '' : $tmpValue;
  187. $str = str_replace($tmpHash, $tmpValue, $str);
  188. }
  189. if (!isset($options['format']) && isset($options['before'])) {
  190. $str = str_replace($options['escape'] . $options['before'], $options['before'], $str);
  191. }
  192. return ($options['clean']) ? static::cleanInsert($str, $options) : $str;
  193. }
  194. /**
  195. * Cleans up a Text::insert() formatted string with given $options depending on the 'clean' key in
  196. * $options. The default method used is text but html is also available. The goal of this function
  197. * is to replace all whitespace and unneeded markup around placeholders that did not get replaced
  198. * by Text::insert().
  199. *
  200. * @param string $str String to clean.
  201. * @param array $options Options list.
  202. * @return string
  203. * @see \Cake\Utility\Text::insert()
  204. */
  205. public static function cleanInsert($str, array $options)
  206. {
  207. $clean = $options['clean'];
  208. if (!$clean) {
  209. return $str;
  210. }
  211. if ($clean === true) {
  212. $clean = ['method' => 'text'];
  213. }
  214. if (!is_array($clean)) {
  215. $clean = ['method' => $options['clean']];
  216. }
  217. switch ($clean['method']) {
  218. case 'html':
  219. $clean += [
  220. 'word' => '[\w,.]+',
  221. 'andText' => true,
  222. 'replacement' => '',
  223. ];
  224. $kleenex = sprintf(
  225. '/[\s]*[a-z]+=(")(%s%s%s[\s]*)+\\1/i',
  226. preg_quote($options['before'], '/'),
  227. $clean['word'],
  228. preg_quote($options['after'], '/')
  229. );
  230. $str = preg_replace($kleenex, $clean['replacement'], $str);
  231. if ($clean['andText']) {
  232. $options['clean'] = ['method' => 'text'];
  233. $str = static::cleanInsert($str, $options);
  234. }
  235. break;
  236. case 'text':
  237. $clean += [
  238. 'word' => '[\w,.]+',
  239. 'gap' => '[\s]*(?:(?:and|or)[\s]*)?',
  240. 'replacement' => '',
  241. ];
  242. $kleenex = sprintf(
  243. '/(%s%s%s%s|%s%s%s%s)/',
  244. preg_quote($options['before'], '/'),
  245. $clean['word'],
  246. preg_quote($options['after'], '/'),
  247. $clean['gap'],
  248. $clean['gap'],
  249. preg_quote($options['before'], '/'),
  250. $clean['word'],
  251. preg_quote($options['after'], '/')
  252. );
  253. $str = preg_replace($kleenex, $clean['replacement'], $str);
  254. break;
  255. }
  256. return $str;
  257. }
  258. /**
  259. * Wraps text to a specific width, can optionally wrap at word breaks.
  260. *
  261. * ### Options
  262. *
  263. * - `width` The width to wrap to. Defaults to 72.
  264. * - `wordWrap` Only wrap on words breaks (spaces) Defaults to true.
  265. * - `indent` String to indent with. Defaults to null.
  266. * - `indentAt` 0 based index to start indenting at. Defaults to 0.
  267. *
  268. * @param string $text The text to format.
  269. * @param array|int $options Array of options to use, or an integer to wrap the text to.
  270. * @return string Formatted text.
  271. */
  272. public static function wrap($text, $options = [])
  273. {
  274. if (is_numeric($options)) {
  275. $options = ['width' => $options];
  276. }
  277. $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];
  278. if ($options['wordWrap']) {
  279. $wrapped = self::wordWrap($text, $options['width'], "\n");
  280. } else {
  281. $wrapped = trim(chunk_split($text, $options['width'] - 1, "\n"));
  282. }
  283. if (!empty($options['indent'])) {
  284. $chunks = explode("\n", $wrapped);
  285. for ($i = $options['indentAt'], $len = count($chunks); $i < $len; $i++) {
  286. $chunks[$i] = $options['indent'] . $chunks[$i];
  287. }
  288. $wrapped = implode("\n", $chunks);
  289. }
  290. return $wrapped;
  291. }
  292. /**
  293. * Unicode and newline aware version of wordwrap.
  294. *
  295. * @param string $text The text to format.
  296. * @param int $width The width to wrap to. Defaults to 72.
  297. * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
  298. * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
  299. * @return string Formatted text.
  300. */
  301. public static function wordWrap($text, $width = 72, $break = "\n", $cut = false)
  302. {
  303. $paragraphs = explode($break, $text);
  304. foreach ($paragraphs as &$paragraph) {
  305. $paragraph = static::_wordWrap($paragraph, $width, $break, $cut);
  306. }
  307. return implode($break, $paragraphs);
  308. }
  309. /**
  310. * Unicode aware version of wordwrap as helper method.
  311. *
  312. * @param string $text The text to format.
  313. * @param int $width The width to wrap to. Defaults to 72.
  314. * @param string $break The line is broken using the optional break parameter. Defaults to '\n'.
  315. * @param bool $cut If the cut is set to true, the string is always wrapped at the specified width.
  316. * @return string Formatted text.
  317. */
  318. protected static function _wordWrap($text, $width = 72, $break = "\n", $cut = false)
  319. {
  320. if ($cut) {
  321. $parts = [];
  322. while (mb_strlen($text) > 0) {
  323. $part = mb_substr($text, 0, $width);
  324. $parts[] = trim($part);
  325. $text = trim(mb_substr($text, mb_strlen($part)));
  326. }
  327. return implode($break, $parts);
  328. }
  329. $parts = [];
  330. while (mb_strlen($text) > 0) {
  331. if ($width >= mb_strlen($text)) {
  332. $parts[] = trim($text);
  333. break;
  334. }
  335. $part = mb_substr($text, 0, $width);
  336. $nextChar = mb_substr($text, $width, 1);
  337. if ($nextChar !== ' ') {
  338. $breakAt = mb_strrpos($part, ' ');
  339. if ($breakAt === false) {
  340. $breakAt = mb_strpos($text, ' ', $width);
  341. }
  342. if ($breakAt === false) {
  343. $parts[] = trim($text);
  344. break;
  345. }
  346. $part = mb_substr($text, 0, $breakAt);
  347. }
  348. $part = trim($part);
  349. $parts[] = $part;
  350. $text = trim(mb_substr($text, mb_strlen($part)));
  351. }
  352. return implode($break, $parts);
  353. }
  354. /**
  355. * Highlights a given phrase in a text. You can specify any expression in highlighter that
  356. * may include the \1 expression to include the $phrase found.
  357. *
  358. * ### Options:
  359. *
  360. * - `format` The piece of HTML with that the phrase will be highlighted
  361. * - `html` If true, will ignore any HTML tags, ensuring that only the correct text is highlighted
  362. * - `regex` a custom regex rule that is used to match words, default is '|$tag|iu'
  363. *
  364. * @param string $text Text to search the phrase in.
  365. * @param string|array $phrase The phrase or phrases that will be searched.
  366. * @param array $options An array of HTML attributes and options.
  367. * @return string The highlighted text
  368. * @link http://book.cakephp.org/3.0/en/core-libraries/string.html#highlighting-substrings
  369. */
  370. public static function highlight($text, $phrase, array $options = [])
  371. {
  372. if (empty($phrase)) {
  373. return $text;
  374. }
  375. $defaults = [
  376. 'format' => '<span class="highlight">\1</span>',
  377. 'html' => false,
  378. 'regex' => "|%s|iu"
  379. ];
  380. $options += $defaults;
  381. extract($options);
  382. if (is_array($phrase)) {
  383. $replace = [];
  384. $with = [];
  385. foreach ($phrase as $key => $segment) {
  386. $segment = '(' . preg_quote($segment, '|') . ')';
  387. if ($html) {
  388. $segment = "(?![^<]+>)$segment(?![^<]+>)";
  389. }
  390. $with[] = (is_array($format)) ? $format[$key] : $format;
  391. $replace[] = sprintf($options['regex'], $segment);
  392. }
  393. return preg_replace($replace, $with, $text);
  394. }
  395. $phrase = '(' . preg_quote($phrase, '|') . ')';
  396. if ($html) {
  397. $phrase = "(?![^<]+>)$phrase(?![^<]+>)";
  398. }
  399. return preg_replace(sprintf($options['regex'], $phrase), $format, $text);
  400. }
  401. /**
  402. * Strips given text of all links (<a href=....).
  403. *
  404. * @param string $text Text
  405. * @return string The text without links
  406. */
  407. public static function stripLinks($text)
  408. {
  409. return preg_replace('|<a\s+[^>]+>|im', '', preg_replace('|<\/a>|im', '', $text));
  410. }
  411. /**
  412. * Truncates text starting from the end.
  413. *
  414. * Cuts a string to the length of $length and replaces the first characters
  415. * with the ellipsis if the text is longer than length.
  416. *
  417. * ### Options:
  418. *
  419. * - `ellipsis` Will be used as Beginning and prepended to the trimmed string
  420. * - `exact` If false, $text will not be cut mid-word
  421. *
  422. * @param string $text String to truncate.
  423. * @param int $length Length of returned string, including ellipsis.
  424. * @param array $options An array of options.
  425. * @return string Trimmed string.
  426. */
  427. public static function tail($text, $length = 100, array $options = [])
  428. {
  429. $default = [
  430. 'ellipsis' => '...', 'exact' => true
  431. ];
  432. $options += $default;
  433. extract($options);
  434. if (mb_strlen($text) <= $length) {
  435. return $text;
  436. }
  437. $truncate = mb_substr($text, mb_strlen($text) - $length + mb_strlen($ellipsis));
  438. if (!$exact) {
  439. $spacepos = mb_strpos($truncate, ' ');
  440. $truncate = $spacepos === false ? '' : trim(mb_substr($truncate, $spacepos));
  441. }
  442. return $ellipsis . $truncate;
  443. }
  444. /**
  445. * Truncates text.
  446. *
  447. * Cuts a string to the length of $length and replaces the last characters
  448. * with the ellipsis if the text is longer than length.
  449. *
  450. * ### Options:
  451. *
  452. * - `ellipsis` Will be used as ending and appended to the trimmed string
  453. * - `exact` If false, $text will not be cut mid-word
  454. * - `html` If true, HTML tags would be handled correctly
  455. *
  456. * @param string $text String to truncate.
  457. * @param int $length Length of returned string, including ellipsis.
  458. * @param array $options An array of HTML attributes and options.
  459. * @return string Trimmed string.
  460. * @link http://book.cakephp.org/3.0/en/core-libraries/string.html#truncating-text
  461. */
  462. public static function truncate($text, $length = 100, array $options = [])
  463. {
  464. $default = [
  465. 'ellipsis' => '...', 'exact' => true, 'html' => false
  466. ];
  467. if (!empty($options['html']) && strtolower(mb_internal_encoding()) === 'utf-8') {
  468. $default['ellipsis'] = "\xe2\x80\xa6";
  469. }
  470. $options += $default;
  471. extract($options);
  472. if ($html) {
  473. if (mb_strlen(preg_replace('/<.*?>/', '', $text)) <= $length) {
  474. return $text;
  475. }
  476. $totalLength = mb_strlen(strip_tags($ellipsis));
  477. $openTags = [];
  478. $truncate = '';
  479. preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $text, $tags, PREG_SET_ORDER);
  480. foreach ($tags as $tag) {
  481. if (!preg_match('/img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param/s', $tag[2])) {
  482. if (preg_match('/<[\w]+[^>]*>/s', $tag[0])) {
  483. array_unshift($openTags, $tag[2]);
  484. } elseif (preg_match('/<\/([\w]+)[^>]*>/s', $tag[0], $closeTag)) {
  485. $pos = array_search($closeTag[1], $openTags);
  486. if ($pos !== false) {
  487. array_splice($openTags, $pos, 1);
  488. }
  489. }
  490. }
  491. $truncate .= $tag[1];
  492. $contentLength = mb_strlen(preg_replace('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', ' ', $tag[3]));
  493. if ($contentLength + $totalLength > $length) {
  494. $left = $length - $totalLength;
  495. $entitiesLength = 0;
  496. if (preg_match_all('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i', $tag[3], $entities, PREG_OFFSET_CAPTURE)) {
  497. foreach ($entities[0] as $entity) {
  498. if ($entity[1] + 1 - $entitiesLength <= $left) {
  499. $left--;
  500. $entitiesLength += mb_strlen($entity[0]);
  501. } else {
  502. break;
  503. }
  504. }
  505. }
  506. $truncate .= mb_substr($tag[3], 0, $left + $entitiesLength);
  507. break;
  508. } else {
  509. $truncate .= $tag[3];
  510. $totalLength += $contentLength;
  511. }
  512. if ($totalLength >= $length) {
  513. break;
  514. }
  515. }
  516. } else {
  517. if (mb_strlen($text) <= $length) {
  518. return $text;
  519. }
  520. $truncate = mb_substr($text, 0, $length - mb_strlen($ellipsis));
  521. }
  522. if (!$exact) {
  523. $spacepos = mb_strrpos($truncate, ' ');
  524. if ($html) {
  525. $truncateCheck = mb_substr($truncate, 0, $spacepos);
  526. $lastOpenTag = mb_strrpos($truncateCheck, '<');
  527. $lastCloseTag = mb_strrpos($truncateCheck, '>');
  528. if ($lastOpenTag > $lastCloseTag) {
  529. preg_match_all('/<[\w]+[^>]*>/s', $truncate, $lastTagMatches);
  530. $lastTag = array_pop($lastTagMatches[0]);
  531. $spacepos = mb_strrpos($truncate, $lastTag) + mb_strlen($lastTag);
  532. }
  533. $bits = mb_substr($truncate, $spacepos);
  534. preg_match_all('/<\/([a-z]+)>/', $bits, $droppedTags, PREG_SET_ORDER);
  535. if (!empty($droppedTags)) {
  536. if (!empty($openTags)) {
  537. foreach ($droppedTags as $closingTag) {
  538. if (!in_array($closingTag[1], $openTags)) {
  539. array_unshift($openTags, $closingTag[1]);
  540. }
  541. }
  542. } else {
  543. foreach ($droppedTags as $closingTag) {
  544. $openTags[] = $closingTag[1];
  545. }
  546. }
  547. }
  548. }
  549. $truncate = mb_substr($truncate, 0, $spacepos);
  550. }
  551. $truncate .= $ellipsis;
  552. if ($html) {
  553. foreach ($openTags as $tag) {
  554. $truncate .= '</' . $tag . '>';
  555. }
  556. }
  557. return $truncate;
  558. }
  559. /**
  560. * Extracts an excerpt from the text surrounding the phrase with a number of characters on each side
  561. * determined by radius.
  562. *
  563. * @param string $text String to search the phrase in
  564. * @param string $phrase Phrase that will be searched for
  565. * @param int $radius The amount of characters that will be returned on each side of the founded phrase
  566. * @param string $ellipsis Ending that will be appended
  567. * @return string Modified string
  568. * @link http://book.cakephp.org/3.0/en/core-libraries/string.html#extracting-an-excerpt
  569. */
  570. public static function excerpt($text, $phrase, $radius = 100, $ellipsis = '...')
  571. {
  572. if (empty($text) || empty($phrase)) {
  573. return static::truncate($text, $radius * 2, ['ellipsis' => $ellipsis]);
  574. }
  575. $append = $prepend = $ellipsis;
  576. $phraseLen = mb_strlen($phrase);
  577. $textLen = mb_strlen($text);
  578. $pos = mb_strpos(mb_strtolower($text), mb_strtolower($phrase));
  579. if ($pos === false) {
  580. return mb_substr($text, 0, $radius) . $ellipsis;
  581. }
  582. $startPos = $pos - $radius;
  583. if ($startPos <= 0) {
  584. $startPos = 0;
  585. $prepend = '';
  586. }
  587. $endPos = $pos + $phraseLen + $radius;
  588. if ($endPos >= $textLen) {
  589. $endPos = $textLen;
  590. $append = '';
  591. }
  592. $excerpt = mb_substr($text, $startPos, $endPos - $startPos);
  593. $excerpt = $prepend . $excerpt . $append;
  594. return $excerpt;
  595. }
  596. /**
  597. * Creates a comma separated list where the last two items are joined with 'and', forming natural language.
  598. *
  599. * @param array $list The list to be joined.
  600. * @param string $and The word used to join the last and second last items together with. Defaults to 'and'.
  601. * @param string $separator The separator used to join all the other items together. Defaults to ', '.
  602. * @return string The glued together string.
  603. * @link http://book.cakephp.org/3.0/en/core-libraries/string.html#converting-an-array-to-sentence-form
  604. */
  605. public static function toList(array $list, $and = null, $separator = ', ')
  606. {
  607. if ($and === null) {
  608. $and = __d('cake', 'and');
  609. }
  610. if (count($list) > 1) {
  611. return implode($separator, array_slice($list, null, -1)) . ' ' . $and . ' ' . array_pop($list);
  612. }
  613. return array_pop($list);
  614. }
  615. /**
  616. * Check if the string contain multibyte characters
  617. *
  618. * @param string $string value to test
  619. * @return bool
  620. */
  621. public static function isMultibyte($string)
  622. {
  623. $length = strlen($string);
  624. for ($i = 0; $i < $length; $i++) {
  625. $value = ord(($string[$i]));
  626. if ($value > 128) {
  627. return true;
  628. }
  629. }
  630. return false;
  631. }
  632. /**
  633. * Converts a multibyte character string
  634. * to the decimal value of the character
  635. *
  636. * @param string $string String to convert.
  637. * @return array
  638. */
  639. public static function utf8($string)
  640. {
  641. $map = [];
  642. $values = [];
  643. $find = 1;
  644. $length = strlen($string);
  645. for ($i = 0; $i < $length; $i++) {
  646. $value = ord($string[$i]);
  647. if ($value < 128) {
  648. $map[] = $value;
  649. } else {
  650. if (empty($values)) {
  651. $find = ($value < 224) ? 2 : 3;
  652. }
  653. $values[] = $value;
  654. if (count($values) === $find) {
  655. if ($find == 3) {
  656. $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
  657. } else {
  658. $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
  659. }
  660. $values = [];
  661. $find = 1;
  662. }
  663. }
  664. }
  665. return $map;
  666. }
  667. /**
  668. * Converts the decimal value of a multibyte character string
  669. * to a string
  670. *
  671. * @param array $array Array
  672. * @return string
  673. */
  674. public static function ascii(array $array)
  675. {
  676. $ascii = '';
  677. foreach ($array as $utf8) {
  678. if ($utf8 < 128) {
  679. $ascii .= chr($utf8);
  680. } elseif ($utf8 < 2048) {
  681. $ascii .= chr(192 + (($utf8 - ($utf8 % 64)) / 64));
  682. $ascii .= chr(128 + ($utf8 % 64));
  683. } else {
  684. $ascii .= chr(224 + (($utf8 - ($utf8 % 4096)) / 4096));
  685. $ascii .= chr(128 + ((($utf8 % 4096) - ($utf8 % 64)) / 64));
  686. $ascii .= chr(128 + ($utf8 % 64));
  687. }
  688. }
  689. return $ascii;
  690. }
  691. /**
  692. * Converts filesize from human readable string to bytes
  693. *
  694. * @param string $size Size in human readable string like '5MB', '5M', '500B', '50kb' etc.
  695. * @param mixed $default Value to be returned when invalid size was used, for example 'Unknown type'
  696. * @return mixed Number of bytes as integer on success, `$default` on failure if not false
  697. * @throws \InvalidArgumentException On invalid Unit type.
  698. * @link http://book.cakephp.org/3.0/en/core-libraries/helpers/text.html
  699. */
  700. public static function parseFileSize($size, $default = false)
  701. {
  702. if (ctype_digit($size)) {
  703. return (int)$size;
  704. }
  705. $size = strtoupper($size);
  706. $l = -2;
  707. $i = array_search(substr($size, -2), ['KB', 'MB', 'GB', 'TB', 'PB']);
  708. if ($i === false) {
  709. $l = -1;
  710. $i = array_search(substr($size, -1), ['K', 'M', 'G', 'T', 'P']);
  711. }
  712. if ($i !== false) {
  713. $size = substr($size, 0, $l);
  714. return $size * pow(1024, $i + 1);
  715. }
  716. if (substr($size, -1) === 'B' && ctype_digit(substr($size, 0, -1))) {
  717. $size = substr($size, 0, -1);
  718. return (int)$size;
  719. }
  720. if ($default !== false) {
  721. return $default;
  722. }
  723. throw new InvalidArgumentException('No unit type.');
  724. }
  725. }