TextLib.php 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. <?php
  2. App::uses('String', 'Utility');
  /**
   * Text helpers layered on top of the String utility.
   *
   * Static helpers operate on the string passed in. Some instance helpers fall
   * back to the text given to the constructor when their argument is null.
   * //TODO: cleanup
   *
   * 2010-08-31 ms
   */
  class TextLib extends String {

      /**
       * Working text plus further slots.
       *
       * Only $text is read or written by the methods of this class; the other
       * properties are not referenced by any method here. "$lenght" (sic) is
       * left untouched because renaming a protected property could break
       * subclasses.
       */
      protected $text, $lenght, $char, $letter, $space, $word, $r_word, $sen, $r_sen, $para,
          $r_para, $beautified;

      /**
       * @param string|null $text Text used by words() and as the default input
       *   of convertToOrd() and widont().
       */
      public function __construct($text = null) {
          $this->text = $text;
      }

      /**
       * Count words in a text.
       *
       * A word is any run of non-whitespace characters, so "0" and digits count
       * as words (str_word_count() would skip them) and tabs or line breaks
       * separate words just like spaces do.
       *
       * @param string $text
       * @return int
       * 2009-11-11 ms
       */
      public static function numberOfWords($text) {
          // NUL and vertical tab are listed explicitly so that everything trim() strips acts as a separator.
          // PREG_SPLIT_NO_EMPTY discards the empty pieces caused by leading, trailing or repeated whitespace.
          $words = preg_split('/[\s\x00\x0B]+/', (string)$text, -1, PREG_SPLIT_NO_EMPTY);
          return $words === false ? 0 : count($words);
      }

      /**
       * Return an abbreviated string, with characters in the middle of the
       * excessively long string replaced by $ending.
       *
       * The result is at most $length characters long as long as $ending itself
       * fits into $length; otherwise only $ending is returned.
       *
       * @param string $text The original string.
       * @param integer $length The length at which to abbreviate.
       * @param string $ending The marker placed in the middle.
       * @return string The abbreviated string, if longer than $length.
       */
      public static function abbreviate($text, $length = 20, $ending = '...') {
          if (mb_strlen($text) <= $length) {
              return $text;
          }
          // Characters of the original text that may be kept around the marker.
          $keep = max(0, (int)$length - mb_strlen($ending));
          // The head receives the extra character when $keep is odd.
          $headLength = (int)ceil($keep / 2);
          $tailLength = $keep - $headLength;

          $head = rtrim(mb_substr($text, 0, $headLength));
          // A negative offset of 0 would return the whole text, so an empty tail is handled explicitly.
          $tail = $tailLength > 0 ? ltrim(mb_substr($text, -$tailLength)) : '';
          return $head . $ending . $tail;
      }

      /* other */

      /**
       * Convert a string into its byte values joined by $separator.
       *
       * Splitting on the empty pattern yields an empty piece before the first and
       * after the last byte, so the result is framed by a leading and a trailing
       * 0 (e.g. "h H" becomes "0-104-32-72-0"). This output format is kept
       * unchanged because existing callers may rely on it.
       *
       * @param string|null $str Input; null uses the text given to the constructor.
       * @param string $separator
       * @return string
       */
      public function convertToOrd($str = null, $separator = '-') {
          if ($str === null) {
              $str = $this->text;
          }
          $codes = array();
          foreach (preg_split('//', $str, -1) as $byte) {
              // The empty boundary pieces are reported as 0, exactly what ord('') returns.
              $codes[] = ($byte === '') ? 0 : ord($byte);
          }
          return implode($separator, $codes);
      }

      /**
       * Render the bytes of a string as an HTML table.
       *
       * Every group of up to $maxCols bytes produces two rows: a header row with
       * the (HTML-escaped) characters and a data row with their byte values.
       *
       * @param string $str
       * @param int $maxCols Maximum cells per row; 0 or less puts everything into one row.
       * @return string HTML
       */
      public static function convertToOrdTable($str, $maxCols = 20) {
          $bytes = preg_split('//', (string)$str, -1, PREG_SPLIT_NO_EMPTY);
          $maxCols = (int)$maxCols;

          if (empty($bytes)) {
              $rows = array();
          } elseif ($maxCols > 0) {
              $rows = array_chunk($bytes, $maxCols);
          } else {
              $rows = array($bytes);
          }

          $res = '<table>';
          foreach ($rows as $row) {
              $chars = array();
              $ords = array();
              foreach ($row as $byte) {
                  // A single-byte charset escapes the HTML special characters and passes
                  // every other byte through untouched (the input is split per byte).
                  $chars[] = htmlspecialchars($byte, ENT_QUOTES, 'ISO-8859-1');
                  $ords[] = ord($byte);
              }
              $res .= '<tr><th>' . implode('</th><th>', $chars) . '</th></tr>';
              $res .= '<tr><td>' . implode('</td><td>', $ords) . '</td></tr>';
          }
          $res .= '</table>';
          return $res;
      }

      /**
       * Explode a string of given tags into an array.
       *
       * @param string $tags Comma separated tags; a tag may be wrapped in double
       *   quotes to contain commas, with "" standing for a literal quote.
       * @return array List of non-empty tags
       */
      public function explodeTags($tags) {
          // This regexp allows the following types of user input:
          // this, "somecompany, llc", "and ""this"" w,o.rks", foo bar
          $regexp = '%(?:^|,\ *)("(?>[^"]*)(?>""[^"]* )*"|(?: [^",]*))%x';
          preg_match_all($regexp, $tags, $matches);

          $result = array();
          foreach (array_unique($matches[1]) as $typed) {
              // If a user has escaped a term (to demonstrate that it is a group,
              // or includes a comma or quote character), we remove the escape
              // formatting so to save the term into the database as the user intends.
              $unquoted = preg_replace('/^"(.*)"$/', '\1', $typed);
              $tag = trim(str_replace('""', '"', $unquoted));
              // Compare against '' so that the tag "0" is not lost.
              if ($tag !== '') {
                  $result[] = $tag;
              }
          }
          return $result;
      }

      /**
       * Implode an array of tags into a string.
       *
       * @param array $tags
       * @return string Tags joined by ", "; tags containing a comma or a double
       *   quote are wrapped in double quotes with inner quotes doubled.
       */
      public function implodeTags($tags) {
          $encoded = array();
          foreach ($tags as $tag) {
              // Commas and quotes in tag names are special cases, so encode them.
              $needsQuoting = strpos($tag, ',') !== false || strpos($tag, '"') !== false;
              $encoded[] = $needsQuoting ? '"' . str_replace('"', '""', $tag) . '"' : $tag;
          }
          return implode(', ', $encoded);
      }

      /**
       * Prevents [widow words](http://www.shauninman.com/archive/2006/08/22/widont_wordpress_plugin)
       * by inserting a non-breaking space between the last two words.
       *
       * echo $textLib->widont($text);
       *
       * @param string|null $str Text to remove widows from; null uses the text
       *   given to the constructor.
       * @return string
       */
      public function widont($str = null) {
          if ($str === null) {
              $str = $this->text;
          }
          $str = rtrim($str);
          $lastSpace = strrpos($str, ' ');
          if ($lastSpace === false) {
              return $str;
          }
          return substr($str, 0, $lastSpace) . '&nbsp;' . substr($str, $lastSpace + 1);
      }

      /* text object specific */

      /**
       * Extract the distinct words of the text given to the constructor.
       *
       * Punctuation and digits are stripped from every word. Keys of the
       * returned array are the positions of the surviving pieces and are
       * therefore not necessarily consecutive.
       *
       * @param array $options
       * - min_char: drop words shorter than this
       * - max_char: drop words longer than this
       * - case_sensitive: keep the case instead of lowercasing
       * @return array
       * 2010-10-09 ms
       */
      public function words($options = array()) {
          // NL and TB are constants defined outside this file; fall back to the
          // usual line feed / tab when they are missing (assumed meaning - verify).
          $lineBreak = defined('NL') ? NL : "\n";
          $tab = defined('TB') ? TB : "\t";

          $text = str_replace(array(PHP_EOL, $lineBreak, $tab), ' ', $this->text);
          $pieces = array_unique(explode(' ', $text));

          # strip chars like . or , as well as digits
          $strip = array(
              ',', '.', ';', ':', '#', '(', ')', '{', '}', '[', ']', '$', '%', '"', '!', '?', '<', '>', '=', '/',
              '1', '2', '3', '4', '5', '6', '7', '8', '9', '0'
          );
          $lowercase = empty($options['case_sensitive']);

          foreach ($pieces as $key => $piece) {
              if ($lowercase) {
                  $piece = mb_strtolower($piece);
              }
              $piece = trim(str_replace($strip, '', $piece));
              $length = mb_strlen($piece);

              $tooShort = !empty($options['min_char']) && $length < $options['min_char'];
              $tooLong = !empty($options['max_char']) && $length > $options['max_char'];
              if ($piece === '' || $tooShort || $tooLong) {
                  unset($pieces[$key]);
              } else {
                  $pieces[$key] = $piece;
              }
          }
          // Normalisation can make previously different pieces equal.
          return array_unique($pieces);
      }

      /**
       * Limit the number of words in a string.
       *
       * <code>
       * // Returns "This is a..."
       * echo TextLib::maxWords('This is a sentence.', 3);
       *
       * // Limit the number of words and append a custom ending
       * echo TextLib::maxWords('This is a sentence.', 3, array('ellipsis' => '---'));
       * </code>
       *
       * @param string $value
       * @param int $words
       * @param array $options
       * - ellipsis: appended when the text was cut (default "...")
       * - html: if set and App.encoding is UTF-8, the default ellipsis becomes
       *   the single ellipsis character
       * @return string The text itself when it cannot be matched (e.g. invalid UTF-8).
       */
      public static function maxWords($value, $words = 100, $options = array()) {
          $default = array(
              'ellipsis' => '...'
          );
          if (!empty($options['html']) && Configure::read('App.encoding') === 'UTF-8') {
              $default['ellipsis'] = "\xe2\x80\xa6";
          }
          $options = array_merge($default, $options);

          if (trim($value) === '') {
              return '';
          }

          // The limit ends up inside the pattern, so it must be a plain integer.
          $words = (int)$words;
          if ($words < 1) {
              // Nothing may be kept: everything was cut, so only the marker remains.
              return $options['ellipsis'];
          }

          $matched = preg_match('/^\s*+(?:\S++\s*+){1,' . $words . '}/u', $value, $matches);
          if (!$matched) {
              return $value;
          }

          // The match swallows trailing whitespace, so equal lengths mean nothing was cut.
          $end = (mb_strlen($value) === mb_strlen($matches[0])) ? '' : $options['ellipsis'];
          return rtrim($matches[0]) . $end;
      }

      /**
       * High ASCII to Entities
       *
       * Converts High ascii text and MS Word special characters to character entities.
       * UTF-8 sequences of two, three and four bytes become one numeric entity
       * for their code point. High bytes that do not complete a sequence before
       * an ASCII character or the end of the string are emitted as one entity
       * per byte instead of being dropped.
       *
       * @param string $str
       * @return string
       */
      public function ascii_to_entities($str) {
          $out = '';
          $pending = array();
          $expected = 1;

          for ($i = 0, $s = strlen($str); $i < $s; $i++) {
              $ordinal = ord($str[$i]);

              if ($ordinal < 128) {
                  // An ASCII byte ends whatever sequence was being collected;
                  // output the collected bytes before continuing.
                  foreach ($pending as $byte) {
                      $out .= '&#' . $byte . ';';
                  }
                  $pending = array();
                  $out .= $str[$i];
                  continue;
              }

              if (!$pending) {
                  // The lead byte announces the length of the sequence.
                  if ($ordinal < 224) {
                      $expected = 2;
                  } elseif ($ordinal < 240) {
                      $expected = 3;
                  } else {
                      $expected = 4;
                  }
              }
              $pending[] = $ordinal;

              if (count($pending) === $expected) {
                  if ($expected === 2) {
                      $number = (($pending[0] & 0x1F) << 6) | ($pending[1] & 0x3F);
                  } elseif ($expected === 3) {
                      $number = (($pending[0] & 0x0F) << 12) | (($pending[1] & 0x3F) << 6) | ($pending[2] & 0x3F);
                  } else {
                      $number = (($pending[0] & 0x07) << 18) | (($pending[1] & 0x3F) << 12)
                          | (($pending[2] & 0x3F) << 6) | ($pending[3] & 0x3F);
                  }
                  $out .= '&#' . $number . ';';
                  $pending = array();
              }
          }

          // A sequence cut off by the end of the string must not vanish silently.
          foreach ($pending as $byte) {
              $out .= '&#' . $byte . ';';
          }
          return $out;
      }

      // ------------------------------------------------------------------------

      /**
       * Entities to ASCII
       *
       * Converts numeric character entities back to their UTF-8 bytes and,
       * if $all is set, also the named entities &amp; &lt; &gt; &quot; &apos;.
       * All replacements happen in a single pass, so text produced by one
       * replacement is never decoded a second time ("&amp;lt;" gives "&lt;").
       * Entities above U+10FFFF are left untouched.
       *
       * @param string $str
       * @param bool $all Also decode the named entities.
       * @return string
       */
      public function entities_to_ascii($str, $all = true) {
          $map = array();

          if (preg_match_all('/\&#(\d+)\;/', $str, $matches)) {
              foreach (array_unique($matches[0]) as $i => $entity) {
                  $code = (int)$matches[1][$i];
                  if ($code > 0x10FFFF) {
                      continue;
                  }
                  if ($code < 128) {
                      $bytes = chr($code);
                  } elseif ($code < 2048) {
                      $bytes = chr(0xC0 | ($code >> 6))
                          . chr(0x80 | ($code & 0x3F));
                  } elseif ($code < 65536) {
                      $bytes = chr(0xE0 | ($code >> 12))
                          . chr(0x80 | (($code >> 6) & 0x3F))
                          . chr(0x80 | ($code & 0x3F));
                  } else {
                      $bytes = chr(0xF0 | ($code >> 18))
                          . chr(0x80 | (($code >> 12) & 0x3F))
                          . chr(0x80 | (($code >> 6) & 0x3F))
                          . chr(0x80 | ($code & 0x3F));
                  }
                  $map[$entity] = $bytes;
              }
          }

          if ($all) {
              $map['&amp;'] = '&';
              $map['&lt;'] = '<';
              $map['&gt;'] = '>';
              $map['&quot;'] = '"';
              $map['&apos;'] = "'";
              $map['&#45;'] = '-';
          }

          // strtr() with a map never rescans text it has already replaced.
          return $map ? strtr($str, $map) : $str;
      }

      /**
       * Reduce Double Slashes
       *
       * Converts double slashes in a string to a single slash,
       * except those found in http://
       *
       * http://www.some-site.com//index.php
       *
       * becomes:
       *
       * http://www.some-site.com/index.php
       *
       * Slashes at the very start of the string are left alone because the
       * pattern needs a preceding character that is not a colon.
       *
       * @param string $str
       * @return string
       */
      public function reduce_double_slashes($str) {
          $pattern = '#([^:])//+#';
          return preg_replace($pattern, '\\1/', $str);
      }

      // ------------------------------------------------------------------------

      /**
       * Reduce Multiples
       *
       * Reduces multiple instances of a particular character. Example:
       *
       * Fred, Bill,, Joe, Jimmy
       *
       * becomes:
       *
       * Fred, Bill, Joe, Jimmy
       *
       * @param string $str
       * @param string $character The character (or character sequence) you wish to reduce
       * @param bool $trim TRUE/FALSE - whether to trim the character from the beginning/end
       * @return string
       */
      public function reduce_multiples($str, $character = ',', $trim = false) {
          if ((string)$character === '') {
              // Nothing to reduce; an empty needle would also yield an invalid pattern.
              return $str;
          }
          // The group makes the quantifier apply to the whole needle, not just its last character.
          $pattern = '#(?:' . preg_quote($character, '#') . '){2,}#';
          $str = preg_replace($pattern, $character, $str);
          if ($trim === true) {
              $str = trim($str, $character);
          }
          return $str;
      }
  }