text = $text; }

/**
 * Heuristically detects "screaming" text, i.e. text written mostly in capitals.
 *
 * Counts uppercase letters (A-Z, ÄÖÜ) and lowercase letters (a-z, äöüß) and
 * reports true when at least one uppercase letter exists and the uppercase
 * letters are at least as numerous as the lowercase ones.
 *
 * @param string|null $str Text to check; null falls back to $this->text
 * @param float $tolerance Ratio (0 ... 1); accepted but not evaluated yet (see TODO)
 * @return bool
 * 2011-10-13 ms
 */
public function isScreamFont($str = null, $tolerance = 0.4) {
	if ($str === null) {
		$str = $this->text;
	}
	if (empty($str)) {
		return false;
	}

	// preg_match_all() returns the number of matches; false (regex error) is cast to 0
	$countUpper = (int)preg_match_all('/[A-ZÄÖÜ]/u', $str, $uppercase);
	$countLower = (int)preg_match_all('/[a-zäöüß]/u', $str, $lowercase);

	//TODO: tolerance
	return $countUpper > 0 && $countUpper >= $countLower;
}

/**
 * Checks whether a string consists only of word characters (\w), optionally
 * extended by a set of additional allowed characters.
 *
 * @param string|null $str Text to check; null falls back to $this->text
 * @param string|null $additionalChars Extra characters to allow
 * - e.g. `-()0123456789`
 * @return int|false 1 if the string matches, 0 if not, false on a regex error
 */
public function isWord($str = null, $additionalChars = null) {
	if ($str === null) {
		$str = $this->text;
	}

	$extra = '';
	if ($additionalChars !== null && $additionalChars !== '') {
		// Quote the characters so that "-", "]", "^" or "\" stay literal inside the class
		$extra = preg_quote($additionalChars, '/');
	}
	return preg_match('/^[\w' . $extra . ']+$/', $str);
}

/* General UTF-8 helpers */

/**
 * Tests whether a string contains only 7-bit ASCII bytes. This is used to
 * determine when to use native functions or UTF-8 functions.
 *
 * @param string|null $str String to check; null falls back to $this->text
 * @return bool
 */
public function isAscii($str = null) {
	if ($str === null) {
		$str = $this->text;
	}
	return !preg_match('/[^\x00-\x7F]/S', $str);
}

/**
 * Strips out device control codes in the ASCII range
 * (tab, line feed and carriage return are kept).
 *
 * @param string|null $str String to clean; null falls back to $this->text
 * @return string
 */
public function stripAsciiCtrl($str = null) {
	if ($str === null) {
		$str = $this->text;
	}
	return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);
}

/**
 * Strips out all non-7bit ASCII bytes.
 *
 * @param string|null $str String to clean; null falls back to $this->text
 * @return string
 */
public function stripNonAscii($str = null) {
	if ($str === null) {
		$str = $this->text;
	}
	return preg_replace('/[^\x00-\x7F]+/S', '', $str);
}

/**
 * Converts a string into the list of its byte ordinals, joined by a separator.
 *
 * The split is byte-wise (no /u modifier), so a multi-byte character
 * contributes one ordinal per byte.
 *
 * @param string|null $str String to convert; null falls back to $this->text
 * @param string $separator Glue placed between the ordinals
 * @return string
 */
public function convertToOrd($str = null, $separator = '-') {
	if ($str === null) {
		$str = $this->text;
	}

	// Without PREG_SPLIT_NO_EMPTY the empty pattern yields empty elements at both
	// ends of the result, which ord() would report as spurious 0 values.
	$chars = preg_split('//', $str, -1, PREG_SPLIT_NO_EMPTY);
	$res = array();
	foreach ($chars as $char) {
		$res[] = ord($char);
	}
	return implode($separator, $res);
}

public static function convertToOrdTable($str, $maxCols = 20) { $res = '
| '.implode(' | ', $r['chr']).' | '; $res .= '
|---|---|
| '.implode(' | ', $r['ord']).' |
* // Returns "This is a..."
* echo TextExt::maxWords('This is a sentence.', 3);
*
* // Limit the number of words and append a custom ending
* echo TextExt::maxWords('This is a sentence.', 3, array('ellipsis' => '---'));
*
*
* @param string $value
* @param int $words
* @param array $options
* - ellipsis
* - html
* @return string
*/
public static function maxWords($value, $words = 100, $options = array()) {
	$default = array(
		'ellipsis' => '...'
	);
	// For HTML output in a UTF-8 application use the single-character ellipsis (U+2026)
	if (!empty($options['html']) && Configure::read('App.encoding') == 'UTF-8') {
		$default['ellipsis'] = "\xe2\x80\xa6";
	}
	$options = array_merge($default, $options);

	if (trim($value) === '') {
		return '';
	}

	// The limit is interpolated into the pattern below, so it must be a plain integer
	$words = (int)$words;
	if ($words < 1) {
		// A quantifier of {1,0} would be an invalid pattern. No word can be kept,
		// so only the ellipsis of the (non-empty) text remains.
		return $options['ellipsis'];
	}

	// Possessive quantifiers: optional leading whitespace followed by up to $words words
	if (!preg_match('/^\s*+(?:\S++\s*+){1,' . $words . '}/u', $value, $matches)) {
		// The match only fails on a regex error (e.g. invalid UTF-8); leave the text untouched
		return $value;
	}

	// Nothing was cut off, so no ellipsis is needed
	if (mb_strlen($value) === mb_strlen($matches[0])) {
		return rtrim($matches[0]);
	}
	return rtrim($matches[0]) . $options['ellipsis'];
}
/**
 * Counts how often each word occurs in $this->text.
 *
 * Line breaks, tabs, punctuation and digits are treated as word separators.
 *
 * @param array $options
 * - min_char: skip words shorter than this many characters
 * - max_char: skip words longer than this many characters
 * - case_sensitive: keep the case of words (default: words are lowercased)
 * - sort: 'asc', 'desc' (by count), 'alpha' (by word), 'length' (not implemented yet) or false
 * - limit: keep only the first N entries of the (sorted) result
 * @return array Map of word => number of occurrences
 * 2010-10-09 ms
 */
public function wordCount($options = array()) {
	$separators = array('*', '+', '~', ',', '.', ';', ':', '#', '', '(', ')', '{', '}', '[', ']', '$', '%', '“', '”', '—', '"', '‘', '’', '!', '?', '<', '>', '=', '/');
	$separators = array_merge($separators, array(1, 2, 3, 4, 5, 6, 7, 8, 9, 0));

	// Normalize every kind of separator to a plain space before splitting
	$normalized = str_replace(array(NL, CR, PHP_EOL, TB), ' ', $this->text);
	$normalized = str_replace($separators, ' ', $normalized);

	$lowercase = empty($options['case_sensitive']);
	$counts = array();
	//TODO: use array_count_values()?
	foreach (explode(' ', $normalized) as $word) {
		if ($lowercase) {
			$word = mb_strtolower($word);
		}
		$word = trim($word);
		if (empty($word)) {
			continue;
		}

		$length = mb_strlen($word);
		if (!empty($options['min_char']) && $length < $options['min_char']) {
			continue;
		}
		if (!empty($options['max_char']) && $length > $options['max_char']) {
			continue;
		}

		if (isset($counts[$word])) {
			$counts[$word]++;
		} else {
			$counts[$word] = 1;
		}
	}

	if (!empty($options['sort'])) {
		switch (strtolower($options['sort'])) {
			case 'asc':
				asort($counts);
				break;
			case 'desc':
				arsort($counts);
				break;
			case 'length':
				//TODO: sort by word length
				break;
			case 'alpha':
				ksort($counts);
				break;
		}
	}

	if (!empty($options['limit'])) {
		$counts = array_slice($counts, 0, (int)$options['limit'], true);
	}
	return $counts;
}
/**
 * Returns the sentences found in $this->text, or just how many there are.
 *
 * The result is computed on first use and kept in $this->sen (count) and
 * $this->r_sen (list of sentences with line breaks removed).
 *
 * @param bool $parse True to get the list of sentences, false for the count
 * @return mixed Sentence list if $parse is truthy, otherwise the sentence count
 */
public function getSentence($parse = false) {
	if (!($this->sen || $this->r_sen)) {
		@preg_match_all("/[^:|;|\!|\.]+(:|;|\!|\.| )+/", $this->text, $found);
		$this->sen = count($found[0]);
		foreach ($found[0] as $sentence) {
			// Drop line breaks inside a sentence
			$this->r_sen[] = str_replace(array("\n", "\r"), '', trim($sentence));
		}
	}

	if ($parse) {
		return $this->r_sen;
	}
	return $this->sen;
}
/**
 * Returns the paragraphs found in $this->text, or just how many there are.
 *
 * Carriage returns are removed and a trailing newline is appended before
 * matching. The result is computed on first use and kept in $this->para
 * (count) and $this->r_para (list of trimmed paragraphs).
 *
 * @param bool $parse True to get the list of paragraphs, false for the count
 * @return mixed Paragraph list if $parse is truthy, otherwise the paragraph count
 */
public function getParagraph($parse = false) {
	if (!($this->para || $this->r_para)) {
		@preg_match_all(
			"/[^\n]+?(:|;|\!|\.| )+\n/s",
			strtr($this->text, array("\r" => '')) . "\n",
			$found
		);
		$this->para = count($found[0]);
		foreach ($found[0] as $paragraph) {
			$this->r_para[] = trim($paragraph);
		}
	}

	if ($parse) {
		return $this->r_para;
	}
	return $this->para;
}
/**
 * Normalizes spacing around punctuation in $this->text.
 *
 * The cleaned text is computed on first use and kept in $this->beautified.
 *
 * @param int|bool $wordwrap Line width passed to wordwrap(); false disables wrapping
 * @return string
 */
public function beautify($wordwrap = false) {
	if (!$this->beautified) {
		// pattern => replacement, applied in this order
		$rules = array(
			"/ {1,}/" => " ",
			"/\. {1,}\./" => ".",
			"/\. *(?!\.)/" => ". ",
			"/(,|:|;|\!|\)) */" => "$1 ",
			"/(,|:|;|\!|\)|\.) *\r\n/" => "$1\r\n",
			"/(\r\n) {3,}/" => "\r\n\r\n",
		);
		$this->beautified = @preg_replace(array_keys($rules), array_values($rules), $this->text);
	}

	if ($wordwrap) {
		return wordwrap($this->beautified, $wordwrap);
	}
	return $this->beautified;
}
/**
 * High ASCII to Entities
 *
 * Converts every multi-byte UTF-8 character (high ASCII text, MS Word special
 * characters, ...) to a numeric character entity such as `&#228;`.
 * Plain 7-bit ASCII bytes are copied unchanged.
 *
 * @access public
 * @param string $str UTF-8 encoded input
 * @return string
 */
public function ascii_to_entities($str) {
	$count = 1;
	$out = '';
	$temp = array();
	for ($i = 0, $s = strlen($str); $i < $s; $i++) {
		$ordinal = ord($str[$i]);
		if ($ordinal < 128) {
			/*
			If the $temp array has a value but we have moved on, then it seems only
			fair that we output that entity and restart $temp before continuing. -Paul
			*/
			if (count($temp) == 1) {
				$out .= '&#' . array_shift($temp) . ';';
				$count = 1;
			}
			$out .= $str[$i];
			continue;
		}

		if (count($temp) == 0) {
			// The lead byte determines the length of the UTF-8 sequence
			if ($ordinal < 224) {
				$count = 2;
			} elseif ($ordinal < 240) {
				$count = 3;
			} else {
				$count = 4;
			}
		}
		$temp[] = $ordinal;

		if (count($temp) == $count) {
			// Strip the marker bits of each byte and combine the payload bits
			if ($count == 4) {
				$number = (($temp[0] % 8) * 262144) + (($temp[1] % 64) * 4096) + (($temp[2] % 64) * 64) + ($temp[3] % 64);
			} elseif ($count == 3) {
				$number = (($temp[0] % 16) * 4096) + (($temp[1] % 64) * 64) + ($temp[2] % 64);
			} else {
				$number = (($temp[0] % 32) * 64) + ($temp[1] % 64);
			}
			$out .= '&#' . $number . ';';
			$count = 1;
			$temp = array();
		}
	}
	return $out;
}
// ------------------------------------------------------------------------
/**
 * Entities to ASCII
 *
 * Converts numeric character entities (`&#NNN;`) back to their UTF-8 bytes
 * and, optionally, a handful of named entities back to plain characters.
 *
 * @access public
 * @param string $str Text containing entities
 * @param bool $all Also decode &amp; &lt; &gt; &quot; &apos; and &#45;
 * @return string
 */
public function entities_to_ascii($str, $all = true) {
	if (preg_match_all('/\&#(\d+)\;/', $str, $matches)) {
		for ($i = 0, $s = count($matches[0]); $i < $s; $i++) {
			$digits = (int)$matches[1][$i];
			if ($digits > 0x10FFFF) {
				// Not a valid Unicode code point; leave the entity as it is
				continue;
			}

			// Encode the code point as a 1 to 4 byte UTF-8 sequence
			if ($digits < 128) {
				$out = chr($digits);
			} elseif ($digits < 2048) {
				$out = chr(192 + ($digits >> 6))
					. chr(128 + ($digits & 63));
			} elseif ($digits < 65536) {
				$out = chr(224 + ($digits >> 12))
					. chr(128 + (($digits >> 6) & 63))
					. chr(128 + ($digits & 63));
			} else {
				$out = chr(240 + ($digits >> 18))
					. chr(128 + (($digits >> 12) & 63))
					. chr(128 + (($digits >> 6) & 63))
					. chr(128 + ($digits & 63));
			}
			$str = str_replace($matches[0][$i], $out, $str);
		}
	}

	if ($all) {
		$str = str_replace(
			array("&amp;", "&lt;", "&gt;", "&quot;", "&apos;", "&#45;"),
			array("&", "<", ">", "\"", "'", "-"),
			$str
		);
	}
	return $str;
}
/**
 * Reduce Double Slashes
 *
 * Collapses every run of two or more slashes into a single slash, unless
 * the run directly follows a colon (as in http://). Example:
 *
 * http://www.some-site.com//index.php
 *
 * becomes:
 *
 * http://www.some-site.com/index.php
 *
 * @access public
 * @param string $str
 * @return string
 */
public function reduce_double_slashes($str) {
	// Group 1 captures the non-colon character in front of the slash run
	$slashRun = "#([^:])//+#";
	$singleSlash = "\\1/";

	return preg_replace($slashRun, $singleSlash, $str);
}
// ------------------------------------------------------------------------
/**
 * Reduce Multiples
 *
 * Reduces multiple consecutive instances of a particular character to a
 * single one. Example:
 *
 * Fred, Bill,, Joe, Jimmy
 *
 * becomes:
 *
 * Fred, Bill, Joe, Jimmy
 *
 * The character may be multi-byte (e.g. a UTF-8 umlaut); it is always
 * treated as one unit.
 *
 * @access public
 * @param string $str
 * @param string $character the character you wish to reduce
 * @param bool $trim TRUE/FALSE - whether to trim the character from the beginning/end
 * @return string
 */
public function reduce_multiples($str, $character = ',', $trim = false) {
	if ((string)$character === '') {
		// Nothing to reduce; an empty quantified pattern would be invalid
		return $str;
	}

	// Group the quoted character so the quantifier covers all of its bytes,
	// not just the last one.
	$unit = '(?:' . preg_quote($character, '#') . ')';
	$str = preg_replace('#' . $unit . '{2,}#', $character, $str);

	if ($trim === true) {
		// trim() works on single bytes and could cut a multi-byte character
		// in half, so strip whole units from both ends instead.
		$str = preg_replace('#^' . $unit . '+|' . $unit . '+\z#', '', $str);
	}
	return $str;
}
}
/*
//explode string, return word and number of repetitions
$r = explode('[spilit]', $value);
//regex
if ( preg_match('/([a-z]+)/', $r[0])) {
preg_match_all( '/'. $r[0] .'/', $this -> checkString[$arrays], $match);
} else {
preg_match_all( '/\\'. $r[0] .'/', $this -> checkString[$arrays], $match);
}
//count chars
if ( count($match[0]) <= $r[1]) {
$this -> _is_valid[$arrays][$valData] = true;
} else {
$this -> _is_valid[$arrays][$valData] = false;
//set errors array
$this -> error[$arrays][] = $r[0] . $this -> error_max_time_char;
}
*/