FileLib.php 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. <?php
  2. App::uses('File', 'Utility');
  3. /**
  4. * Convenience class for reading, writing and appending to files.
  5. *
  6. */
  7. class FileLib extends File {
  8. /**
  9. * Allowed delimiters for csv
  10. */
  11. protected $allowedDelimiters = [
  12. ',',
  13. ';',
  14. '|',
  15. ' ',
  16. '#'];
  17. /**
  18. * Allowed enclosures for csv
  19. */
  20. protected $allowedEnclosures = ['"', '\''];
  21. /**
  22. * Allowed tags for pattern reading
  23. */
  24. protected $allowedTags = [
  25. '<h1>',
  26. '<h2>',
  27. '<h3>',
  28. '<p>',
  29. '<b>',
  30. '<a>',
  31. '<img>'];
  32. protected $defaultFormat = '%s';
  33. /**
  34. * A better csv reader which handles encoding as well as removes completely empty lines
  35. *
  36. * Options:
  37. * - int length (0 = no limit)
  38. * - string delimiter (null defaults to ,)
  39. * - string enclosure (null defaults to " - do not pass empty string)
  40. * - string mode
  41. * - string force Force open/read the file
  42. * - bool removeEmpty Remove empty lines (simple newline characters without meaning)
  43. * - bool encode Encode to UTF-8
  44. *
  45. * @param array $options Options
  46. * @return array Content or false on failure
  47. */
  48. public function readCsv($options = [], $delimiter = null, $enclosure = null, $mode = 'rb', $force = false, $removeEmpty = false, $encode = true) {
  49. // For BC
  50. if (!is_array($options)) {
  51. $options = [
  52. 'delimiter' => $delimiter !== null ? $delimiter : ',',
  53. 'enclosure' => $enclosure !== null ? $enclosure : '"',
  54. 'mode' => $mode,
  55. 'force' => $force,
  56. 'removeEmpty' => $removeEmpty,
  57. 'encode' => $encode,
  58. 'length' => $options
  59. ];
  60. }
  61. $defaults = [
  62. 'delimiter' => ',',
  63. 'enclosure' => '"',
  64. 'escape' => "\\",
  65. 'mode' => 'rb',
  66. 'force' => false,
  67. 'removeEmpty' => false,
  68. 'encode' => true,
  69. 'length' => 0
  70. ];
  71. $options += $defaults;
  72. extract($options);
  73. if ($this->open($mode, $force) === false) {
  74. return false;
  75. }
  76. if ($this->lock !== null && flock($this->handle, LOCK_SH) === false) {
  77. return false;
  78. }
  79. // PHP cannot handle delimiters with more than a single char
  80. if (strlen($delimiter) > 1) {
  81. throw new InternalErrorException('Invalid delimiter');
  82. }
  83. $res = [];
  84. while (true) {
  85. $data = fgetcsv($this->handle, $length, $delimiter, $enclosure, $escape);
  86. if ($data === false) {
  87. break;
  88. }
  89. if ($encode) {
  90. $data = $this->_encode($data);
  91. }
  92. $isEmpty = true;
  93. foreach ($data as $key => $val) {
  94. if (!empty($val)) {
  95. $isEmpty = false;
  96. break;
  97. }
  98. }
  99. if ($isEmpty && $removeEmpty) {
  100. continue;
  101. }
  102. $res[] = $data;
  103. }
  104. if ($this->lock !== null) {
  105. flock($this->handle, LOCK_UN);
  106. }
  107. $this->close();
  108. return $res;
  109. }
  110. /**
  111. * FileLib::readCsvFromString()
  112. *
  113. * @param string $string CSV content
  114. * @param array $options Options array
  115. * @return array Parsed content
  116. */
  117. public static function readCsvFromString($string, $options = []) {
  118. $file = fopen("php://memory", "rw");
  119. fwrite($file, $string);
  120. fseek($file, 0);
  121. $defaults = [
  122. 'delimiter' => ',',
  123. 'enclosure' => '"',
  124. 'escape' => "\\",
  125. 'eol' => "\n",
  126. 'encode' => false,
  127. 'removeEmpty' => false
  128. ];
  129. $options += $defaults;
  130. extract($options);
  131. // PHP cannot handle delimiters with more than a single char
  132. if (strlen($delimiter) > 1) {
  133. throw new InternalErrorException('Invalid delimiter');
  134. }
  135. $res = [];
  136. while (true) {
  137. $data = fgetcsv($file, 0, $delimiter, $enclosure, $escape);
  138. if ($data === false) {
  139. break;
  140. }
  141. if ($encode) {
  142. $data = $this->_encode($data);
  143. }
  144. $isEmpty = true;
  145. foreach ($data as $key => $val) {
  146. if (!empty($val)) {
  147. $isEmpty = false;
  148. break;
  149. }
  150. }
  151. if ($isEmpty && $removeEmpty) {
  152. continue;
  153. }
  154. $res[] = $data;
  155. }
  156. fclose($file);
  157. return $res;
  158. }
  159. /**
  160. * Write an array to a csv file
  161. *
  162. * @param array $data
  163. * @param string $delimiter (null defaults to ,)
  164. * @param string $enclosure (null defaults to " - do not pass empty string)
  165. * @return bool Success
  166. */
  167. public function writeCsv($data, $delimiter = null, $enclosure = null) {
  168. if ($this->open('w', true) !== true) {
  169. return false;
  170. }
  171. if ($this->lock !== null) {
  172. if (flock($this->handle, LOCK_EX) === false) {
  173. return false;
  174. }
  175. }
  176. $success = true;
  177. foreach ($data as $row) {
  178. if (fputcsv($this->handle, array_values((array)$row), (isset($delimiter) ? $delimiter : ','), (isset($enclosure) ? $enclosure : '"')) === false) {
  179. $success = false;
  180. }
  181. }
  182. if ($this->lock !== null) {
  183. flock($this->handle, LOCK_UN);
  184. }
  185. $this->close();
  186. return $success;
  187. }
  188. /**
  189. * Read files with fscanf() and pattern
  190. *
  191. * @param string $format (e.g. "%s\t%s\t%s\n")
  192. * @param string $mode
  193. * @param string $force Force open/read the file
  194. * @return array Content or false on failure
  195. */
  196. public function readWithPattern($format = null, $mode = 'rb', $force = false) {
  197. $res = [];
  198. if ($this->open($mode, $force) === false) {
  199. return false;
  200. }
  201. if ($this->lock !== null && flock($this->handle, LOCK_SH) === false) {
  202. return false;
  203. }
  204. if (empty($format)) {
  205. $format = $this->defaultFormat;
  206. }
  207. while (true) {
  208. $data = fscanf($this->handle, $format);
  209. if ($data === false) {
  210. break;
  211. }
  212. $res[] = $data;
  213. }
  214. if ($this->lock !== null) {
  215. flock($this->handle, LOCK_UN);
  216. }
  217. return $res;
  218. }
  219. /**
  220. * Return the contents of this File as a string - but without tags
  221. *
  222. * @param string/array $tags: <tag><tag2><tag3> or array('<tag>',...) otherwise default tags are used
  223. * @param string $mode
  224. * @param bool $force If true then the file will be re-opened even if its already opened, otherwise it won't
  225. * @return mixed string on success, false on failure
  226. */
  227. public function readWithTags($tags = null, $mode = 'rb', $force = false) {
  228. if ($this->open($mode, $force) === false) {
  229. return false;
  230. }
  231. if ($this->lock !== null && flock($this->handle, LOCK_SH) === false) {
  232. return false;
  233. }
  234. if (empty($tags)) {
  235. $tags = implode($this->allowedTags);
  236. } else {
  237. if (is_array($tags)) {
  238. $tags = implode($tags);
  239. }
  240. }
  241. $data = '';
  242. while (!feof($this->handle)) {
  243. $data .= fgetss($this->handle, 4096, $tags);
  244. }
  245. $data = trim($data);
  246. if ($this->lock !== null) {
  247. flock($this->handle, LOCK_UN);
  248. }
  249. return $data;
  250. }
  251. /**
  252. * Transfer array to cake structure
  253. *
  254. * @param data (usually with the first row as keys!)
  255. * @param options
  256. * - keys (defaults to first array content in data otherwise) (order is important!)
  257. * - preserve_keys (do not slug and lowercase)
  258. * @return array Result
  259. */
  260. public function transfer($data, $options = []) {
  261. $res = [];
  262. if (empty($options['keys'])) {
  263. $keys = array_shift($data);
  264. } else {
  265. $keys = $options['keys'];
  266. }
  267. foreach ($keys as $num => $key) {
  268. if (empty($options['preserve_keys'])) {
  269. $key = strtolower(Inflector::slug($key));
  270. }
  271. foreach ($data as $n => $val) {
  272. $res[$n][$key] = $val[$num];
  273. }
  274. }
  275. return $res;
  276. }
  277. /**
  278. * Assert proper encoding
  279. *
  280. * @param array Input
  281. * @return array Output
  282. */
  283. protected function _encode(array $array) {
  284. $convertedArray = [];
  285. foreach ($array as $key => $value) {
  286. if (!mb_check_encoding($key, 'UTF-8')) {
  287. $key = utf8_encode($key);
  288. }
  289. if (is_array($value)) {
  290. $value = $this->_encode($value);
  291. } else {
  292. if (!mb_check_encoding($value, 'UTF-8')) {
  293. $value = utf8_encode($value);
  294. }
  295. $value = trim($value);
  296. }
  297. $convertedArray[$key] = $value;
  298. }
  299. return $convertedArray;
  300. }
  301. /**
  302. * Check if a blob string contains the BOM.
  303. * Useful for file_get_contents() + json_decode() that needs the BOM removed.
  304. *
  305. * @param string $content
  306. * @return bool Success
  307. */
  308. public static function hasByteOrderMark($content) {
  309. return strpos($content, b"\xEF\xBB\xBF") === 0;
  310. }
  311. /**
  312. * Remove BOM from a blob string if detected.
  313. * Useful for file_get_contents() + json_decode() that needs the BOM removed.
  314. *
  315. * @param string $content
  316. * @return string Cleaned content
  317. */
  318. public static function removeByteOrderMark($content) {
  319. return trim($content, b"\xEF\xBB\xBF");
  320. }
  321. }