Browse Source

Allow FileLib to also read CSV from strings

euromark 11 years ago
parent
commit
c51f426fe4
2 changed files with 192 additions and 69 deletions
  1. 112 52
      Lib/Utility/FileLib.php
  2. 80 17
      Test/Case/Lib/Utility/FileLibTest.php

+ 112 - 52
Lib/Utility/FileLib.php

@@ -39,17 +39,44 @@ class FileLib extends File {
 	/**
 	 * A better csv reader which handles encoding as well as removes completely empty lines
 	 *
-	 * @param int $length (0 = no limit)
-	 * @param string $delimiter (null defaults to ,)
-	 * @param string $enclosure (null defaults to " - do not pass empty string)
-	 * @param string $mode
-	 * @param string $force Force open/read the file
-	 * @param bool $removeEmpty Remove empty lines (simple newline characters without meaning)
-	 * @param bool $encode Encode to UTF-8
+	 * Options:
+	 * - int length (0 = no limit)
+	 * - string delimiter (null defaults to ,)
+	 * - string enclosure (null defaults to " - do not pass empty string)
+	 * - string mode
+	 * - bool force Force open/read the file
+	 * - bool removeEmpty Remove empty lines (simple newline characters without meaning)
+	 * - bool encode Encode to UTF-8
+	 * - string escape Escape character (defaults to \)
+	 *
+	 * @param array $options Options
 	 * @return array Content or false on failure
 	 */
-	public function readCsv($length = 0, $delimiter = null, $enclosure = null, $mode = 'rb', $force = false, $removeEmpty = false, $encode = true) {
-		$res = array();
+	public function readCsv($options = array(), $delimiter = null, $enclosure = null, $mode = 'rb', $force = false, $removeEmpty = false, $encode = true) {
+		// For BC
+		if (!is_array($options)) {
+			$options = array(
+				'delimiter' => $delimiter !== null ? $delimiter : ',',
+				'enclosure' => $enclosure !== null ? $enclosure : '"',
+				'mode' => $mode,
+				'force' => $force,
+				'removeEmpty' => $removeEmpty,
+				'encode' => $encode,
+				'length' => $options
+			);
+		}
+		$defaults = array(
+			'delimiter' => ',',
+			'enclosure' => '"',
+			'escape' => "\\",
+			'mode' => 'rb',
+			'force' => false,
+			'removeEmpty' => false,
+			'encode' => true,
+			'length' => 0
+		);
+		$options += $defaults;
+		extract($options);
+
 		if ($this->open($mode, $force) === false) {
 			return false;
 		}
@@ -58,53 +85,31 @@ class FileLib extends File {
 			return false;
 		}
 
-		// php cannot handle delimiters with more than a single char
-		if (mb_strlen($delimiter) > 1) {
-			$count = 0;
-			while (!feof($this->handle)) {
-				if ($count > 100) {
-					throw new RuntimeException('max recursion depth');
-				}
-				$count++;
-				$tmp = fgets($this->handle, 8000);
-				$tmp = explode($delimiter, $tmp);
-				if ($encode) {
-					$tmp = $this->_encode($tmp);
-				}
-				$isEmpty = true;
-				foreach ($tmp as $key => $val) {
-					if (!empty($val)) {
-						$isEmpty = false;
-						break;
-					}
-				}
-				if ($isEmpty) {
-					continue;
-				}
-				$res[] = $tmp;
-			}
+		// PHP cannot handle delimiters with more than a single char
+		if (strlen($delimiter) > 1) {
+			throw new InternalErrorException('Invalid delimiter');
+		}
 
-		} else {
-			while (true) {
-				$data = fgetcsv($this->handle, $length, (isset($delimiter) ? $delimiter : ','), (isset($enclosure) ? $enclosure : '"'));
-				if ($data === false) {
+		$res = array();
+		while (true) {
+			$data = fgetcsv($this->handle, $length, $delimiter, $enclosure, $escape);
+			if ($data === false) {
+				break;
+			}
+			if ($encode) {
+				$data = $this->_encode($data);
+			}
+			$isEmpty = true;
+			foreach ($data as $key => $val) {
+				if (!empty($val)) {
+					$isEmpty = false;
 					break;
 				}
-				if ($encode) {
-					$data = $this->_encode($data);
-				}
-				$isEmpty = true;
-				foreach ($data as $key => $val) {
-					if (!empty($val)) {
-						$isEmpty = false;
-						break;
-					}
-				}
-				if ($isEmpty && $removeEmpty) {
-					continue;
-				}
-				$res[] = $data;
 			}
+			if ($isEmpty && $removeEmpty) {
+				continue;
+			}
+			$res[] = $data;
 		}
 
 		if ($this->lock !== null) {
@@ -115,6 +120,61 @@ class FileLib extends File {
 	}
 
 	/**
+	 * Parses CSV content held in a string instead of a file.
+	 *
+	 * The string is written to an in-memory stream and read back row by row
+	 * with fgetcsv().
+	 *
+	 * Options:
+	 * - string delimiter Single-byte field delimiter (defaults to ,)
+	 * - string enclosure Field enclosure character (defaults to ")
+	 * - string escape Escape character (defaults to \)
+	 * - string eol Accepted, but not used by this method
+	 * - bool encode Convert values that are not valid UTF-8 to UTF-8 (defaults to false)
+	 * - bool removeEmpty Skip rows in which every value is empty() (defaults to false)
+	 *
+	 * @param string $string CSV content
+	 * @param array $options Options array
+	 * @return array Parsed content
+	 * @throws InternalErrorException If the delimiter is longer than one byte
+	 */
+	public static function readCsvFromString($string, $options = array()) {
+		$defaults = array(
+			'delimiter' => ',',
+			'enclosure' => '"',
+			'escape' => "\\",
+			'eol' => "\n",
+			'encode' => false,
+			'removeEmpty' => false
+		);
+		// Options are read by key instead of extract()ed, so that a key such as
+		// "file" or "string" cannot overwrite the local variables used below.
+		$options += $defaults;
+
+		// fgetcsv() cannot handle delimiters with more than a single byte. Validate
+		// before opening the stream so a rejected call leaves no open handle behind.
+		if (strlen($options['delimiter']) > 1) {
+			throw new InternalErrorException('Invalid delimiter');
+		}
+
+		$file = fopen('php://memory', 'r+');
+		fwrite($file, $string);
+		fseek($file, 0);
+
+		$res = array();
+		while (true) {
+			$data = fgetcsv($file, 0, $options['delimiter'], $options['enclosure'], $options['escape']);
+			if ($data === false) {
+				break;
+			}
+			if ($options['encode']) {
+				// This method is static, so there is no $this to call the instance
+				// helper _encode() on; the conversion is done inline instead.
+				// NOTE(review): assumes non-UTF-8 input is ISO-8859-1 and that no
+				// further normalization is expected - confirm against _encode().
+				foreach ($data as $key => $val) {
+					if (is_string($val) && !mb_check_encoding($val, 'UTF-8')) {
+						$data[$key] = mb_convert_encoding($val, 'UTF-8', 'ISO-8859-1');
+					}
+				}
+			}
+			$isEmpty = true;
+			foreach ($data as $val) {
+				if (!empty($val)) {
+					$isEmpty = false;
+					break;
+				}
+			}
+			if ($isEmpty && $options['removeEmpty']) {
+				continue;
+			}
+			$res[] = $data;
+		}
+
+		fclose($file);
+		return $res;
+	}
+
+	/**
 	 * Write an array to a csv file
 	 *
 	 * @param array $data

+ 80 - 17
Test/Case/Lib/Utility/FileLibTest.php

@@ -21,7 +21,7 @@ class FileLibTest extends CakeTestCase {
 
 		$handler2 = new FileLib(TMP . 'test.txt', true);
 
-		$is = $handler2->readCsv(1024, ',', '"');
+		$is = $handler2->readCsv();
 		$expected = array(array(
 				'First',
 				'Last Name',
@@ -49,7 +49,7 @@ class FileLibTest extends CakeTestCase {
 
 		$handler2 = new FileLib(TMP . 'test.txt', true);
 
-		$is = $handler2->readCsv(1024, ',', '\'');
+		$is = $handler2->readCsv(array('enclosure' => '\''));
 		$expected = array(array(
 				'First',
 				'Last Name',
@@ -66,6 +66,65 @@ class FileLibTest extends CakeTestCase {
 	}
 
 	/**
+	 * Tests that tmpfile() hack works. One should use readCsvFromString() instead, though.
+	 *
+	 * The CSV content is written into a temporary file created by tmpfile(), read back
+	 * through FileLib::readCsv() with ";" as delimiter, and the temporary file is
+	 * expected to be gone once the handle has been closed.
+	 *
+	 * @return void
+	 */
+	public function testReadCsvFromTmpFile() {
+		$message = '"First"; "Last Name"; "Email"' . NL . '"Example Äs"; "Firsty üs"; "test@test.com sß"';
+
+		// Create the temporary file and look up its path via the stream meta data
+		$handle = tmpfile();
+		$meta = stream_get_meta_data($handle);
+		$filename = $meta['uri'];
+
+		$this->assertTrue(file_exists($filename));
+
+		$File = new FileLib($filename);
+		$File->write($message);
+		// This seems to be necessary in this case - fclose($handle) doesn't do the trick
+		$File->handle = $handle;
+
+		$array = $File->readCsv(array('delimiter' => ';'));
+		$File->close();
+
+		// After close() the temporary file must no longer exist
+		$this->assertFalse(file_exists($filename));
+
+		// Header row plus one data row
+		$this->assertTrue(count($array) === 2);
+		$this->assertEquals('Last Name', $array[0][1]);
+	}
+
+	/**
+	 * Checks FileLib::readCsvFromString() with a single quote as enclosure:
+	 * plain values, a backslash-escaped enclosure inside a value and a doubled
+	 * enclosure inside a value.
+	 *
+	 * @return void
+	 */
+	public function testReadCsvFromString() {
+		$options = array('enclosure' => '\'');
+		$header = '\'First\', \'Last Name\', \'Email\'';
+
+		// Plain values only
+		$input = $header . NL . '\'Example Äs\', \'Firsty üs\', \'test@test.com\'';
+		$rows = FileLib::readCsvFromString($input, $options);
+		$this->assertEquals('Last Name', $rows[0][1]);
+
+		// A backslash-escaped enclosure is kept together with its backslash
+		$input = $header . NL . '\'Example Äs\', \'Firsty üs\', \'test@test.com \\\' sß\'';
+		$rows = FileLib::readCsvFromString($input, $options);
+		$this->assertEquals('test@test.com \\\' sß', $rows[1][2]);
+
+		// A doubled enclosure collapses into a single one
+		$input = $header . NL . '\'Example Äs\', \'Firsty üs\', \'test@test.com \'\' sß\'';
+		$rows = FileLib::readCsvFromString($input, $options);
+		$this->assertEquals('test@test.com \' sß', $rows[1][2]);
+	}
+
+	/**
+	 * FileLibTest::testReadCsvFromStringInvalidMultibyteDelimiter()
+	 *
+	 * A delimiter such as "ä" (more than one byte in UTF-8) must be rejected
+	 * with an exception instead of being passed on to the CSV parser.
+	 *
+	 * @expectedException InternalErrorException
+	 * @return void
+	 */
+	public function testReadCsvFromStringInvalidMultibyteDelimiter() {
+		$csv = 'äFirstä, äLast Nameä, äEmailä' . NL . 'äExample Äsä, äFirsty üsä, ätest@test.com ää sßä';
+		// The call is expected to throw, so no assertion on a result can follow it
+		FileLib::readCsvFromString($csv, array('enclosure' => 'ä', 'delimiter' => 'ä'));
+	}
+
+	/**
 	 * Test method
 	 *
 	 * @return void
@@ -111,7 +170,7 @@ class FileLibTest extends CakeTestCase {
 		$this->assertTrue($res);
 
 		$handler = new FileLib(TMP . 'test.csv', true);
-		$res = $handler->readCsv(1024);
+		$res = $handler->readCsv();
 		$this->assertEquals($array, $res);
 	}
 
@@ -178,7 +237,7 @@ class FileLibTest extends CakeTestCase {
 
 		$handler2 = new FileLib(TMP . 'test.txt', true);
 
-		$is = $handler2->readCsv(1024, ',', '"');
+		$is = $handler2->readCsv();
 
 		$is = $handler2->transfer($is);
 		//pr($is);
@@ -201,7 +260,7 @@ class FileLibTest extends CakeTestCase {
 
 		$handler2 = new FileLib(TMP . 'test.txt', true);
 
-		$is = $handler2->readCsv(1024, ',', '"');
+		$is = $handler2->readCsv();
 		array_shift($is);
 		$is = $handler2->transfer($is, array('keys' => array(
 				'X',
@@ -227,7 +286,7 @@ class FileLibTest extends CakeTestCase {
 
 		$handler2 = new FileLib(TMP . 'test.txt', true);
 
-		$is = $handler2->readCsv(1024, ',', '"', 'rb', false, true);
+		$is = $handler2->readCsv(array('removeEmpty' => true));
 		array_shift($is);
 		$is = $handler2->transfer($is, array('keys' => array(
 				'X',
@@ -268,21 +327,25 @@ class FileLibTest extends CakeTestCase {
 
 	/** Helper Functions **/
 
-	public function _printArrays($status, $is, $expected, $pre = null) {
+	/**
+	 * FileLibTest::_printArrays()
+	 *
+	 * @param mixed $status
+	 * @param mixed $is
+	 * @param mixed $expected
+	 * @param mixed $pre
+	 * @return void
+	 */
+	protected function _printArrays($status, $is, $expected, $pre = null) {
 		if (!isset($_GET['show_passes']) || !$_GET['show_passes']) {
 			return false;
 		}
 
-		if ($pre !== null) {
-			//echo 'pre:';
-			//pr($pre);
-		}
-		//echo 'is:';
-		//pr($is);
-		if (!$status) {
-			//echo 'expected:';
-			//pr($expected);
-		}
+		echo 'Result:';
+		pr($is);
+
+		echo 'Expected:';
+		pr($expected);
 	}
 
 }