Xml.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. <?php
  2. /**
  3. * CakePHP(tm) : Rapid Development Framework (https://cakephp.org)
  4. * Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
  5. *
  6. * Licensed under The MIT License
  7. * For full copyright and license information, please see the LICENSE.txt
  8. * Redistributions of files must retain the above copyright notice.
  9. *
  10. * @copyright Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
  11. * @link https://cakephp.org CakePHP(tm) Project
  12. * @since 0.10.3
  13. * @license https://opensource.org/licenses/mit-license.php MIT License
  14. */
  15. namespace Cake\Utility;
  16. use Cake\Utility\Exception\XmlException;
  17. use DOMDocument;
  18. use DOMNode;
  19. use DOMText;
  20. use Exception;
  21. use SimpleXMLElement;
  22. /**
  23. * XML handling for CakePHP.
  24. *
  25. * The methods in these classes enable the datasources that use XML to work.
  26. */
  class Xml
  {
      /**
       * Initialize SimpleXMLElement or DOMDocument from a given XML string, file path or array.
       *
       * ### Usage:
       *
       * Building XML from a string:
       *
       * ```
       * $xml = Xml::build('<example>text</example>');
       * ```
       *
       * Building XML from string (output DOMDocument):
       *
       * ```
       * $xml = Xml::build('<example>text</example>', ['return' => 'domdocument']);
       * ```
       *
       * Building XML from a file path:
       *
       * ```
       * $xml = Xml::build('/path/to/an/xml/file.xml');
       * ```
       *
       * Building XML from a remote URL:
       *
       * ```
       * use Cake\Http\Client;
       *
       * $http = new Client();
       * $response = $http->get('http://example.com/example.xml');
       * $xml = Xml::build($response->body());
       * ```
       *
       * Building from an array:
       *
       * ```
       * $value = [
       *     'tags' => [
       *         'tag' => [
       *             [
       *                 'id' => '1',
       *                 'name' => 'defect'
       *             ],
       *             [
       *                 'id' => '2',
       *                 'name' => 'enhancement'
       *             ]
       *         ]
       *     ]
       * ];
       * $xml = Xml::build($value);
       * ```
       *
       * When building XML from an array ensure that there is only one top level element.
       *
       * ### Options
       *
       * - `return` Can be 'simplexml' to return object of SimpleXMLElement or 'domdocument' to return DOMDocument.
       * - `loadEntities` Defaults to false. Set to true to enable loading of `<!ENTITY` definitions. This
       *   is disabled by default for security reasons.
       * - `readFile` Set to false to disable file reading. This is important to disable when
       *   putting user data into Xml::build(). If enabled local files will be read if they exist.
       *   Defaults to true for backwards compatibility reasons.
       * - `parseHuge` Enable the `LIBXML_PARSEHUGE` flag.
       *
       * If using array as input, you can pass `options` from Xml::fromArray.
       *
       * Input is dispatched in this order: arrays and objects go to Xml::fromArray();
       * any other non-string value is rejected; a string containing `<` is parsed as XML;
       * otherwise, when `readFile` is enabled and the string names an existing file, the
       * file contents are parsed.
       *
       * @param string|array $input XML string, a path to a file or an array
       * @param array $options The options to use
       * @return \SimpleXMLElement|\DOMDocument SimpleXMLElement or DOMDocument
       * @throws \Cake\Utility\Exception\XmlException
       */
      public static function build($input, array $options = [])
      {
          $options += [
              'return' => 'simplexml',
              'loadEntities' => false,
              'readFile' => true,
              'parseHuge' => false,
          ];

          if (is_array($input) || is_object($input)) {
              return static::fromArray($input, $options);
          }

          // Reject null/bool/numeric/resource input before it reaches strpos() or
          // file_exists(), which would otherwise coerce it to a string first.
          if (!is_string($input)) {
              throw new XmlException('Invalid input.');
          }

          if (strpos($input, '<') !== false) {
              return static::_loadXml($input, $options);
          }

          if ($options['readFile'] && file_exists($input)) {
              $contents = file_get_contents($input);
              if ($contents === false) {
                  // The file exists but could not be read (permissions, directory, ...).
                  throw new XmlException('XML cannot be read.');
              }

              return static::_loadXml($contents, $options);
          }

          throw new XmlException('XML cannot be read.');
      }

      /**
       * Parse the input data and create either a SimpleXmlElement object or a DOMDocument.
       *
       * libxml's internal-error mode is switched on while parsing and restored to its
       * previous value afterwards. On PHP versions before 8.0, unless `loadEntities` is
       * set, the libxml entity loader is disabled while parsing and then put back to
       * whatever state it had before the call. The `return` option is compared
       * case-insensitively, as Xml::fromArray() does.
       *
       * @param string $input The input to load.
       * @param array $options The options to use. See Xml::build()
       * @return \SimpleXMLElement|\DOMDocument
       * @throws \Cake\Utility\Exception\XmlException When the input cannot be parsed.
       */
      protected static function _loadXml($input, $options)
      {
          $flags = 0;
          if (!empty($options['parseHuge'])) {
              $flags |= LIBXML_PARSEHUGE;
          }

          // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so it is only
          // touched on older runtimes.
          $toggleEntityLoader = !$options['loadEntities']
              && PHP_VERSION_ID < 80000
              && function_exists('libxml_disable_entity_loader');

          $internalErrors = libxml_use_internal_errors(true);
          $entityLoaderWasDisabled = false;
          if ($toggleEntityLoader) {
              // The function returns the previous setting; remember it so an
              // application-wide "disabled" state is not silently re-enabled below.
              $entityLoaderWasDisabled = libxml_disable_entity_loader(true);
          }

          $returnType = strtolower((string)$options['return']);
          $xml = null;
          try {
              if ($returnType === 'simplexml' || $returnType === 'simplexmlelement') {
                  $xml = new SimpleXMLElement($input, $flags | LIBXML_NOCDATA);
              } else {
                  $document = new DOMDocument();
                  // loadXML() signals malformed input through its return value rather
                  // than an exception. Empty input is rejected up front because PHP 8
                  // raises a ValueError for it instead of returning false.
                  if ((string)$input !== '' && $document->loadXML($input, $flags)) {
                      $xml = $document;
                  }
              }
          } catch (Exception $e) {
              $xml = null;
          } finally {
              // Always restore global libxml state, even when an Error propagates.
              if ($toggleEntityLoader) {
                  libxml_disable_entity_loader($entityLoaderWasDisabled);
              }
              libxml_use_internal_errors($internalErrors);
          }

          if ($xml === null) {
              throw new XmlException('Xml cannot be read.');
          }

          return $xml;
      }

      /**
       * Transform an array into a SimpleXMLElement
       *
       * ### Options
       *
       * - `format` Whether nested scalar keys become child elements ('tags') or attributes ('attributes').
       * - `pretty` Returns formatted Xml when set to `true`. Defaults to `false`
       * - `version` Version of XML document. Default is 1.0.
       * - `encoding` Encoding of XML document. Defaults to the value of `mb_internal_encoding()`.
       * - `return` Whether to return a SimpleXMLElement ('simplexml') or a DOMDocument ('domdocument').
       *   Compared case-insensitively. Default is SimpleXMLElement.
       *
       * Using the following data:
       *
       * ```
       * $value = [
       *     'root' => [
       *         'tag' => [
       *             'id' => 1,
       *             'value' => 'defect',
       *             '@' => 'description'
       *         ]
       *     ]
       * ];
       * ```
       *
       * Calling `Xml::fromArray($value, 'tags');` Will generate:
       *
       * `<root><tag>description<id>1</id><value>defect</value></tag></root>`
       *
       * And calling `Xml::fromArray($value, 'attributes');` Will generate:
       *
       * `<root><tag id="1" value="defect">description</tag></root>`
       *
       * @param array|\Cake\Collection\Collection $input Array with data or a collection instance.
       *   Any object exposing a callable `toArray()` method is converted first.
       * @param string|array $options The options to use or a string to use as format.
       * @return \SimpleXMLElement|\DOMDocument SimpleXMLElement or DOMDocument
       * @throws \Cake\Utility\Exception\XmlException When the input is not an array with exactly
       *   one, non-integer, top level key.
       */
      public static function fromArray($input, $options = [])
      {
          if (is_object($input) && method_exists($input, 'toArray') && is_callable([$input, 'toArray'])) {
              $input = $input->toArray();
          }
          if (!is_array($input) || count($input) !== 1) {
              throw new XmlException('Invalid input.');
          }

          // key() reads the array's internal pointer; rewind it so the single top
          // level key is inspected even if the caller had advanced the pointer.
          reset($input);
          if (is_int(key($input))) {
              throw new XmlException('The key of input must be alphanumeric');
          }

          if (!is_array($options)) {
              $options = ['format' => (string)$options];
          }
          $options += [
              'format' => 'tags',
              'version' => '1.0',
              'encoding' => mb_internal_encoding(),
              'return' => 'simplexml',
              'pretty' => false,
          ];

          $dom = new DOMDocument($options['version'], $options['encoding']);
          if ($options['pretty']) {
              $dom->formatOutput = true;
          }

          // Late static binding, like every other internal call in this class, so
          // subclasses overriding _fromArray() are honoured.
          static::_fromArray($dom, $dom, $input, $options['format']);

          $options['return'] = strtolower($options['return']);
          if ($options['return'] === 'simplexml' || $options['return'] === 'simplexmlelement') {
              return new SimpleXMLElement($dom->saveXML());
          }

          return $dom;
      }

      /**
       * Recursive method to create children from array
       *
       * Each string key of `$data` is handled according to its value:
       *
       * - array values create child elements through _createChild(); when the array's
       *   keys concatenate to a numeric string it is treated as a list and one element
       *   named after the key is created per item.
       * - scalar values under a key containing `xmlns:` become namespace declarations.
       * - other scalar values become child elements when `$format` is 'tags' and the key
       *   does not start with `@`, and attributes otherwise (a leading `@` is stripped).
       *
       * Booleans are written as 1/0 and null as an empty string.
       *
       * @param \DOMDocument $dom Handler to DOMDocument
       * @param \DOMElement $node Handler to DOMElement (child)
       * @param array $data Array of data to append to the $node.
       * @param string $format Either 'attributes' or 'tags'. This determines where nested keys go.
       * @return void
       * @throws \Cake\Utility\Exception\XmlException When a key is not a string, or an `@`
       *   prefixed key holds an array.
       */
      protected static function _fromArray($dom, $node, &$data, $format)
      {
          if (empty($data) || !is_array($data)) {
              return;
          }

          foreach ($data as $key => $value) {
              if (!is_string($key)) {
                  throw new XmlException('Invalid array');
              }
              if (is_object($value) && method_exists($value, 'toArray') && is_callable([$value, 'toArray'])) {
                  $value = $value->toArray();
              }

              if (is_array($value)) {
                  if ($key[0] === '@') {
                      // Attributes and text content cannot hold nested structures.
                      throw new XmlException('Invalid array');
                  }
                  if (is_numeric(implode('', array_keys($value)))) {
                      // List: repeat the element once per item.
                      foreach ($value as $item) {
                          static::_createChild([
                              'dom' => $dom,
                              'node' => $node,
                              'key' => $key,
                              'format' => $format,
                              'value' => $item,
                          ]);
                      }
                  } else {
                      // Struct: a single element holding the nested keys.
                      static::_createChild([
                          'dom' => $dom,
                          'node' => $node,
                          'key' => $key,
                          'value' => $value,
                          'format' => $format,
                      ]);
                  }
                  continue;
              }

              if (is_bool($value)) {
                  $value = (int)$value;
              } elseif ($value === null) {
                  $value = '';
              }

              if (strpos($key, 'xmlns:') !== false) {
                  $node->setAttributeNS('http://www.w3.org/2000/xmlns/', $key, $value);
                  continue;
              }

              if ($key[0] !== '@' && $format === 'tags') {
                  if (is_numeric($value)) {
                      $child = $dom->createElement($key, $value);
                  } else {
                      // Escape special characters by attaching the value as a text
                      // node instead of passing it to createElement().
                      // https://www.w3.org/TR/REC-xml/#syntax
                      // https://bugs.php.net/bug.php?id=36795
                      $child = $dom->createElement($key, '');
                      $child->appendChild(new DOMText($value));
                  }
                  $node->appendChild($child);
                  continue;
              }

              if ($key[0] === '@') {
                  $key = substr($key, 1);
              }
              $attribute = $dom->createAttribute($key);
              $attribute->appendChild($dom->createTextNode($value));
              $node->appendChild($attribute);
          }
      }

      /**
       * Helper to _fromArray(). It will create children of arrays
       *
       * Creates one element named `$data['key']`, fills it from `$data['value']` and
       * appends it to `$data['node']`. For array values the `@` entry becomes the
       * element's text and the `xmlns:` entry its `xmlns` attribute; the remaining
       * entries are handed back to _fromArray(). Non-array values become the element's
       * text unless they are empty (integer 0 and string '0' are kept).
       *
       * @param array $data Array with information to create children. Recognised keys are
       *   `dom`, `node`, `key`, `value` and `format`.
       * @return void
       */
      protected static function _createChild($data)
      {
          $data += [
              'dom' => null,
              'node' => null,
              'key' => null,
              'value' => null,
              'format' => null,
          ];
          $dom = $data['dom'];
          $node = $data['node'];
          $key = $data['key'];
          $value = $data['value'];
          $format = $data['format'];

          if (is_object($value) && method_exists($value, 'toArray') && is_callable([$value, 'toArray'])) {
              $value = $value->toArray();
          }

          $childValue = null;
          $childNS = null;
          if (is_array($value)) {
              if (isset($value['@'])) {
                  $childValue = (string)$value['@'];
                  unset($value['@']);
              }
              if (isset($value['xmlns:'])) {
                  $childNS = $value['xmlns:'];
                  unset($value['xmlns:']);
              }
          } elseif (!empty($value) || $value === 0 || $value === '0') {
              $childValue = (string)$value;
          }

          $child = $dom->createElement($key);
          if ($childValue !== null) {
              // Text is appended before the nested keys, so it precedes child elements.
              $child->appendChild($dom->createTextNode($childValue));
          }
          if ($childNS) {
              $child->setAttribute('xmlns', $childNS);
          }

          static::_fromArray($dom, $child, $value, $format);
          $node->appendChild($child);
      }

      /**
       * Returns this XML structure as an array.
       *
       * DOM nodes are converted with simplexml_import_dom() first. The result has a
       * single top level key: the (prefixed) name of the root element.
       *
       * @param \SimpleXMLElement|\DOMDocument|\DOMNode $obj SimpleXMLElement, DOMDocument or DOMNode instance
       * @return array Array representation of the XML structure.
       * @throws \Cake\Utility\Exception\XmlException When the input is not, or cannot be
       *   converted to, a SimpleXMLElement.
       */
      public static function toArray($obj)
      {
          if ($obj instanceof DOMNode) {
              $obj = simplexml_import_dom($obj);
          }
          if (!($obj instanceof SimpleXMLElement)) {
              throw new XmlException('The input is not instance of SimpleXMLElement, DOMDocument or DOMNode.');
          }

          // The empty prefix comes first so un-prefixed attributes and children are
          // always visited, followed by every prefix declared anywhere in the document.
          $prefixes = array_keys(array_merge(['' => ''], $obj->getNamespaces(true)));

          $result = [];
          static::_toArray($obj, $result, '', $prefixes);

          return $result;
      }

      /**
       * Recursive method to toArray
       *
       * Converts `$xml` and stores it in `$parentData` under its element name, prefixed
       * with `$ns:` when a namespace prefix is given. Attributes are stored under
       * `@name` keys, child elements under their names, and the trimmed text content
       * under `@`. An element without attributes or children is stored as its trimmed
       * text. Elements sharing a name within the same parent are collected into a list.
       *
       * @param \SimpleXMLElement $xml SimpleXMLElement object
       * @param array $parentData Parent array with data
       * @param string $ns Namespace of current child
       * @param array $namespaces List of namespaces in XML
       * @return void
       */
      protected static function _toArray($xml, &$parentData, $ns, $namespaces)
      {
          $data = [];
          foreach ($namespaces as $namespace) {
              $prefix = empty($namespace) ? '' : $namespace . ':';
              foreach ($xml->attributes($namespace, true) as $attrName => $attrValue) {
                  $data['@' . $prefix . $attrName] = (string)$attrValue;
              }
              foreach ($xml->children($namespace, true) as $child) {
                  static::_toArray($child, $data, $namespace, $namespaces);
              }
          }

          $text = trim((string)$xml);
          if (empty($data)) {
              $data = $text;
          } elseif (strlen($text) > 0) {
              $data['@'] = $text;
          }

          $name = (empty($ns) ? '' : $ns . ':') . $xml->getName();
          if (!isset($parentData[$name])) {
              $parentData[$name] = $data;

              return;
          }

          // A sibling with this name was already stored: turn the entry into a list
          // on the second occurrence, then append.
          if (!is_array($parentData[$name]) || !isset($parentData[$name][0])) {
              $parentData[$name] = [$parentData[$name]];
          }
          $parentData[$name][] = $data;
      }
  }