Xml.php 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. <?php
  2. /**
  3. * CakePHP(tm) : Rapid Development Framework (https://cakephp.org)
  4. * Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
  5. *
  6. * Licensed under The MIT License
  7. * For full copyright and license information, please see the LICENSE.txt
  8. * Redistributions of files must retain the above copyright notice.
  9. *
  10. * @copyright Copyright (c) Cake Software Foundation, Inc. (https://cakefoundation.org)
  11. * @link https://cakephp.org CakePHP(tm) Project
  12. * @since 0.10.3
  13. * @license https://opensource.org/licenses/mit-license.php MIT License
  14. */
  15. namespace Cake\Utility;
  16. use Cake\Utility\Exception\XmlException;
  17. use DOMDocument;
  18. use DOMNode;
  19. use DOMText;
  20. use Exception;
  21. use SimpleXMLElement;
  /**
   * XML handling for CakePHP.
   *
   * Converts between XML strings/files, PHP arrays and the SimpleXMLElement /
   * DOMDocument object representations.
   */
  class Xml
  {
      /**
       * Initialize SimpleXMLElement or DOMDocument from a given XML string, file path or array.
       *
       * ### Usage:
       *
       * Building XML from a string:
       *
       * ```
       * $xml = Xml::build('<example>text</example>');
       * ```
       *
       * Building XML from string (output DOMDocument):
       *
       * ```
       * $xml = Xml::build('<example>text</example>', ['return' => 'domdocument']);
       * ```
       *
       * Building XML from a file path:
       *
       * ```
       * $xml = Xml::build('/path/to/an/xml/file.xml');
       * ```
       *
       * Building XML from a remote URL (fetch the document first, then pass its body):
       *
       * ```
       * use Cake\Http\Client;
       *
       * $http = new Client();
       * $response = $http->get('http://example.com/example.xml');
       * $xml = Xml::build($response->body());
       * ```
       *
       * Building from an array:
       *
       * ```
       * $value = [
       *     'tags' => [
       *         'tag' => [
       *             [
       *                 'id' => '1',
       *                 'name' => 'defect'
       *             ],
       *             [
       *                 'id' => '2',
       *                 'name' => 'enhancement'
       *             ]
       *         ]
       *     ]
       * ];
       * $xml = Xml::build($value);
       * ```
       *
       * When building XML from an array ensure that there is only one top level element.
       *
       * ### Options
       *
       * - `return` Can be 'simplexml' to return object of SimpleXMLElement or 'domdocument' to return DOMDocument.
       * - `loadEntities` Defaults to false. Set to true to enable loading of `<!ENTITY` definitions. This
       *   is disabled by default for security reasons.
       * - `readFile` Set to false to disable file reading. This is important to disable when
       *   putting user data into Xml::build(). If enabled local files will be read if they exist.
       *   Defaults to true for backwards compatibility reasons.
       * - `parseHuge` Enable the `LIBXML_PARSEHUGE` flag.
       *
       * If using array as input, you can pass `options` from Xml::fromArray.
       *
       * @param string|array|object $input XML string, a path to a file, or an array / object exposing toArray()
       * @param array $options The options to use
       * @return \SimpleXMLElement|\DOMDocument SimpleXMLElement or DOMDocument
       * @throws \Cake\Utility\Exception\XmlException When the input has an unsupported type or cannot be read/parsed.
       */
      public static function build($input, array $options = [])
      {
          $defaults = [
              'return' => 'simplexml',
              'loadEntities' => false,
              'readFile' => true,
              'parseHuge' => false,
          ];
          $options += $defaults;

          if (is_array($input) || is_object($input)) {
              return static::fromArray($input, $options);
          }

          // Anything that is not a string at this point (null, int, float, bool, resource)
          // must be rejected before it reaches the string and filesystem functions below.
          if (!is_string($input)) {
              throw new XmlException('Invalid input.');
          }

          // Anything containing '<' is treated as markup, never as a path.
          if (strpos($input, '<') !== false) {
              return static::_loadXml($input, $options);
          }

          if ($options['readFile'] && file_exists($input)) {
              $contents = file_get_contents($input);
              // file_get_contents() returns false for unreadable paths (e.g. directories).
              if ($contents === false) {
                  throw new XmlException('XML cannot be read.');
              }

              return static::_loadXml($contents, $options);
          }

          throw new XmlException('XML cannot be read.');
      }

      /**
       * Parse the input data and create either a SimpleXMLElement object or a DOMDocument.
       *
       * libxml's error handling mode and (when available) the entity loader state are
       * changed only for the duration of the parse and are put back to whatever they
       * were before the call, even when parsing fails.
       *
       * @param string $input The input to load.
       * @param array $options The options to use. See Xml::build()
       * @return \SimpleXMLElement|\DOMDocument
       * @throws \Cake\Utility\Exception\XmlException When the document cannot be parsed.
       */
      protected static function _loadXml($input, $options)
      {
          $restrictEntities = function_exists('libxml_disable_entity_loader') && !$options['loadEntities'];
          $internalErrors = libxml_use_internal_errors(true);
          $previousEntityLoader = false;
          if ($restrictEntities) {
              // Keep the prior state: if the application already disabled the entity
              // loader globally we must not re-enable it once parsing is done.
              $previousEntityLoader = libxml_disable_entity_loader(true);
          }

          $flags = 0;
          if (!empty($options['parseHuge'])) {
              $flags |= LIBXML_PARSEHUGE;
          }

          $xml = null;
          try {
              if ($options['return'] === 'simplexml' || $options['return'] === 'simplexmlelement') {
                  // CDATA sections are merged into text nodes for SimpleXML only.
                  $xml = new SimpleXMLElement($input, $flags | LIBXML_NOCDATA);
              } else {
                  $document = new DOMDocument();
                  // loadXML() reports malformed markup through its return value, not an exception.
                  if ($document->loadXML($input, $flags)) {
                      $xml = $document;
                  }
              }
          } catch (Exception $e) {
              $xml = null;
          } finally {
              if ($restrictEntities) {
                  libxml_disable_entity_loader($previousEntityLoader);
              }
              libxml_use_internal_errors($internalErrors);
          }

          if ($xml === null) {
              throw new XmlException('Xml cannot be read.');
          }

          return $xml;
      }

      /**
       * Transform an array into a SimpleXMLElement or DOMDocument.
       *
       * ### Options
       *
       * - `format` Whether nested keys become child elements ('tags') or attributes ('attributes').
       * - `pretty` Returns formatted Xml when set to `true`. Defaults to `false`
       * - `version` Version of XML document. Default is 1.0.
       * - `encoding` Encoding of XML document. If null it is removed from the XML header.
       *   Defaults to the value of `mb_internal_encoding()`.
       * - `return` Return a SimpleXMLElement ('simplexml') or a DOMDocument ('domdocument').
       *   Default is SimpleXMLElement.
       *
       * Using the following data:
       *
       * ```
       * $value = [
       *     'root' => [
       *         'tag' => [
       *             'id' => 1,
       *             'value' => 'defect',
       *             '@' => 'description'
       *         ]
       *     ]
       * ];
       * ```
       *
       * Calling `Xml::fromArray($value, 'tags');` Will generate:
       *
       * `<root><tag>description<id>1</id><value>defect</value></tag></root>`
       *
       * And calling `Xml::fromArray($value, 'attributes');` Will generate:
       *
       * `<root><tag id="1" value="defect">description</tag></root>`
       *
       * @param array|\Cake\Collection\Collection $input Array with data or a collection instance.
       * @param string|array $options The options to use or a string to use as format.
       * @return \SimpleXMLElement|\DOMDocument SimpleXMLElement or DOMDocument
       * @throws \Cake\Utility\Exception\XmlException When the input is not a single-rooted, string-keyed array.
       */
      public static function fromArray($input, $options = [])
      {
          if (is_object($input) && method_exists($input, 'toArray') && is_callable([$input, 'toArray'])) {
              $input = call_user_func([$input, 'toArray']);
          }
          if (!is_array($input) || count($input) !== 1) {
              throw new XmlException('Invalid input.');
          }

          // Do not rely on the caller having left the internal array pointer at the start.
          reset($input);
          $key = key($input);
          if (is_int($key)) {
              throw new XmlException('The key of input must be alphanumeric');
          }

          if (!is_array($options)) {
              $options = ['format' => (string)$options];
          }
          $defaults = [
              'format' => 'tags',
              'version' => '1.0',
              'encoding' => mb_internal_encoding(),
              'return' => 'simplexml',
              'pretty' => false
          ];
          $options += $defaults;

          // A null encoding is documented as "omit from the header"; the empty string
          // expresses exactly that to DOMDocument without passing null to a string argument.
          $dom = new DOMDocument($options['version'], (string)$options['encoding']);
          if ($options['pretty']) {
              $dom->formatOutput = true;
          }
          static::_fromArray($dom, $dom, $input, $options['format']);

          $options['return'] = strtolower($options['return']);
          if ($options['return'] === 'simplexml' || $options['return'] === 'simplexmlelement') {
              return new SimpleXMLElement($dom->saveXML());
          }

          return $dom;
      }

      /**
       * Recursive method to create children from an array.
       *
       * Scalar values become child elements or attributes of `$node` (depending on
       * `$format` and on a leading `@` in the key); array values are delegated to
       * `_createChild()`, once per item for numerically indexed lists.
       *
       * @param \DOMDocument $dom Handler to DOMDocument
       * @param \DOMElement $node Handler to DOMElement (child)
       * @param array $data Array of data to append to the $node.
       * @param string $format Either 'attributes' or 'tags'. This determines where nested keys go.
       * @return void
       * @throws \Cake\Utility\Exception\XmlException When a key is not a string, or an `@` key holds an array.
       */
      protected static function _fromArray($dom, $node, &$data, $format)
      {
          if (empty($data) || !is_array($data)) {
              return;
          }

          foreach ($data as $key => $value) {
              if (!is_string($key)) {
                  throw new XmlException('Invalid array');
              }
              if (is_object($value) && method_exists($value, 'toArray') && is_callable([$value, 'toArray'])) {
                  $value = call_user_func([$value, 'toArray']);
              }

              if (is_array($value)) {
                  // '@'-prefixed keys denote attributes/text and can only hold scalars.
                  if ($key[0] === '@') {
                      throw new XmlException('Invalid array');
                  }
                  if (is_numeric(implode('', array_keys($value)))) {
                      // List: every item becomes its own element named $key.
                      foreach ($value as $item) {
                          $itemData = compact('dom', 'node', 'key', 'format');
                          $itemData['value'] = $item;
                          static::_createChild($itemData);
                      }
                  } else {
                      // Struct: a single element named $key holding the nested data.
                      static::_createChild(compact('dom', 'node', 'key', 'value', 'format'));
                  }
                  continue;
              }

              if (is_bool($value)) {
                  $value = (int)$value;
              } elseif ($value === null) {
                  $value = '';
              }

              if (strpos($key, 'xmlns:') !== false) {
                  $node->setAttributeNS('http://www.w3.org/2000/xmlns/', $key, $value);
                  continue;
              }

              if ($key[0] !== '@' && $format === 'tags') {
                  if (!is_numeric($value)) {
                      // Escape special characters
                      // https://www.w3.org/TR/REC-xml/#syntax
                      // https://bugs.php.net/bug.php?id=36795
                      $child = $dom->createElement($key, '');
                      $child->appendChild(new DOMText($value));
                  } else {
                      $child = $dom->createElement($key, $value);
                  }
                  $node->appendChild($child);
                  continue;
              }

              if ($key[0] === '@') {
                  $key = substr($key, 1);
              }
              $attribute = $dom->createAttribute($key);
              $attribute->appendChild($dom->createTextNode($value));
              $node->appendChild($attribute);
          }
      }

      /**
       * Helper to _fromArray(). It will create children of arrays.
       *
       * Recognised keys of `$data`: `dom` (owner document), `node` (parent to append
       * to), `key` (element name), `value` (scalar text or nested array) and `format`.
       * Inside an array `value`, the `@` entry is used as the element's text and the
       * `xmlns:` entry as its default namespace; null entries for either are ignored.
       *
       * @param array $data Array with information to create children
       * @return void
       */
      protected static function _createChild($data)
      {
          $data += [
              'dom' => null,
              'node' => null,
              'key' => null,
              'value' => null,
              'format' => null,
          ];

          $value = $data['value'];
          $dom = $data['dom'];
          $key = $data['key'];
          $format = $data['format'];
          $node = $data['node'];

          $childNS = $childValue = null;
          if (is_object($value) && method_exists($value, 'toArray') && is_callable([$value, 'toArray'])) {
              $value = call_user_func([$value, 'toArray']);
          }
          if (is_array($value)) {
              // array_key_exists() rather than isset(): a null entry must still be removed,
              // otherwise _fromArray() would try to build an attribute with an empty name.
              if (array_key_exists('@', $value)) {
                  if ($value['@'] !== null) {
                      $childValue = (string)$value['@'];
                  }
                  unset($value['@']);
              }
              if (array_key_exists('xmlns:', $value)) {
                  $childNS = $value['xmlns:'];
                  unset($value['xmlns:']);
              }
          } elseif (!empty($value) || $value === 0 || $value === '0') {
              $childValue = (string)$value;
          }

          $child = $dom->createElement($key);
          if ($childValue !== null) {
              $child->appendChild($dom->createTextNode($childValue));
          }
          if ($childNS) {
              $child->setAttribute('xmlns', $childNS);
          }

          static::_fromArray($dom, $child, $value, $format);
          $node->appendChild($child);
      }

      /**
       * Returns this XML structure as an array.
       *
       * Attributes are stored under `@name` keys and an element's own text under `@`
       * when the element also has attributes or children.
       *
       * @param \SimpleXMLElement|\DOMDocument|\DOMNode $obj SimpleXMLElement, DOMDocument or DOMNode instance
       * @return array Array representation of the XML structure.
       * @throws \Cake\Utility\Exception\XmlException When $obj is not (convertible to) a SimpleXMLElement.
       */
      public static function toArray($obj)
      {
          if ($obj instanceof DOMNode) {
              $obj = simplexml_import_dom($obj);
          }
          if (!($obj instanceof SimpleXMLElement)) {
              throw new XmlException('The input is not instance of SimpleXMLElement, DOMDocument or DOMNode.');
          }

          $result = [];
          // The empty prefix is always included so un-prefixed nodes are visited too.
          $namespaces = array_merge(['' => ''], $obj->getNamespaces(true));
          static::_toArray($obj, $result, '', array_keys($namespaces));

          return $result;
      }

      /**
       * Recursive method to toArray
       *
       * Writes the converted element into `$parentData` under its (prefixed) name.
       * A name that occurs more than once is turned into a numerically indexed list.
       *
       * @param \SimpleXMLElement $xml SimpleXMLElement object
       * @param array $parentData Parent array with data
       * @param string $ns Namespace of current child
       * @param array $namespaces List of namespaces in XML
       * @return void
       */
      protected static function _toArray($xml, &$parentData, $ns, $namespaces)
      {
          $data = [];

          foreach ($namespaces as $namespace) {
              foreach ($xml->attributes($namespace, true) as $key => $value) {
                  if (!empty($namespace)) {
                      $key = $namespace . ':' . $key;
                  }
                  $data['@' . $key] = (string)$value;
              }

              foreach ($xml->children($namespace, true) as $child) {
                  static::_toArray($child, $data, $namespace, $namespaces);
              }
          }

          $asString = trim((string)$xml);
          if (empty($data)) {
              // Leaf element without attributes: represent it by its text alone.
              $data = $asString;
          } elseif (strlen($asString) > 0) {
              $data['@'] = $asString;
          }

          if (!empty($ns)) {
              $ns .= ':';
          }
          $name = $ns . $xml->getName();

          if (!isset($parentData[$name])) {
              $parentData[$name] = $data;

              return;
          }

          // Second occurrence of the same name: promote the existing entry to a list first.
          if (!is_array($parentData[$name]) || !isset($parentData[$name][0])) {
              $parentData[$name] = [$parentData[$name]];
          }
          $parentData[$name][] = $data;
      }
  }