Emogrifier.php 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793
  1. <?php
  2. /**
  3. * This class provides functions for converting CSS styles into inline style attributes in your HTML code.
  4. *
  5. * For more information, please see the README.md file.
  6. *
  7. * @author Cameron Brooks
  8. * @author Jaime Prado
  9. * @author Roman Ožana <ozana@omdesign.cz>
  10. */
  class Emogrifier {
    /**
     * the character encoding used for the document and for all multi-byte string operations
     *
     * @var string
     */
    const ENCODING = 'UTF-8';

    /**
     * key of the cache that holds the parsed and sorted CSS rules
     *
     * @var integer
     */
    const CACHE_KEY_CSS = 0;

    /**
     * key of the cache that holds the computed selector precedences
     *
     * @var integer
     */
    const CACHE_KEY_SELECTOR = 1;

    /**
     * key of the cache that holds the CSS-selector-to-XPath translations
     *
     * @var integer
     */
    const CACHE_KEY_XPATH = 2;

    /**
     * key of the cache that holds the parsed CSS declaration blocks
     *
     * @var integer
     */
    const CACHE_KEY_CSS_DECLARATION_BLOCK = 3;

    /**
     * array key of the offset ("b" in "an+b") in the result of parseNth()
     *
     * @var integer
     */
    const INDEX = 0;

    /**
     * array key of the step ("a" in "an+b") in the result of parseNth()
     *
     * @var integer
     */
    const MULTIPLIER = 1;

    /**
     * regular expression matching "#id" with an optional preceding element name
     *
     * @var string
     */
    const ID_ATTRIBUTE_MATCHER = '/(\\w+)?\\#([\\w\\-]+)/';

    /**
     * regular expression matching one or more ".class" parts with an optional preceding element name
     *
     * @var string
     */
    const CLASS_ATTRIBUTE_MATCHER = '/(\\w+|[\\*\\]])?((\\.[\\w\\-]+)+)/';

    /**
     * the HTML to process
     *
     * @var string
     */
    private $html = '';

    /**
     * the CSS to merge into the HTML
     *
     * @var string
     */
    private $css = '';

    /**
     * names of the tags whose markup gets stripped from the HTML before it is parsed
     *
     * @var array<string>
     */
    private $unprocessableHtmlTags = ['wbr'];

    /**
     * the caches, indexed by the CACHE_KEY_* constants
     *
     * @var array<array>
     */
    private $caches = [
      self::CACHE_KEY_CSS => [],
      self::CACHE_KEY_SELECTOR => [],
      self::CACHE_KEY_XPATH => [],
      self::CACHE_KEY_CSS_DECLARATION_BLOCK => [],
    ];

    /**
     * the nodes that carried a style attribute in the original HTML, with the XPath node paths as array keys
     *
     * @var array<\DOMNode>
     */
    private $visitedNodes = [];

    /**
     * the original inline styles, with the XPath node paths as keys of the outer array and the
     * property names/values as key/value pairs of the inner arrays
     *
     * @var array<string, array<string, string>>
     */
    private $styleAttributesForNodes = [];

    /**
     * This attribute applies to the case where you want to preserve your original text encoding.
     *
     * By default, emogrifier translates your text into HTML entities for two reasons:
     *
     * 1. Because of client incompatibilities, it is better practice to send out HTML entities rather than unicode
     *    over email.
     *
     * 2. It translates any illegal XML characters that DOMDocument cannot work with.
     *
     * If you would like to preserve your original encoding, set this attribute to TRUE.
     *
     * @var boolean
     */
    public $preserveEncoding = FALSE;

    /**
     * The constructor.
     *
     * @param string $html the HTML to emogrify, must be UTF-8-encoded
     * @param string $css the CSS to merge, must be UTF-8-encoded
     */
    public function __construct($html = '', $css = '') {
      $this->setHtml($html);
      $this->setCss($css);
    }

    /**
     * The destructor. Drops the references to the DOM nodes collected during the last run.
     */
    public function __destruct() {
      $this->purgeVisitedNodes();
    }

    /**
     * Sets the HTML to emogrify.
     *
     * @param string $html the HTML to emogrify, must be UTF-8-encoded
     *
     * @return void
     */
    public function setHtml($html = '') {
      $this->html = $html;
    }

    /**
     * Sets the CSS to merge with the HTML.
     *
     * @param string $css the CSS to merge, must be UTF-8-encoded
     *
     * @return void
     */
    public function setCss($css = '') {
      $this->css = $css;
    }

    /**
     * Clears all caches.
     *
     * @return void
     */
    private function clearAllCaches() {
      foreach (array_keys($this->caches) as $key) {
        $this->clearCache($key);
      }
    }

    /**
     * Clears a single cache by key.
     *
     * @param integer $key the cache key, must be CACHE_KEY_CSS, CACHE_KEY_SELECTOR, CACHE_KEY_XPATH or
     *                     CACHE_KEY_CSS_DECLARATION_BLOCK
     *
     * @return void
     *
     * @throws \InvalidArgumentException if $key is none of the allowed cache keys
     */
    private function clearCache($key) {
      $allowedCacheKeys = [
        self::CACHE_KEY_CSS,
        self::CACHE_KEY_SELECTOR,
        self::CACHE_KEY_XPATH,
        self::CACHE_KEY_CSS_DECLARATION_BLOCK,
      ];
      if (!in_array($key, $allowedCacheKeys, TRUE)) {
        throw new \InvalidArgumentException('Invalid cache key: ' . $key, 1391822035);
      }

      $this->caches[$key] = [];
    }

    /**
     * Purges the visited nodes and the original inline styles remembered for them.
     *
     * @return void
     */
    private function purgeVisitedNodes() {
      $this->visitedNodes = [];
      $this->styleAttributesForNodes = [];
    }

    /**
     * Marks a tag for removal.
     *
     * The opening and closing markup of these tags is stripped from the HTML before it is handed to DOMDocument;
     * whatever is between the tags stays in the document.
     *
     * @param string $tagName the tag name, e.g., "p"
     *
     * @return void
     */
    public function addUnprocessableHtmlTag($tagName) {
      $this->unprocessableHtmlTags[] = $tagName;
    }

    /**
     * Drops a tag from the removal list.
     *
     * @param string $tagName the tag name, e.g., "p"
     *
     * @return void
     */
    public function removeUnprocessableHtmlTag($tagName) {
      $key = array_search($tagName, $this->unprocessableHtmlTags, TRUE);
      if ($key !== FALSE) {
        unset($this->unprocessableHtmlTags[$key]);
        // keep the list gap-free
        $this->unprocessableHtmlTags = array_values($this->unprocessableHtmlTags);
      }
    }

    /**
     * Applies the CSS you submit to the HTML you submit.
     *
     * This method places the CSS inline. The CSS of all style elements found in the HTML is appended to the
     * CSS set via setCss(), and the style elements are removed from the document.
     *
     * @return string the rendered HTML document
     *
     * @throws \BadMethodCallException if no HTML has been set
     */
    public function emogrify() {
      if ($this->html === '') {
        throw new \BadMethodCallException('Please set some HTML first before calling emogrify.', 1390393096);
      }

      $xmlDocument = $this->createXmlDocument();
      $xpath = new \DOMXPath($xmlDocument);
      $this->clearAllCaches();
      $this->purgeVisitedNodes();

      $this->normalizeExistingInlineStyles($xpath);

      // The CSS from the style elements is appended so that it has precedence over conflicting styles in
      // the CSS that was set explicitly.
      $allCss = $this->css . $this->getCssFromAllStyleNodes($xpath);
      $cssParts = $this->splitCssAndMediaQuery($allCss);

      foreach ($this->getSortedCssRules($cssParts['css']) as $rule) {
        $this->applyCssRule($xpath, $rule);
      }

      $this->restoreOriginalInlineStyles();
      $this->removeNodesWithDisplayNone($xpath);
      $this->copyCssWithMediaToStyleNode($cssParts, $xmlDocument);

      if ($this->preserveEncoding) {
        // NOTE(review): the "HTML-ENTITIES" pseudo-encoding is deprecated as of PHP 8.2. It is kept here
        // because a drop-in replacement with exactly the same decoding rules still needs to be confirmed.
        return mb_convert_encoding($xmlDocument->saveHTML(), self::ENCODING, 'HTML-ENTITIES');
      }

      return $xmlDocument->saveHTML();
    }

    /**
     * Lower-cases the property names in all existing style attributes (e.g. changes "DISPLAY: none" to
     * "display: none") and remembers the original inline styles so that they can be re-applied on top of
     * the merged CSS later.
     *
     * We wouldn't have to do this if DOMXPath supported XPath 2.0.
     *
     * @param \DOMXPath $xpath
     *
     * @return void
     */
    private function normalizeExistingInlineStyles(\DOMXPath $xpath) {
      $nodesWithStyleAttributes = $xpath->query('//*[@style]');
      if ($nodesWithStyleAttributes === FALSE) {
        return;
      }

      /** @var $node \DOMElement */
      foreach ($nodesWithStyleAttributes as $node) {
        // The "A-z" range also spans a few punctuation characters; strtolower() leaves those untouched.
        $normalizedOriginalStyle = preg_replace_callback(
          '/[A-z\\-]+(?=\\:)/S',
          function (array $m) {
            return strtolower($m[0]);
          },
          $node->getAttribute('style')
        );

        $nodePath = $node->getNodePath();
        if (!isset($this->styleAttributesForNodes[$nodePath])) {
          $this->styleAttributesForNodes[$nodePath] = $this->parseCssDeclarationBlock($normalizedOriginalStyle);
          $this->visitedNodes[$nodePath] = $node;
        }

        $node->setAttribute('style', $normalizedOriginalStyle);
      }
    }

    /**
     * Parses $css into single-selector rules and sorts them by ascending precedence.
     *
     * Rules with an empty declaration block and selectors with pseudo-elements or pseudo-classes that cannot be
     * translated to XPath are dropped.
     *
     * @param string $css the CSS without media queries
     *
     * @return array<array> the rules, each with the keys "selector", "attributes" and "line"
     */
    private function getSortedCssRules($css) {
      $cssKey = md5($css);
      if (isset($this->caches[self::CACHE_KEY_CSS][$cssKey])) {
        return $this->caches[self::CACHE_KEY_CSS][$cssKey];
      }

      $matches = [];
      preg_match_all('/(?:^|[\\s^{}]*)([^{]+){([^}]*)}/mis', $css, $matches, PREG_SET_ORDER);

      $rules = [];
      foreach ($matches as $position => $match) {
        $declarationBlock = trim($match[2]);
        if ($declarationBlock === '') {
          continue;
        }

        // split by commas and duplicate the declarations so we can sort by selector precedence
        foreach (explode(',', $match[1]) as $selector) {
          if ($this->hasUnsupportedPseudoSelector($selector)) {
            continue;
          }

          $rules[] = [
            'selector' => trim($selector),
            'attributes' => $declarationBlock,
            // keep track of where it appears in the CSS, since order is important
            'line' => $position,
          ];
        }
      }

      usort($rules, [$this, 'sortBySelectorPrecedence']);
      $this->caches[self::CACHE_KEY_CSS][$cssKey] = $rules;

      return $rules;
    }

    /**
     * Checks whether $selector contains a colon that is not part of one of the structural pseudo-classes
     * translateCssToXpath() can handle (:first-child, :last-child, :nth-child() and :nth-of-type()).
     *
     * @param string $selector a single CSS selector
     *
     * @return boolean TRUE if the selector must be skipped, FALSE otherwise
     */
    private function hasUnsupportedPseudoSelector($selector) {
      if (strpos($selector, ':') === FALSE) {
        return FALSE;
      }

      $selectorWithoutSupportedPseudoClasses = preg_replace(
        '/:(?:first-child|last-child|nth-child\\([^)]*\\)|nth-of-type\\([^)]*\\))/i',
        '',
        $selector
      );

      return strpos($selectorWithoutSupportedPseudoClasses, ':') !== FALSE;
    }

    /**
     * Merges the declarations of $rule into the style attribute of all nodes matched by the rule's selector.
     *
     * @param \DOMXPath $xpath
     * @param array $rule a rule as returned by getSortedCssRules()
     *
     * @return void
     */
    private function applyCssRule(\DOMXPath $xpath, array $rule) {
      $matchingNodes = $xpath->query($this->translateCssToXpath($rule['selector']));
      if ($matchingNodes === FALSE) {
        // the selector could not be translated into a valid XPath expression
        return;
      }

      /** @var $node \DOMElement */
      foreach ($matchingNodes as $node) {
        if ($node->hasAttribute('style')) {
          // new styles overwrite the old styles (not technically accurate, but close enough)
          $style = $this->buildStyleAttributeValue(
            array_merge(
              $this->parseCssDeclarationBlock($node->getAttribute('style')),
              $this->parseCssDeclarationBlock($rule['attributes'])
            )
          );
        } else {
          $style = trim($rule['attributes']);
        }

        $node->setAttribute('style', $style);
      }
    }

    /**
     * Re-applies the inline styles of the original HTML on top of the merged CSS so that the original inline
     * styles win over the styles from the CSS.
     *
     * @return void
     */
    private function restoreOriginalInlineStyles() {
      foreach ($this->styleAttributesForNodes as $nodePath => $originalDeclarations) {
        $node = $this->visitedNodes[$nodePath];
        $currentDeclarations = $this->parseCssDeclarationBlock($node->getAttribute('style'));
        $node->setAttribute(
          'style',
          $this->buildStyleAttributeValue(array_merge($currentDeclarations, $originalDeclarations))
        );
      }
    }

    /**
     * Serializes property name/value pairs into a style attribute value of the form "name:value;name:value;".
     *
     * @param array $declarations the property names as keys and the property values as values
     *
     * @return string
     */
    private function buildStyleAttributeValue(array $declarations) {
      $style = '';
      foreach ($declarations as $propertyName => $propertyValue) {
        $style .= strtolower($propertyName) . ':' . $propertyValue . ';';
      }

      return $style;
    }

    /**
     * Removes all nodes whose style attribute contains "display:none".
     *
     * The search needs to be case-insensitive. Since DOMDocument only supports XPath 1.0, lower-case() isn't
     * available to us. We've thus far only set property names to lowercase, not property values. Consequently,
     * we need to translate() the letters that would be in 'NONE' ("NOE") to lowercase.
     *
     * @param \DOMXPath $xpath
     *
     * @return void
     */
    private function removeNodesWithDisplayNone(\DOMXPath $xpath) {
      $nodesWithStyleDisplayNone = $xpath->query(
        '//*[contains(translate(translate(@style," ",""),"NOE","noe"),"display:none")]'
      );
      if ($nodesWithStyleDisplayNone === FALSE) {
        return;
      }

      /** @var $node \DOMNode */
      foreach ($nodesWithStyleDisplayNone as $node) {
        // The checks on parentNode and is_callable ensure that if we've deleted the parent node,
        // we don't try to call removeChild on a nonexistent child node.
        if ($node->parentNode && is_callable([$node->parentNode, 'removeChild'])) {
          $node->parentNode->removeChild($node);
        }
      }
    }

    /**
     * Copies the media part from CSS array parts to $xmlDocument.
     *
     * Nothing is added if there is no "media" key or if it is empty.
     *
     * @param array $cssParts the CSS parts as returned by splitCssAndMediaQuery(), only the key "media" is used
     * @param \DOMDocument $xmlDocument
     *
     * @return void
     */
    public function copyCssWithMediaToStyleNode(array $cssParts, \DOMDocument $xmlDocument) {
      if (isset($cssParts['media']) && $cssParts['media'] !== '') {
        $this->addStyleElementToDocument($xmlDocument, $cssParts['media']);
      }
    }

    /**
     * Returns the concatenated CSS of all style elements and removes these elements from the document.
     *
     * @param \DOMXPath $xpath
     *
     * @return string the CSS, each part preceded by two linefeeds, will be empty if there are no style elements
     */
    private function getCssFromAllStyleNodes(\DOMXPath $xpath) {
      $styleNodes = $xpath->query('//style');
      if ($styleNodes === FALSE) {
        return '';
      }

      $css = '';
      /** @var $styleNode \DOMNode */
      foreach ($styleNodes as $styleNode) {
        $css .= "\n\n" . $styleNode->nodeValue;
        $styleNode->parentNode->removeChild($styleNode);
      }

      return $css;
    }

    /**
     * Adds a style element (type "text/css") with $css to the head of $document.
     *
     * @param \DOMDocument $document
     * @param string $css
     *
     * @return void
     */
    private function addStyleElementToDocument(\DOMDocument $document, $css) {
      $styleElement = $document->createElement('style');
      // A text node takes the CSS verbatim; passing it as the createElement() value would have it parsed
      // for entity references and raise a warning for a bare "&".
      $styleElement->appendChild($document->createTextNode($css));
      $styleElement->setAttribute('type', 'text/css');

      $this->getOrCreateHeadElement($document)->appendChild($styleElement);
    }

    /**
     * Returns the existing or creates a new head element in $document.
     *
     * A new head element is inserted before the body element (or appended if there is no body element).
     *
     * @param \DOMDocument $document
     *
     * @return \DOMNode the head element
     */
    private function getOrCreateHeadElement(\DOMDocument $document) {
      $head = $document->getElementsByTagName('head')->item(0);
      if ($head !== NULL) {
        return $head;
      }

      $html = $document->getElementsByTagName('html')->item(0);
      if ($html === NULL) {
        // fall back to the root element, creating an html element only for an otherwise empty document
        $html = ($document->documentElement !== NULL)
          ? $document->documentElement
          : $document->appendChild($document->createElement('html'));
      }

      $head = $document->createElement('head');
      $html->insertBefore($head, $document->getElementsByTagName('body')->item(0));

      return $head;
    }

    /**
     * Splits input CSS code to an array where:
     *
     * - key "css" contains the CSS code without comments, import directives and media queries
     * - key "media" contains the media queries that are worth keeping (those matched by the first pattern
     *   below, e.g. queries for "screen" or "all"), concatenated in their original order
     *
     * All other media queries are dropped.
     *
     * @param string $css
     *
     * @return array<string> the array with the keys "css" and "media"
     */
    private function splitCssAndMediaQuery($css) {
      $media = '';

      // move the media queries worth keeping from the CSS to $media
      $css = preg_replace_callback(
        '#@media\\s+(?:only\\s)?(?:[\\s{\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU',
        function (array $matches) use (&$media) {
          $media .= $matches[0];

          return '';
        },
        $css
      );

      $search = [
        // get rid of css comment code
        '/\\/\\*.*\\*\\//sU',
        // strip out any import directives
        '/^\\s*@import\\s[^;]+;/misU',
        // strip remains media enclosures
        '/^\\s*@media\\s[^{]+{(.*)}\\s*}\\s/misU',
      ];
      $css = preg_replace($search, '', $css);

      return ['css' => $css, 'media' => $media];
    }

    /**
     * Creates a DOMDocument instance with the current HTML.
     *
     * libxml parse errors are collected internally and discarded; the previous libxml error handling mode is
     * restored afterwards.
     *
     * @return \DOMDocument
     */
    private function createXmlDocument() {
      $xmlDocument = new \DOMDocument;
      $xmlDocument->encoding = self::ENCODING;
      $xmlDocument->strictErrorChecking = FALSE;
      $xmlDocument->formatOutput = TRUE;

      $libXmlState = libxml_use_internal_errors(TRUE);
      $xmlDocument->loadHTML($this->getUnifiedHtml());
      libxml_clear_errors();
      libxml_use_internal_errors($libXmlState);

      $xmlDocument->normalizeDocument();

      return $xmlDocument;
    }

    /**
     * Returns the HTML with the non-ASCII characters converted into numeric HTML entities and the markup of the
     * unprocessable HTML tags removed.
     *
     * @return string the unified HTML
     */
    private function getUnifiedHtml() {
      $html = $this->html;

      if (!empty($this->unprocessableHtmlTags)) {
        $quotedTagNames = array_map(
          function ($tagName) {
            return preg_quote($tagName, '/');
          },
          $this->unprocessableHtmlTags
        );
        // The lookahead makes sure that only the complete tag name matches (i.e., "p" does not match "<pre>").
        $html = preg_replace('/<\\/?(?:' . implode('|', $quotedTagNames) . ')(?=[\\s\\/>])[^>]*>/i', '', $html);
      }

      // everything outside of ASCII becomes a numeric entity which DOMDocument decodes again while parsing
      return mb_encode_numericentity($html, [0x80, 0x10FFFF, 0, 0x1FFFFF], self::ENCODING);
    }

    /**
     * Comparison callback for usort(): orders rules by ascending selector precedence and, for equal precedence,
     * by their position in the CSS, so that rules with greater precedence get applied last.
     *
     * @param array $a a rule with the keys "selector" and "line"
     * @param array $b a rule with the keys "selector" and "line"
     *
     * @return integer -1 if $a is to be sorted before $b, 1 otherwise
     */
    private function sortBySelectorPrecedence(array $a, array $b) {
      $precedenceA = $this->getCssSelectorPrecedence($a['selector']);
      $precedenceB = $this->getCssSelectorPrecedence($b['selector']);

      if ($precedenceA === $precedenceB) {
        return ($a['line'] < $b['line']) ? -1 : 1;
      }

      return ($precedenceA < $precedenceB) ? -1 : 1;
    }

    /**
     * Calculates a rough precedence for $selector: ids are worth 100, classes are worth 10 and every remaining
     * name (e.g., an element name) is worth 1.
     *
     * @param string $selector
     *
     * @return integer
     */
    private function getCssSelectorPrecedence($selector) {
      $selectorKey = md5($selector);
      if (isset($this->caches[self::CACHE_KEY_SELECTOR][$selectorKey])) {
        return $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey];
      }

      // regular expression prefix => weight; the order matters as each pass consumes what it has counted
      $weights = ['\\#' => 100, '\\.' => 10, '' => 1];

      $precedence = 0;
      $remainingSelector = $selector;
      foreach ($weights as $prefix => $weight) {
        if (trim($remainingSelector) === '') {
          break;
        }

        $number = 0;
        $remainingSelector = preg_replace('/' . $prefix . '[\\w\\-]+/', '', $remainingSelector, -1, $number);
        $precedence += $weight * $number;
      }

      $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey] = $precedence;

      return $precedence;
    }

    /**
     * Translates a CSS selector into an XPath expression.
     *
     * Right now, we support all CSS 1 selectors and most CSS2/3 selectors.
     *
     * @see http://plasmasturm.org/log/444/
     *
     * @param string $cssSelector
     *
     * @return string the XPath expression
     */
    private function translateCssToXpath($cssSelector) {
      $cssSelector = trim($cssSelector);
      $xpathKey = md5($cssSelector);
      if (isset($this->caches[self::CACHE_KEY_XPATH][$xpathKey])) {
        return $this->caches[self::CACHE_KEY_XPATH][$xpathKey];
      }

      $search = [
        // Matches any element that is a child of parent.
        '/\\s+>\\s+/',
        // Matches any element that is an adjacent sibling.
        '/\\s+\\+\\s+/',
        // Matches any element that is a descendant of a parent element.
        '/\\s+/',
        // first-child pseudo-selector
        '/([^\\/]+):first-child/i',
        // last-child pseudo-selector
        '/([^\\/]+):last-child/i',
        // Matches attribute only selector
        '/^\\[(\\w+)\\]/',
        // Matches element with attribute
        '/(\\w)\\[(\\w+)\\]/',
        // Matches element with EXACT attribute
        '/(\\w)\\[(\\w+)\\=[\'"]?(\\w+)[\'"]?\\]/',
      ];
      $replace = [
        '/',
        '/following-sibling::*[1]/self::',
        '//',
        '*[1]/self::\\1',
        '*[last()]/self::\\1',
        '*[@\\1]',
        '\\1[@\\2]',
        '\\1[@\\2="\\3"]',
      ];

      $xpath = '//' . preg_replace($search, $replace, $cssSelector);
      $xpath = preg_replace_callback(self::ID_ATTRIBUTE_MATCHER, [$this, 'matchIdAttributes'], $xpath);
      $xpath = preg_replace_callback(self::CLASS_ATTRIBUTE_MATCHER, [$this, 'matchClassAttributes'], $xpath);

      // group 2 of these patterns is the argument: "odd", "even", a number or an "an+b" expression
      $xpath = preg_replace_callback(
        '/([^\\/]+):nth-child\\(\\s*(odd|even|[+\\-]?\\d+|[+\\-]?\\d*n(\\s*[+\\-]\\s*\\d+)?)\\s*\\)/i',
        [$this, 'translateNthChild'],
        $xpath
      );
      $xpath = preg_replace_callback(
        '/([^\\/]+):nth-of-type\\(\\s*(odd|even|[+\\-]?\\d+|[+\\-]?\\d*n(\\s*[+\\-]\\s*\\d+)?)\\s*\\)/i',
        [$this, 'translateNthOfType'],
        $xpath
      );

      // This must go into the XPath cache: the selector cache uses the very same keys for the precedences.
      $this->caches[self::CACHE_KEY_XPATH][$xpathKey] = $xpath;

      return $xpath;
    }

    /**
     * Callback for ID_ATTRIBUTE_MATCHER: translates "element#id" (or "#id") into an XPath id test.
     *
     * @param array $match the match with the optional element name in [1] and the id in [2]
     *
     * @return string
     */
    private function matchIdAttributes(array $match) {
      $elementName = ($match[1] !== '') ? $match[1] : '*';

      return $elementName . '[@id="' . $match[2] . '"]';
    }

    /**
     * Callback for CLASS_ATTRIBUTE_MATCHER: translates "element.class1.class2" (or ".class1.class2") into
     * one XPath class test per class name.
     *
     * @param array $match the match with the optional element name in [1] and the dot-prefixed class names in [2]
     *
     * @return string
     */
    private function matchClassAttributes(array $match) {
      $xpath = ($match[1] !== '') ? $match[1] : '*';

      // strip the leading dot, then every dot separates two class names
      foreach (explode('.', substr($match[2], 1)) as $className) {
        $xpath .= '[contains(concat(" ",@class," "),concat(" ","' . $className . '"," "))]';
      }

      return $xpath;
    }

    /**
     * Callback that translates "element:nth-child(...)" into XPath.
     *
     * @param array $match the match with the element in [1] and the nth-child argument in [2]
     *
     * @return string
     */
    private function translateNthChild(array $match) {
      return '*' . $this->buildNthPredicate($this->parseNth($match)) . '/self::' . $match[1];
    }

    /**
     * Callback that translates "element:nth-of-type(...)" into XPath.
     *
     * @param array $match the match with the element in [1] and the nth-of-type argument in [2]
     *
     * @return string
     */
    private function translateNthOfType(array $match) {
      return $match[1] . $this->buildNthPredicate($this->parseNth($match));
    }

    /**
     * Builds the XPath predicate (including the square brackets) that selects the positions described by $nth.
     *
     * For "an+b", a position p matches if p = a*n + b for some n >= 0, i.e. if p is on the proper side of b
     * and the distance between p and b is a multiple of a.
     *
     * @param array $nth the result of parseNth()
     *
     * @return string
     */
    private function buildNthPredicate(array $nth) {
      $index = $nth[self::INDEX];
      if (!isset($nth[self::MULTIPLIER])) {
        // positions start at 1, so anything below that selects nothing
        return sprintf('[%d]', max(0, $index));
      }

      $multiplier = $nth[self::MULTIPLIER];
      if ($multiplier < 0) {
        return sprintf(
          '[(position() <= %d) and ((%d - position()) mod %d = 0)]', $index, $index, abs($multiplier)
        );
      }

      return sprintf('[(position() >= %d) and ((position() - %d) mod %d = 0)]', $index, $index, $multiplier);
    }

    /**
     * Parses the argument of an nth-child or nth-of-type selector.
     *
     * @param array $match the match with the argument ("odd", "even", a number or "an+b") in [2]
     *
     * @return array the offset with the key INDEX and, unless the argument selects a single position, the
     *               step with the key MULTIPLIER
     */
    private function parseNth(array $match) {
      $expression = preg_replace('/\\s+/', '', strtolower($match[2]));

      if ($expression === 'even') {
        return [self::MULTIPLIER => 2, self::INDEX => 0];
      }
      if ($expression === 'odd') {
        return [self::MULTIPLIER => 2, self::INDEX => 1];
      }
      if (strpos($expression, 'n') === FALSE) {
        // a plain number
        return [self::INDEX => intval($expression)];
      }

      list($multiplierTerm, $indexTerm) = explode('n', $expression, 2);
      $index = ($indexTerm === '') ? 0 : intval($indexTerm);

      if ($multiplierTerm === '' || $multiplierTerm === '+') {
        $multiplier = 1;
      } elseif ($multiplierTerm === '-') {
        $multiplier = -1;
      } else {
        $multiplier = intval($multiplierTerm);
      }

      if ($multiplier === 0) {
        return [self::INDEX => $index];
      }

      // For a positive step, a negative offset can be shifted into the positive range without changing the
      // selected positions. This must not be done for a negative step ("-2n-1" selects nothing).
      while ($multiplier > 0 && $index < 0) {
        $index += $multiplier;
      }

      return [self::MULTIPLIER => $multiplier, self::INDEX => $index];
    }

    /**
     * Parses a CSS declaration block into property name/value pairs.
     *
     * Example:
     *
     * The declaration block
     *
     * "color: #000; font-weight: bold;"
     *
     * will be parsed into the following array:
     *
     * "color" => "#000"
     * "font-weight" => "bold"
     *
     * Property names are lower-cased, property values are trimmed, and declarations without a value are
     * dropped.
     *
     * @param string $cssDeclarationBlock the CSS declaration block without the curly braces, may be empty
     *
     * @return array the CSS declarations with the property names as array keys and the property values as array values
     */
    private function parseCssDeclarationBlock($cssDeclarationBlock) {
      if (isset($this->caches[self::CACHE_KEY_CSS_DECLARATION_BLOCK][$cssDeclarationBlock])) {
        return $this->caches[self::CACHE_KEY_CSS_DECLARATION_BLOCK][$cssDeclarationBlock];
      }

      $properties = [];
      foreach (explode(';', $cssDeclarationBlock) as $declaration) {
        $matches = [];
        // case-insensitive so that the match starts at the first letter of names like "Color"
        if (!preg_match('/([a-z\\-]+)\\s*:\\s*([^;]+)/i', $declaration, $matches)) {
          continue;
        }

        $propertyValue = trim($matches[2]);
        if ($propertyValue === '') {
          continue;
        }

        $properties[strtolower($matches[1])] = $propertyValue;
      }

      $this->caches[self::CACHE_KEY_CSS_DECLARATION_BLOCK][$cssDeclarationBlock] = $properties;

      return $properties;
    }
  }