Html.php 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059
  1. <?php
  2. /**
  3. * This file is part of PHPWord - A pure PHP library for reading and writing
  4. * word processing documents.
  5. *
  6. * PHPWord is free software distributed under the terms of the GNU Lesser
  7. * General Public License version 3 as published by the Free Software Foundation.
  8. *
  9. * For the full copyright and license information, please read the LICENSE
  10. * file that was distributed with this source code. For the full list of
  11. * contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
  12. *
  13. * @see https://github.com/PHPOffice/PHPWord
  14. * @copyright 2010-2018 PHPWord contributors
  15. * @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
  16. */
  17. namespace PhpOffice\PhpWord\Shared;
  18. use PhpOffice\PhpWord\Element\AbstractContainer;
  19. use PhpOffice\PhpWord\Element\Row;
  20. use PhpOffice\PhpWord\Element\Table;
  21. use PhpOffice\PhpWord\Settings;
  22. use PhpOffice\PhpWord\SimpleType\Jc;
  23. use PhpOffice\PhpWord\SimpleType\NumberFormat;
  24. use PhpOffice\PhpWord\Style\Paragraph;
  25. /**
  26. * Common Html functions
  27. *
  28. * @SuppressWarnings(PHPMD.UnusedPrivateMethod) For readWPNode
  29. */
  30. class Html
  31. {
  /**
   * Running counter appended to 'listStyle_' to build the numbering style name of each
   * top-level list; incremented every time parseList() starts a new top-level list.
   *
   * @var int
   */
  protected static $listIndex = 0;

  /**
   * XPath helper bound to the document currently being parsed; assigned in addHtml()
   * and queried by shouldAddTextRun().
   *
   * @var \DOMXPath
   */
  protected static $xpath;

  /**
   * Options array handed to addHtml() (may be null), stored for the duration of the parse.
   *
   * @var array|null
   */
  protected static $options;
  /**
   * Parse an HTML string and add the corresponding elements to a container.
   *
   * The markup is loaded with DOMDocument::loadXML(), so it must be well-formed XML
   * (closed tags, quoted attributes). HTML named entities are decoded beforehand.
   *
   * Note: $stylesheet parameter is removed to avoid PHPMD error for unused parameter
   * Warning: Do not pass user-generated HTML here, as that would allow an attacker to read arbitrary
   * files or perform server-side request forgery by passing local file paths or URLs in <img>.
   *
   * @param \PhpOffice\PhpWord\Element\AbstractContainer $element Where the parts need to be added
   * @param string $html The code to parse
   * @param bool $fullHTML If it's a full HTML, no need to add 'body' tag
   * @param bool $preserveWhiteSpace If false, the whitespaces between nodes will be removed
   * @param array|null $options Optional settings:
   *                            + IMG_SRC_SEARCH: optional to speed up images loading from remote url when files can be found locally
   *                            + IMG_SRC_REPLACE: optional to speed up images loading from remote url when files can be found locally
   */
  public static function addHtml($element, $html, $fullHTML = false, $preserveWhiteSpace = true, $options = null)
  {
      /*
       * @todo parse $stylesheet for default styles. Should result in an array based on id, class and element,
       * which could be applied when such an element occurs in the parseNode function.
       */
      self::$options = $options;

      // Preprocess: remove all line ends.
      $html = str_replace(array("\n", "\r"), '', $html);
      // Park the entities that must stay escaped in XML behind placeholders ...
      $html = str_replace(array('&lt;', '&gt;', '&amp;', '&quot;'), array('_lt_', '_gt_', '_amp_', '_quot_'), $html);
      // ... decode every other HTML entity to its UTF-8 character ...
      $html = html_entity_decode($html, ENT_QUOTES, 'UTF-8');
      // ... escape whatever bare ampersands are left ...
      $html = str_replace('&', '&amp;', $html);
      // ... and put the parked entities back.
      $html = str_replace(array('_lt_', '_gt_', '_amp_', '_quot_'), array('&lt;', '&gt;', '&amp;', '&quot;'), $html);

      if (false === $fullHTML) {
          $html = '<body>' . $html . '</body>';
      }

      // libxml_disable_entity_loader() is only called on PHP < 8, as in the original guard.
      $toggleEntityLoader = \PHP_VERSION_ID < 80000;
      $previousEntityLoaderState = null;
      if ($toggleEntityLoader) {
          $previousEntityLoaderState = libxml_disable_entity_loader(true);
      }

      try {
          $dom = new \DOMDocument();
          $dom->preserveWhiteSpace = $preserveWhiteSpace;
          $dom->loadXML($html);
          self::$xpath = new \DOMXPath($dom);

          // A document without <body> (or one that failed to load) has nothing to convert;
          // skip it instead of handing null to parseNode().
          $body = $dom->getElementsByTagName('body')->item(0);
          if (null !== $body) {
              self::parseNode($body, $element);
          }
      } finally {
          // Restore the process-wide libxml setting even when parsing throws.
          if ($toggleEntityLoader) {
              libxml_disable_entity_loader($previousEntityLoaderState);
          }
      }
  }
  /**
   * Build a style array from the presentational attributes of a node.
   *
   * Only element nodes are inspected; any other node type returns $styles untouched.
   * Recognised attributes (case-insensitive): style, align, lang, width, cellspacing,
   * bgcolor and valign. Unknown attributes are ignored.
   *
   * @param \DOMNode $node Node to check on attributes and to compile a style array
   * @param array $styles Existing styles; values derived from the attributes are added to / override them
   * @return array The merged style array
   */
  protected static function parseInlineStyle($node, $styles = array())
  {
      if (XML_ELEMENT_NODE != $node->nodeType) {
          return $styles;
      }

      foreach ($node->attributes as $attribute) {
          $val = $attribute->value;
          switch (strtolower($attribute->name)) {
              case 'style':
                  $styles = self::parseStyle($attribute, $styles);
                  break;
              case 'align':
                  $styles['alignment'] = self::mapAlign(trim($val));
                  break;
              case 'lang':
                  $styles['lang'] = $val;
                  break;
              case 'width':
                  // tables, cells
                  if (false !== strpos($val, '%')) {
                      // e.g. <table width="100%"> or <td width="50%">
                      // NOTE(review): the factor 50 presumably maps percent to the PERCENT unit's scale - confirm against TblWidth
                      $styles['width'] = (int) $val * 50;
                      $styles['unit'] = \PhpOffice\PhpWord\SimpleType\TblWidth::PERCENT;
                  } else {
                      // e.g. <table width="250"> where "250" = 250px (always pixels)
                      // Numeric strings keep their exact value; anything else ("250px", "auto")
                      // is reduced to its leading integer so no non-numeric string reaches the converter.
                      $pixels = is_numeric($val) ? $val + 0 : (int) $val;
                      $styles['width'] = Converter::pixelToTwip($pixels);
                      $styles['unit'] = \PhpOffice\PhpWord\SimpleType\TblWidth::TWIP;
                  }
                  break;
              case 'cellspacing':
                  // tables e.g. <table cellspacing="2">, where "2" = 2px (always pixels)
                  $val = (int) $val . 'px';
                  $styles['cellSpacing'] = Converter::cssToTwip($val);
                  break;
              case 'bgcolor':
                  // tables, rows, cells e.g. <tr bgColor="#FF0000">
                  $styles['bgColor'] = trim($val, '# ');
                  break;
              case 'valign':
                  // cells e.g. <td valign="middle">
                  if (preg_match('#(?:top|bottom|middle|baseline)#i', $val, $matches)) {
                      $styles['valign'] = self::mapAlignVertical($matches[0]);
                  }
                  break;
          }
      }

      return $styles;
  }
  /**
   * Convert one DOM node into its PHPWord counterpart, then descend into its children.
   *
   * The node name selects a row of the mapping table below. Each row names the handler
   * ("parse" + first column) and the values handed to it; null columns are left out of the
   * call. Handlers that declare by-reference parameters (styles, data) can modify them, and
   * the modified values are used for the children. When no handler matches, or the handler
   * returns null, children are attached to the incoming $element.
   *
   * @param \DOMNode $node node to parse
   * @param \PhpOffice\PhpWord\Element\AbstractContainer $element object to add an element corresponding with the node
   * @param array $styles Array with all styles
   * @param array $data Array to transport data to a next level in the DOM tree, for example level of listitems
   */
  protected static function parseNode($node, $element, $styles = array(), $data = array())
  {
      // Guarantee that every style bucket exists before a handler indexes into it.
      foreach (array('font', 'paragraph', 'list', 'table', 'row', 'cell') as $bucket) {
          if (!isset($styles[$bucket])) {
              $styles[$bucket] = array();
          }
      }

      // Node mapping table
      $nodes = array(
          //            $method      $node   $element  $styles  $data   $argument1  $argument2
          'p'      => array('Paragraph', $node, $element, $styles, null, null, null),
          'h1'     => array('Heading', null, $element, $styles, null, 'Heading1', null),
          'h2'     => array('Heading', null, $element, $styles, null, 'Heading2', null),
          'h3'     => array('Heading', null, $element, $styles, null, 'Heading3', null),
          'h4'     => array('Heading', null, $element, $styles, null, 'Heading4', null),
          'h5'     => array('Heading', null, $element, $styles, null, 'Heading5', null),
          'h6'     => array('Heading', null, $element, $styles, null, 'Heading6', null),
          '#text'  => array('Text', $node, $element, $styles, null, null, null),
          'strong' => array('Property', null, null, $styles, null, 'bold', true),
          'b'      => array('Property', null, null, $styles, null, 'bold', true),
          'em'     => array('Property', null, null, $styles, null, 'italic', true),
          'i'      => array('Property', null, null, $styles, null, 'italic', true),
          'u'      => array('Property', null, null, $styles, null, 'underline', 'single'),
          'sup'    => array('Property', null, null, $styles, null, 'superScript', true),
          'sub'    => array('Property', null, null, $styles, null, 'subScript', true),
          'span'   => array('Span', $node, null, $styles, null, null, null),
          'font'   => array('Span', $node, null, $styles, null, null, null),
          'table'  => array('Table', $node, $element, $styles, null, null, null),
          'tr'     => array('Row', $node, $element, $styles, null, null, null),
          'td'     => array('Cell', $node, $element, $styles, null, null, null),
          'th'     => array('Cell', $node, $element, $styles, null, null, null),
          'ul'     => array('List', $node, $element, $styles, $data, null, null),
          'ol'     => array('List', $node, $element, $styles, $data, null, null),
          'li'     => array('ListItem', $node, $element, $styles, $data, null, null),
          'img'    => array('Image', $node, $element, $styles, null, null, null),
          'br'     => array('LineBreak', null, $element, $styles, null, null, null),
          'a'      => array('Link', $node, $element, $styles, null, null, null),
          'input'  => array('Input', $node, $element, $styles, null, null, null),
          'hr'     => array('HorizRule', $node, $element, $styles, null, null, null),
      );

      $newElement = null;
      if (isset($nodes[$node->nodeName])) {
          // Split the row into the handler name and its six positional value slots.
          $spec = $nodes[$node->nodeName];
          $handler = 'parse' . array_shift($spec);
          $slots = array('node', 'element', 'styles', 'data', 'argument1', 'argument2');

          // Bind the non-null slots by reference so by-reference handler parameters write back into $spec.
          $bound = array();
          foreach ($slots as $position => $slot) {
              if (null !== $spec[$position]) {
                  $bound[$slot] = &$spec[$position];
              }
          }

          $newElement = call_user_func_array(array('PhpOffice\PhpWord\Shared\Html', $handler), array_values($bound));

          // Copy the (possibly modified) values back into the local variables of the same name.
          foreach ($bound as $slot => $current) {
              $$slot = $current;
          }
      }

      if (null === $newElement) {
          $newElement = $element;
      }

      static::parseChildNodes($node, $newElement, $styles, $data);
  }
  /**
   * Run parseNode() on every child of a node.
   *
   * Nothing happens for 'li' nodes (parseListItem() walks their children itself) or when
   * $element is not an AbstractContainer, Table or Row.
   *
   * @param \DOMNode $node
   * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
   * @param array $styles
   * @param array $data
   */
  protected static function parseChildNodes($node, $element, $styles, $data)
  {
      if ('li' == $node->nodeName) {
          return;
      }

      $children = $node->childNodes;
      if (empty($children)) {
          return;
      }

      $acceptsChildren = $element instanceof AbstractContainer
          || $element instanceof Table
          || $element instanceof Row;
      if (!$acceptsChildren) {
          return;
      }

      foreach ($children as $child) {
          self::parseNode($child, $element, $styles, $data);
      }
  }
  /**
   * Handle a <p> node: resolve its paragraph style and open a text run in the container.
   *
   * The resolved style replaces $styles['paragraph'], so it is also seen by the node's children.
   *
   * @param \DOMNode $node
   * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
   * @param array &$styles
   * @return \PhpOffice\PhpWord\Element\TextRun
   */
  protected static function parseParagraph($node, $element, &$styles)
  {
      $paragraphStyle = self::recursiveParseStylesInHierarchy($node, $styles['paragraph']);
      $styles['paragraph'] = $paragraphStyle;

      return $element->addTextRun($styles['paragraph']);
  }
  /**
   * Handle an <input> node. Only checkboxes are converted; every other type is ignored.
   *
   * A checkbox becomes a 'checkbox' form field inside a new text run. It is ticked when the
   * `checked` attribute is present with any value other than "false" (so checked="checked",
   * checked="" and checked="true" all tick the box); "false" and a missing attribute leave it empty.
   * The `type` value is compared case-insensitively.
   *
   * @param \DOMNode $node
   * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
   * @param array &$styles
   */
  protected static function parseInput($node, $element, &$styles)
  {
      $attributes = $node->attributes;

      $typeAttribute = $attributes->getNamedItem('type');
      if (null === $typeAttribute) {
          return;
      }

      if ('checkbox' !== strtolower(trim($typeAttribute->value))) {
          return;
      }

      // `checked` is a boolean attribute: its presence ticks the box. An explicit "false" is
      // still honoured as unchecked, which is how this method always treated that value.
      $checkedAttribute = $attributes->getNamedItem('checked');
      $checked = null !== $checkedAttribute
          && 'false' !== strtolower(trim($checkedAttribute->value));

      $textrun = $element->addTextRun($styles['paragraph']);
      $textrun->addFormField('checkbox')->setValue($checked);
  }
  /**
   * Handle a heading node (h1-h6): open a text run that uses the named heading style.
   *
   * $styles['paragraph'] is replaced by the style NAME (a string, not an array), which is
   * why parseText() checks is_array() before writing into it.
   *
   * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
   * @param array &$styles
   * @param string $argument1 Name of heading style
   * @return \PhpOffice\PhpWord\Element\TextRun
   *
   * @todo Think of a clever way of defining header styles, now it is only based on the assumption, that
   * Heading1 - Heading6 are already defined somewhere
   */
  protected static function parseHeading($element, &$styles, $argument1)
  {
      $styles['paragraph'] = $argument1;

      return $element->addTextRun($styles['paragraph']);
  }
  /**
   * Handle a text node: resolve the font style from its ancestors and add the text.
   *
   * Nothing is added when $element has no callable addText().
   *
   * @param \DOMNode $node
   * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
   * @param array &$styles
   */
  protected static function parseText($node, $element, &$styles)
  {
      $fontStyle = self::recursiveParseStylesInHierarchy($node, $styles['font']);
      $styles['font'] = $fontStyle;

      // Alignment is a paragraph property; mirror it there unless the paragraph style is a
      // style name (string), as it is below a heading.
      $hasAlignment = isset($styles['font']['alignment']);
      if ($hasAlignment && is_array($styles['paragraph'])) {
          $styles['paragraph']['alignment'] = $styles['font']['alignment'];
      }

      if (!is_callable(array($element, 'addText'))) {
          return;
      }

      $element->addText($node->nodeValue, $styles['font'], $styles['paragraph']);
  }
  /**
   * Handle a pure formatting node (b, strong, i, em, u, sup, sub): set one font style flag.
   *
   * No element is created; the flag is stored in $styles['font'] for the node's children.
   *
   * @param array &$styles
   * @param string $argument1 Style name
   * @param string $argument2 Style value
   */
  protected static function parseProperty(&$styles, $argument1, $argument2)
  {
      $styleName = $argument1;
      $styleValue = $argument2;

      $styles['font'][$styleName] = $styleValue;
  }
  /**
   * Handle a <span> or <font> node.
   *
   * NOTE(review): parseInlineStyle() takes the style array by value and returns the result,
   * and that result is discarded here, so $styles is left unchanged by this method. The inline
   * styles still reach the text because parseText() re-reads them from the ancestors through
   * recursiveParseStylesInHierarchy(). Kept as is; confirm before assigning the result, since
   * that would also pass non-inherited styles down to nested elements.
   *
   * @param \DOMNode $node
   * @param array &$styles
   */
  protected static function parseSpan($node, &$styles)
  {
      $fontStyles = $styles['font'];

      self::parseInlineStyle($node, $fontStyles);
  }
  /**
   * Handle a <table> node: add a table whose style comes from the node's own attributes.
   *
   * @param \DOMNode $node
   * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
   * @param array &$styles
   * @return Table $element
   *
   * @todo Apply the width / height attributes directly on the table once TableItem, RowItem
   *       and CellItem support relative width and height
   */
  protected static function parseTable($node, $element, &$styles)
  {
      $tableStyle = self::parseInlineStyle($node, $styles['table']);

      return $element->addTable($tableStyle);
  }
  /**
   * Handle a <tr> node: add a row, flagged as a table header row when it sits inside <thead>.
   *
   * @param \DOMNode $node
   * @param \PhpOffice\PhpWord\Element\Table $element
   * @param array &$styles
   * @return Row $element
   */
  protected static function parseRow($node, $element, &$styles)
  {
      $rowStyle = self::parseInlineStyle($node, $styles['row']);

      $insideTableHead = 'thead' === $node->parentNode->nodeName;
      if ($insideTableHead) {
          $rowStyle['tblHeader'] = true;
      }

      // First argument (row height) is left at null.
      return $element->addRow(null, $rowStyle);
  }
  /**
   * Handle a <td> / <th> node: add a cell to the current row.
   *
   * A positive integer `colspan` becomes the cell's gridSpan; any other value is ignored.
   * A resolved 'width' style is passed as the cell width and removed from the style array.
   * When the cell holds no block-level content (see shouldAddTextRun()), a text run is
   * opened inside the cell and returned so inline children share one paragraph.
   *
   * @param \DOMNode $node
   * @param \PhpOffice\PhpWord\Element\Table $element
   * @param array &$styles
   * @return \PhpOffice\PhpWord\Element\Cell|\PhpOffice\PhpWord\Element\TextRun $element
   */
  protected static function parseCell($node, $element, &$styles)
  {
      $cellStyles = self::recursiveParseStylesInHierarchy($node, $styles['cell']);

      // colspan comes straight from the markup: cast it instead of doing arithmetic on the raw
      // string, which raises a TypeError on PHP 8 for values such as colspan="x".
      $colspan = (int) $node->getAttribute('colspan');
      if ($colspan > 0) {
          $cellStyles['gridSpan'] = $colspan;
      }

      // set cell width to control column widths
      $width = isset($cellStyles['width']) ? $cellStyles['width'] : null;
      unset($cellStyles['width']); // would not apply

      $cell = $element->addCell($width, $cellStyles);

      if (self::shouldAddTextRun($node)) {
          $paragraphStyle = self::parseInlineStyle($node, $styles['paragraph']);

          return $cell->addTextRun(self::filterOutNonInheritedStyles($paragraphStyle));
      }

      return $cell;
  }
  /**
   * Tells whether the content of $node can be wrapped in a single TextRun.
   *
   * Block content is a <table> at any depth, or a <p>, <ul> or <ol> as a direct child.
   *
   * @param \DOMNode $node
   * @return bool False if the node contains such a block element (which cannot be added to a
   *              TextRun), true otherwise
   */
  protected static function shouldAddTextRun(\DOMNode $node)
  {
      $blockChildren = self::$xpath->query('.//table|./p|./ul|./ol', $node);

      return $blockChildren->length <= 0;
  }
  /**
   * Resolve the style of a node by walking up its element ancestors.
   *
   * The ancestors' styles are collected first (outermost first). For a text node they are
   * merged with $style, with $style winning. For any other node $style is not used; the
   * inherited set is stripped of non-inherited entries instead. The node's own inline
   * attributes are applied last.
   *
   * TODO if too slow, add caching of parent nodes, !! everything is static here so watch out for concurrency !!
   *
   * @param \DOMNode $node
   * @param array $style Styles to merge in; only used when $node is a text node
   * @return array
   */
  protected static function recursiveParseStylesInHierarchy(\DOMNode $node, array $style)
  {
      $parent = $node->parentNode;
      $hasElementParent = $parent != null && XML_ELEMENT_NODE == $parent->nodeType;

      $inherited = $hasElementParent
          ? self::recursiveParseStylesInHierarchy($parent, array())
          : array();

      $inherited = '#text' === $node->nodeName
          ? array_merge($inherited, $style)
          : self::filterOutNonInheritedStyles($inherited);

      return self::parseInlineStyle($node, $inherited);
  }
  /**
   * Drop the style entries that must not be passed from a parent node to its children
   * (borders, spacing before/after, underline, strikethrough, hidden).
   *
   * @param array $styles
   * @return array The same styles without the non-inherited keys; all other keys keep their order
   */
  protected static function filterOutNonInheritedStyles(array $styles)
  {
      $blockedKeys = array(
          'borderSize', 'borderTopSize', 'borderRightSize', 'borderBottomSize', 'borderLeftSize',
          'borderColor', 'borderTopColor', 'borderRightColor', 'borderBottomColor', 'borderLeftColor',
          'borderStyle',
          'spaceAfter', 'spaceBefore',
          'underline', 'strikethrough', 'hidden',
      );

      foreach ($blockedKeys as $blockedKey) {
          unset($styles[$blockedKey]);
      }

      return $styles;
  }
  /**
   * Handle a <ul> / <ol> node.
   *
   * A top-level list (no 'listdepth' in $data yet) starts at depth 0 and registers a new
   * numbering style on the document; the `start` and `type` attributes of the node are
   * applied to the first level of that style. A nested list only increases the depth.
   *
   * @param \DOMNode $node
   * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
   * @param array &$styles
   * @param array &$data
   * @return mixed The parent of $element when the list sits directly inside an <li>, null otherwise
   */
  protected static function parseList($node, $element, &$styles, &$data)
  {
      if (!isset($data['listdepth'])) {
          $data['listdepth'] = 0;
          $styles['list'] = 'listStyle_' . self::$listIndex++;
          $numbering = $element->getPhpWord()->addNumberingStyle(
              $styles['list'],
              self::getListStyle('ol' === $node->nodeName)
          );

          // extract attributes start & type e.g. <ol type="A" start="3">
          $start = 0;
          $type = '';
          foreach ($node->attributes as $attribute) {
              if ('start' === $attribute->name) {
                  $start = (int) $attribute->value;
              } elseif ('type' === $attribute->name) {
                  $type = $attribute->value;
              }
          }

          $levels = $numbering->getLevels();
          /** @var \PhpOffice\PhpWord\Style\NumberingLevel */
          $firstLevel = $levels[0];
          if ($start > 0) {
              $firstLevel->setStart($start);
          }

          $format = $type ? self::mapListType($type) : null;
          if ($format) {
              $firstLevel->setFormat($format);
          }
      } else {
          $data['listdepth']++;
      }

      if ('li' === $node->parentNode->nodeName) {
          return $element->getParent();
      }

      return null;
  }
  /**
   * Build the numbering style definition (nine levels) used for HTML lists.
   *
   * Ordered lists cycle decimal / lower-letter / lower-roman with the text "%<level>.";
   * unordered lists cycle three bullet glyphs with their fonts. Every level is indented a
   * further 720 (tabPos and left).
   *
   * @param bool $isOrderedList
   * @return array Array with the keys 'type' and 'levels'
   */
  protected static function getListStyle($isOrderedList)
  {
      $levelCount = 9;
      $levels = array();

      if ($isOrderedList) {
          $formats = array(NumberFormat::DECIMAL, NumberFormat::LOWER_LETTER, NumberFormat::LOWER_ROMAN);
          for ($depth = 0; $depth < $levelCount; $depth++) {
              // Every third level (lower-roman) is right-aligned with a shorter hanging indent.
              $isRomanLevel = 2 === $depth % 3;
              $indent = 720 * ($depth + 1);
              $levels[] = array(
                  'format' => $formats[$depth % 3],
                  'text' => '%' . ($depth + 1) . '.',
                  'alignment' => $isRomanLevel ? 'right' : 'left',
                  'tabPos' => $indent,
                  'left' => $indent,
                  'hanging' => $isRomanLevel ? 180 : 360,
              );
          }

          return array(
              'type' => 'multilevel',
              'levels' => $levels,
          );
      }

      // Bullet glyph and the font it has to be rendered with, repeated every three levels.
      $bullets = array(
          array('', 'Symbol'),
          array('o', 'Courier New'),
          array('', 'Wingdings'),
      );
      for ($depth = 0; $depth < $levelCount; $depth++) {
          $bullet = $bullets[$depth % 3];
          $indent = 720 * ($depth + 1);
          $levels[] = array(
              'format' => NumberFormat::BULLET,
              'text' => $bullet[0],
              'alignment' => 'left',
              'tabPos' => $indent,
              'left' => $indent,
              'hanging' => 360,
              'font' => $bullet[1],
              'hint' => 'default',
          );
      }

      return array(
          'type' => 'hybridMultilevel',
          'levels' => $levels,
      );
  }
  /**
   * Handle an <li> node: add a list item run at the current depth and parse the item's
   * children into it.
   *
   * parseChildNodes() skips 'li' nodes, so the children are walked here instead.
   *
   * @param \DOMNode $node
   * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
   * @param array &$styles
   * @param array $data
   *
   * @todo This function is almost the same like `parseChildNodes`. Merged?
   * @todo As soon as ListItem inherits from AbstractContainer or TextRun delete parsing part of childNodes
   */
  protected static function parseListItem($node, $element, &$styles, $data)
  {
      $children = $node->childNodes;
      if (empty($children)) {
          return;
      }

      $itemRun = $element->addListItemRun($data['listdepth'], $styles['list'], $styles['paragraph']);
      foreach ($children as $child) {
          self::parseNode($child, $itemRun, $styles, $data);
      }
  }
  558. /**
  559. * Parse style
  560. *
  561. * @param \DOMAttr $attribute
  562. * @param array $styles
  563. * @return array
  564. */
  565. protected static function parseStyle($attribute, $styles)
  566. {
  567. $properties = explode(';', trim($attribute->value, " \t\n\r\0\x0B;"));
  568. foreach ($properties as $property) {
  569. list($cKey, $cValue) = array_pad(explode(':', $property, 2), 2, null);
  570. $cValue = trim($cValue);
  571. $cKey = strtolower(trim($cKey));
  572. switch ($cKey) {
  573. case 'text-decoration':
  574. switch ($cValue) {
  575. case 'underline':
  576. $styles['underline'] = 'single';
  577. break;
  578. case 'line-through':
  579. $styles['strikethrough'] = true;
  580. break;
  581. }
  582. break;
  583. case 'text-align':
  584. $styles['alignment'] = self::mapAlign($cValue);
  585. break;
  586. case 'display':
  587. $styles['hidden'] = $cValue === 'none' || $cValue === 'hidden';
  588. break;
  589. case 'direction':
  590. $styles['rtl'] = $cValue === 'rtl';
  591. break;
  592. case 'font-size':
  593. $styles['size'] = Converter::cssToPoint($cValue);
  594. break;
  595. case 'font-family':
  596. $cValue = array_map('trim', explode(',', $cValue));
  597. $styles['name'] = ucwords($cValue[0]);
  598. break;
  599. case 'color':
  600. $styles['color'] = trim($cValue, '#');
  601. break;
  602. case 'background-color':
  603. $styles['bgColor'] = trim($cValue, '#');
  604. break;
  605. case 'line-height':
  606. $matches = array();
  607. if ($cValue === 'normal') {
  608. $spacingLineRule = \PhpOffice\PhpWord\SimpleType\LineSpacingRule::AUTO;
  609. $spacing = 0;
  610. } elseif (preg_match('/([0-9]+\.?[0-9]*[a-z]+)/', $cValue, $matches)) {
  611. //matches number with a unit, e.g. 12px, 15pt, 20mm, ...
  612. $spacingLineRule = \PhpOffice\PhpWord\SimpleType\LineSpacingRule::EXACT;
  613. $spacing = Converter::cssToTwip($matches[1]);
  614. } elseif (preg_match('/([0-9]+)%/', $cValue, $matches)) {
  615. //matches percentages
  616. $spacingLineRule = \PhpOffice\PhpWord\SimpleType\LineSpacingRule::AUTO;
  617. //we are subtracting 1 line height because the Spacing writer is adding one line
  618. $spacing = ((((int) $matches[1]) / 100) * Paragraph::LINE_HEIGHT) - Paragraph::LINE_HEIGHT;
  619. } else {
  620. //any other, wich is a multiplier. E.g. 1.2
  621. $spacingLineRule = \PhpOffice\PhpWord\SimpleType\LineSpacingRule::AUTO;
  622. //we are subtracting 1 line height because the Spacing writer is adding one line
  623. $spacing = ($cValue * Paragraph::LINE_HEIGHT) - Paragraph::LINE_HEIGHT;
  624. }
  625. $styles['spacingLineRule'] = $spacingLineRule;
  626. $styles['line-spacing'] = $spacing;
  627. break;
  628. case 'letter-spacing':
  629. $styles['letter-spacing'] = Converter::cssToTwip($cValue);
  630. break;
  631. case 'text-indent':
  632. $styles['indentation']['firstLine'] = Converter::cssToTwip($cValue);
  633. break;
  634. case 'font-weight':
  635. $tValue = false;
  636. if (preg_match('#bold#', $cValue)) {
  637. $tValue = true; // also match bolder
  638. }
  639. $styles['bold'] = $tValue;
  640. break;
  641. case 'font-style':
  642. $tValue = false;
  643. if (preg_match('#(?:italic|oblique)#', $cValue)) {
  644. $tValue = true;
  645. }
  646. $styles['italic'] = $tValue;
  647. break;
  648. case 'margin':
  649. $cValue = Converter::cssToTwip($cValue);
  650. $styles['spaceBefore'] = $cValue;
  651. $styles['spaceAfter'] = $cValue;
  652. break;
  653. case 'margin-top':
  654. // BC change: up to ver. 0.17.0 incorrectly converted to points - Converter::cssToPoint($cValue)
  655. $styles['spaceBefore'] = Converter::cssToTwip($cValue);
  656. break;
  657. case 'margin-bottom':
  658. // BC change: up to ver. 0.17.0 incorrectly converted to points - Converter::cssToPoint($cValue)
  659. $styles['spaceAfter'] = Converter::cssToTwip($cValue);
  660. break;
  661. case 'border-color':
  662. self::mapBorderColor($styles, $cValue);
  663. break;
  664. case 'border-width':
  665. $styles['borderSize'] = Converter::cssToPoint($cValue);
  666. break;
  667. case 'border-style':
  668. $styles['borderStyle'] = self::mapBorderStyle($cValue);
  669. break;
  670. case 'width':
  671. if (preg_match('/([0-9]+[a-z]+)/', $cValue, $matches)) {
  672. $styles['width'] = Converter::cssToTwip($matches[1]);
  673. $styles['unit'] = \PhpOffice\PhpWord\SimpleType\TblWidth::TWIP;
  674. } elseif (preg_match('/([0-9]+)%/', $cValue, $matches)) {
  675. $styles['width'] = $matches[1] * 50;
  676. $styles['unit'] = \PhpOffice\PhpWord\SimpleType\TblWidth::PERCENT;
  677. } elseif (preg_match('/([0-9]+)/', $cValue, $matches)) {
  678. $styles['width'] = $matches[1];
  679. $styles['unit'] = \PhpOffice\PhpWord\SimpleType\TblWidth::AUTO;
  680. }
  681. break;
  682. case 'border':
  683. case 'border-top':
  684. case 'border-bottom':
  685. case 'border-right':
  686. case 'border-left':
  687. // must have exact order [width color style], e.g. "1px #0011CC solid" or "2pt green solid"
  688. // Word does not accept shortened hex colors e.g. #CCC, only full e.g. #CCCCCC
  689. if (preg_match('/([0-9]+[^0-9]*)\s+(\#[a-fA-F0-9]+|[a-zA-Z]+)\s+([a-z]+)/', $cValue, $matches)) {
  690. if (false !== strpos($cKey, '-')) {
  691. $tmp = explode('-', $cKey);
  692. $which = $tmp[1];
  693. $which = ucfirst($which); // e.g. bottom -> Bottom
  694. } else {
  695. $which = '';
  696. }
  697. // Note - border width normalization:
  698. // Width of border in Word is calculated differently than HTML borders, usually showing up too bold.
  699. // Smallest 1px (or 1pt) appears in Word like 2-3px/pt in HTML once converted to twips.
  700. // Therefore we need to normalize converted twip value to cca 1/2 of value.
  701. // This may be adjusted, if better ratio or formula found.
  702. // BC change: up to ver. 0.17.0 was $size converted to points - Converter::cssToPoint($size)
  703. $size = Converter::cssToTwip($matches[1]);
  704. $size = (int) ($size / 2);
  705. // valid variants may be e.g. borderSize, borderTopSize, borderLeftColor, etc ..
  706. $styles["border{$which}Size"] = $size; // twips
  707. $styles["border{$which}Color"] = trim($matches[2], '#');
  708. $styles["border{$which}Style"] = self::mapBorderStyle($matches[3]);
  709. }
  710. break;
  711. case 'vertical-align':
  712. // https://developer.mozilla.org/en-US/docs/Web/CSS/vertical-align
  713. if (preg_match('#(?:top|bottom|middle|sub|baseline)#i', $cValue, $matches)) {
  714. $styles['valign'] = self::mapAlignVertical($matches[0]);
  715. }
  716. break;
  717. }
  718. }
  719. return $styles;
  720. }
  /**
   * Parse image node
   *
   * Collects the src, width, height and CSS float of the node, resolves the source to a local file
   * (writing base64 data URIs and fetched content to the temp directory when needed) and adds the
   * image to the given container.
   *
   * @param \DOMNode $node
   * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
   *
   * @throws \Exception when the source is missing or cannot be resolved to a local file
   *
   * @return \PhpOffice\PhpWord\Element\Image
   */
  protected static function parseImage($node, $element)
  {
      $style = array();
      $src = null;
      foreach ($node->attributes as $attribute) {
          switch ($attribute->name) {
              case 'src':
                  $src = $attribute->value;
                  break;
              case 'width':
                  $style['width'] = $attribute->value;
                  $style['unit'] = \PhpOffice\PhpWord\Style\Image::UNIT_PX;
                  break;
              case 'height':
                  $style['height'] = $attribute->value;
                  $style['unit'] = \PhpOffice\PhpWord\Style\Image::UNIT_PX;
                  break;
              case 'style':
                  foreach (explode(';', $attribute->value) as $declaration) {
                      if (strpos($declaration, ':') === false) {
                          continue;
                      }
                      // Limit of 2 keeps values that contain ':' intact; names are trimmed so that
                      // declarations following "; " (e.g. "width: 10px; float: right") are recognised.
                      list($property, $value) = explode(':', $declaration, 2);
                      $property = strtolower(trim($property));
                      $value = strtolower(trim($value));
                      if ($property !== 'float' || ($value !== 'left' && $value !== 'right')) {
                          continue;
                      }
                      $style['hPos'] = $value === 'right'
                          ? \PhpOffice\PhpWord\Style\Image::POS_RIGHT
                          : \PhpOffice\PhpWord\Style\Image::POS_LEFT;
                      $style['hPosRelTo'] = \PhpOffice\PhpWord\Style\Image::POS_RELTO_MARGIN; // inner section area
                      $style['pos'] = \PhpOffice\PhpWord\Style\Image::POS_RELATIVE;
                      $style['wrap'] = \PhpOffice\PhpWord\Style\Image::WRAP_TIGHT;
                      $style['overlap'] = true;
                  }
                  break;
          }
      }

      $originSrc = $src;
      if ($src === null || $src === '') {
          // Without a source there is nothing to load; fail with the same exception as any other
          // unresolvable image instead of handing null / '' to the string and file functions below.
          throw new \Exception("Could not load image $originSrc");
      }

      if (strpos($src, 'data:image') !== false) {
          $match = array();
          // Only base64 data URIs are decoded here; anything else falls through to the generic
          // resolution below rather than reading undefined match groups.
          if (preg_match('/data:image\/(\w+);base64,(.+)/', $src, $match)) {
              $src = Settings::getTempDir() . '/' . uniqid() . '.' . $match[1];
              $ifp = fopen($src, 'wb');
              if ($ifp !== false) {
                  fwrite($ifp, base64_decode($match[2]));
                  fclose($ifp);
              }
          }
      }
      $src = urldecode($src);

      if (!is_file($src) && isset(self::$options['IMG_SRC_SEARCH'], self::$options['IMG_SRC_REPLACE'])) {
          $src = str_replace(self::$options['IMG_SRC_SEARCH'], self::$options['IMG_SRC_REPLACE'], $src);
      }

      if ($src !== '' && !is_file($src)) {
          // NOTE(review): $src originates from the HTML being imported, so this read can reach any
          // location the enabled stream wrappers allow - restrict it if the HTML is untrusted.
          // Best effort: a failed read is reported by the exception at the end of the method.
          $imgBlob = @file_get_contents($src);
          if ($imgBlob !== false && $imgBlob !== '') {
              $match = array();
              if (preg_match('/.+\.(\w+)$/', $src, $match)) {
                  $extension = $match[1];
              } else {
                  // e.g. "http://host/pic.png?v=2": take the extension from the URL path, if any
                  $path = parse_url($src, PHP_URL_PATH);
                  $extension = is_string($path) ? pathinfo($path, PATHINFO_EXTENSION) : '';
              }
              $src = Settings::getTempDir() . '/' . uniqid() . '.' . $extension;
              $ifp = fopen($src, 'wb');
              if ($ifp !== false) {
                  fwrite($ifp, $imgBlob);
                  fclose($ifp);
              }
          }
      }

      if (!is_file($src)) {
          throw new \Exception("Could not load image $originSrc");
      }

      return $element->addImage($src, $style);
  }
  /**
   * Translate a CSS border-style keyword into a Word border style name.
   *
   * The keywords none, dashed, dotted and double are handed back unchanged;
   * every other value (e.g. solid) yields 'single'.
   *
   * @param string $cssBorderStyle
   * @return null|string
   */
  protected static function mapBorderStyle($cssBorderStyle)
  {
      $passedThrough = array('none', 'dashed', 'dotted', 'double');

      // Loose in_array() on purpose: it applies the same comparison a switch statement uses.
      if (in_array($cssBorderStyle, $passedThrough)) {
          return $cssBorderStyle;
      }

      return 'single';
  }
  /**
   * Map a CSS border-color value onto the border colour style keys.
   *
   * A single colour sets 'borderColor'. Two to four whitespace-separated colours are expanded the
   * way the CSS shorthand defines it and written to borderTopColor, borderRightColor,
   * borderBottomColor and borderLeftColor:
   *  - 2 values: top/bottom, right/left
   *  - 3 values: top, right/left, bottom
   *  - 4 values: top, right, bottom, left
   * Values without any '#' (e.g. colour names only) are left untouched, and tokens beyond the
   * fourth are ignored.
   *
   * @param array $styles style array that receives the colour entries (modified in place)
   * @param string $cssBorderColor e.g. "#FF0000" or "#FF0000 #00FF00"
   */
  protected static function mapBorderColor(&$styles, $cssBorderColor)
  {
      $cssBorderColor = trim((string) $cssBorderColor);
      if (strpos($cssBorderColor, '#') === false) {
          return;
      }

      // Split on any run of whitespace so repeated spaces do not produce empty colours.
      $colors = preg_split('/\s+/', $cssBorderColor, -1, PREG_SPLIT_NO_EMPTY);
      foreach ($colors as $index => $color) {
          $colors[$index] = trim($color, '#');
      }

      $numColors = count($colors);
      if ($numColors === 1) {
          $styles['borderColor'] = $colors[0];

          return;
      }

      // A missing bottom falls back to top and a missing left falls back to right.
      $styles['borderTopColor'] = $colors[0];
      $styles['borderRightColor'] = $colors[1];
      $styles['borderBottomColor'] = $numColors > 2 ? $colors[2] : $colors[0];
      $styles['borderLeftColor'] = $numColors > 3 ? $colors[3] : $colors[1];
  }
  /**
   * Convert an HTML/CSS horizontal alignment keyword to a \PhpOffice\PhpWord\SimpleType\Jc constant.
   *
   * 'right', 'center' and 'justify' map to Jc::END, Jc::CENTER and Jc::BOTH;
   * everything else falls back to Jc::START.
   *
   * @param string $cssAlignment
   * @return string|null
   */
  protected static function mapAlign($cssAlignment)
  {
      // Loose comparisons on purpose: they match the semantics of a switch statement.
      if ($cssAlignment == 'right') {
          return Jc::END;
      }
      if ($cssAlignment == 'center') {
          return Jc::CENTER;
      }
      if ($cssAlignment == 'justify') {
          return Jc::BOTH;
      }

      return Jc::START;
  }
  /**
   * Transforms a HTML/CSS vertical alignment
   *
   * The keyword is matched case-insensitively and surrounding whitespace is ignored:
   *  - top, bottom        -> returned as is
   *  - middle             -> center
   *  - sub                -> bottom
   *  - text-top, baseline -> top
   * Any other value yields an empty string.
   *
   * @param string $alignment
   * @return string
   */
  protected static function mapAlignVertical($alignment)
  {
      $alignment = strtolower(trim((string) $alignment));
      switch ($alignment) {
          case 'top':
          case 'bottom':
              return $alignment;
          case 'middle':
              return 'center';
          case 'sub':
              return 'bottom';
          case 'text-top':
          case 'baseline':
              // Word's vertical alignment (top, center, both, bottom) has no baseline value,
              // so baseline is reported as top.
              return 'top';
          default:
              // @discuss - which one should apply:
              // - Word uses default vert. alignment: top
              // - all browsers use default vert. alignment: middle
              // Returning an empty string means the attribute won't be set, so Word's default (top) is used.
              return '';
      }
  }
  /**
   * Pick the number format for an ordered list from its HTML list type marker.
   *
   * @param string $cssListType one of 'a', 'A', 'i', 'I' or '1'; any other value is treated like '1'
   * @return string a NumberFormat constant
   */
  protected static function mapListType($cssListType)
  {
      // Loose comparisons on purpose: they match the semantics of a switch statement.
      if ($cssListType == 'a') {
          return NumberFormat::LOWER_LETTER; // a, b, c, ..
      }
      if ($cssListType == 'A') {
          return NumberFormat::UPPER_LETTER; // A, B, C, ..
      }
      if ($cssListType == 'i') {
          return NumberFormat::LOWER_ROMAN; // i, ii, iii, iv, ..
      }
      if ($cssListType == 'I') {
          return NumberFormat::UPPER_ROMAN; // I, II, III, IV, ..
      }

      // '1' and every unrecognised marker
      return NumberFormat::DECIMAL; // 1, 2, 3, ..
  }
  /**
   * Handle a line break by adding a text break to the container.
   *
   * @param \PhpOffice\PhpWord\Element\AbstractContainer $element container that receives the break
   */
  protected static function parseLineBreak($element)
  {
      $element->addTextBreak();
  }
  /**
   * Parse link node
   *
   * Reads the href attribute, refreshes $styles['font'] from the node's inline style and adds a
   * link with the node's text content to the container. A target starting with '#' is added
   * without the leading '#' and with the fifth addLink() argument set to true.
   *
   * @param \DOMNode $node
   * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
   * @param array $styles modified in place: the 'font' entry is replaced by the parsed inline style
   *
   * @return mixed whatever $element->addLink() returns
   */
  protected static function parseLink($node, $element, &$styles)
  {
      $target = null;
      foreach ($node->attributes as $attribute) {
          if ($attribute->name === 'href') {
              $target = $attribute->value;
          }
      }
      $styles['font'] = self::parseInlineStyle($node, $styles['font']);

      // $target stays null when the node has no href; passing null to strpos() is deprecated
      // since PHP 8.1, so only inspect real strings.
      if (is_string($target) && strpos($target, '#') === 0) {
          return $element->addLink(substr($target, 1), $node->textContent, $styles['font'], $styles['paragraph'], true);
      }

      return $element->addLink($target, $node->textContent, $styles['font'], $styles['paragraph']);
  }
  /**
   * Render a horizontal rule as an empty text element whose paragraph carries a bottom border.
   * Note: a Word rule is not the same as HTML's <hr> since it does not support width and thus neither alignment
   *
   * Limitations - <hr/> cannot be rendered as:
   * - table - throws error "cannot be inside textruns", e.g. lists
   * - line - that is a shape, has different behaviour
   * - repeated text, e.g. underline "_", because of unpredictable line wrapping
   *
   * @param \DOMNode $node
   * @param \PhpOffice\PhpWord\Element\AbstractContainer $element
   */
  protected static function parseHorizRule($node, $element)
  {
      // Inline styles take precedence over the defaults below, e.g.
      // <hr style="border-bottom: 3px #DDDDDD solid; margin-bottom: 0;">
      $inline = self::parseInlineStyle($node);

      $ruleSize = empty($inline['line-height']) ? 1 : $inline['line-height'];
      $ruleColor = empty($inline['color']) ? '000000' : $inline['color'];

      $fontDefaults = array('size' => 3);
      $paragraphDefaults = array(
          'lineHeight' => 0.25, // multiply default line height - e.g. 1, 1.5 etc
          'spacing' => 0, // twip
          'spaceBefore' => 120, // twip, 240/2 (default line height)
          'spaceAfter' => 120, // twip
          'borderBottomSize' => $ruleSize,
          'borderBottomColor' => $ruleColor,
          'borderBottomStyle' => 'single', // same as "solid"
      );

      // Array union keeps the inline entries first and only fills in keys they do not define.
      $element->addText('', $inline + $fontDefaults, $inline + $paragraphDefaults);
  }
  975. }