OLERead.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323
  1. <?php
  2. /**
  3. * This file is part of PHPWord - A pure PHP library for reading and writing
  4. * word processing documents.
  5. *
  6. * PHPWord is free software distributed under the terms of the GNU Lesser
  7. * General Public License version 3 as published by the Free Software Foundation.
  8. *
  9. * For the full copyright and license information, please read the LICENSE
  10. * file that was distributed with this source code. For the full list of
  11. * contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
  12. *
  13. * @see https://github.com/PHPOffice/PHPWord
  14. * @copyright 2010-2018 PHPWord contributors
  15. * @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
  16. */
  17. namespace PhpOffice\PhpWord\Shared;
  18. use PhpOffice\PhpWord\Exception\Exception;
  // Global constant holding the 8-byte signature that the class below compares
  // against the first bytes of a file. Only defined once, so an earlier
  // definition by another script wins.
  if (!defined('IDENTIFIER_OLE')) {
      define('IDENTIFIER_OLE', pack('C8', 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1));
  }
  /**
   * Reader for OLE compound files: parses the header, the sector allocation
   * tables and the directory stream, and exposes the directory indexes of the
   * streams it recognises by name (WordDocument, 1Table, Data, ObjectPool and
   * the two summary-information streams) so callers can fetch them with
   * getStream().
   *
   * The reader works with 512-byte sectors and 64-byte short sectors only.
   */
  class OLERead
  {
      /** @var string Raw bytes of the file most recently loaded by read() */
      private $data = '';

      // OLE identifier
      const IDENTIFIER_OLE = IDENTIFIER_OLE;

      // Size of a sector = 512 bytes
      const BIG_BLOCK_SIZE = 0x200;

      // Size of a short sector = 64 bytes
      const SMALL_BLOCK_SIZE = 0x40;

      // Size of a directory entry always = 128 bytes
      const PROPERTY_STORAGE_BLOCK_SIZE = 0x80;

      // Minimum size of a standard stream = 4096 bytes, streams smaller than this are stored as short streams
      const SMALL_BLOCK_THRESHOLD = 0x1000;

      // SecID value that terminates a sector chain
      const END_OF_CHAIN = -2;

      // header offsets
      const NUM_BIG_BLOCK_DEPOT_BLOCKS_POS = 0x2c;
      const ROOT_START_BLOCK_POS = 0x30;
      const SMALL_BLOCK_DEPOT_BLOCK_POS = 0x3c;
      const EXTENSION_BLOCK_POS = 0x44;
      const NUM_EXTENSION_BLOCK_POS = 0x48;
      const BIG_BLOCK_DEPOT_BLOCKS_POS = 0x4c;

      // property storage offsets (directory offsets)
      const SIZE_OF_NAME_POS = 0x40;
      const TYPE_POS = 0x42;
      const START_BLOCK_POS = 0x74;
      const SIZE_POS = 0x78;

      // Directory indexes (keys of $props) of the recognised streams; null when absent
      public $wrkdocument = null;
      public $wrk1Table = null;
      public $wrkData = null;
      public $wrkObjectPool = null;
      public $summaryInformation = null;
      public $docSummaryInfos = null;

      // The properties below used to be created dynamically inside read(), which is
      // deprecated since PHP 8.2. They are declared public because that is the
      // visibility they effectively had before.

      /** @var null|int Total number of sectors used for the SAT */
      public $numBigBlockDepotBlocks = null;

      /** @var null|int SecID of the first sector of the directory stream */
      public $rootStartBlock = null;

      /** @var null|int SecID of the first sector of the SSAT (or -2 if not extant) */
      public $sbdStartBlock = null;

      /** @var null|int SecID of the MSAT sector currently being followed (or -2 if none) */
      public $extensionBlock = null;

      /** @var null|int Total number of sectors used by the MSAT */
      public $numExtensionBlocks = null;

      /** @var string Concatenated SAT sectors: 4 bytes per sector giving the next SecID */
      public $bigBlockChain = '';

      /** @var string Concatenated SSAT sectors: 4 bytes per short sector giving the next one */
      public $smallBlockChain = '';

      /** @var string Raw bytes of the directory stream */
      public $entry = '';

      /** @var array Directory entries, each array('name', 'type', 'startBlock', 'size') */
      public $props = array();

      /** @var null|int Directory index of the root entry, null if none was found */
      public $rootentry = null;

      /**
       * Read the file
       *
       * Loads the file, parses its header and allocation tables and indexes the
       * directory. All state from a previous call is discarded once the new file
       * has been accepted, so a reader instance can be reused.
       *
       * @param $sFileName string Filename
       *
       * @throws Exception if the file is unreadable, does not start with the OLE
       *                   signature, is shorter than its header, or has a corrupt
       *                   sector chain
       */
      public function read($sFileName)
      {
          // Check if file exists and is readable
          if (!is_readable($sFileName)) {
              throw new Exception('Could not open ' . $sFileName . ' for reading! File does not exist, or it is not readable.');
          }

          // Get the file identifier
          // Don't bother reading the whole file until we know it's a valid OLE file
          $signature = file_get_contents($sFileName, false, null, 0, 8);
          if ($signature !== self::IDENTIFIER_OLE) {
              throw new Exception('The filename ' . $sFileName . ' is not recognised as an OLE file');
          }

          // Get the file data
          $contents = file_get_contents($sFileName);
          if ($contents === false) {
              throw new Exception('Could not open ' . $sFileName . ' for reading! File does not exist, or it is not readable.');
          }
          // Every header field read below lies inside the first 512 bytes
          if (strlen($contents) < self::BIG_BLOCK_SIZE) {
              throw new Exception('The filename ' . $sFileName . ' is too short to be a valid OLE file');
          }

          // Forget everything learnt from a previously read file
          $this->data = $contents;
          $this->props = array();
          $this->rootentry = null;
          $this->wrkdocument = null;
          $this->wrk1Table = null;
          $this->wrkData = null;
          $this->wrkObjectPool = null;
          $this->summaryInformation = null;
          $this->docSummaryInfos = null;

          // Total number of sectors used for the SAT
          $this->numBigBlockDepotBlocks = self::getInt4d($this->data, self::NUM_BIG_BLOCK_DEPOT_BLOCKS_POS);

          // SecID of the first sector of the directory stream
          $this->rootStartBlock = self::getInt4d($this->data, self::ROOT_START_BLOCK_POS);

          // SecID of the first sector of the SSAT (or -2 if not extant)
          $this->sbdStartBlock = self::getInt4d($this->data, self::SMALL_BLOCK_DEPOT_BLOCK_POS);

          // SecID of the first sector of the MSAT (or -2 if no additional sectors are used)
          $this->extensionBlock = self::getInt4d($this->data, self::EXTENSION_BLOCK_POS);

          // Total number of sectors used by MSAT
          $this->numExtensionBlocks = self::getInt4d($this->data, self::NUM_EXTENSION_BLOCK_POS);

          // Neither table can occupy more sectors than the file contains; rejecting
          // larger counts keeps the loops below bounded on corrupt headers.
          $sectorCount = (int) (strlen($this->data) / self::BIG_BLOCK_SIZE);
          if ($this->numBigBlockDepotBlocks > $sectorCount || $this->numExtensionBlocks > $sectorCount) {
              throw new Exception('The filename ' . $sFileName . ' is corrupt: allocation table is larger than the file');
          }

          // SecIDs of the SAT sectors; the first ones are stored in the header itself
          $bigBlockDepotBlocks = array();
          $pos = self::BIG_BLOCK_DEPOT_BLOCKS_POS;
          $bbdBlocks = $this->numBigBlockDepotBlocks;

          // @codeCoverageIgnoreStart
          if ($this->numExtensionBlocks != 0) {
              // The header is full; the remaining SecIDs live in MSAT sectors
              $bbdBlocks = (self::BIG_BLOCK_SIZE - self::BIG_BLOCK_DEPOT_BLOCKS_POS) / 4;
          }
          // @codeCoverageIgnoreEnd

          for ($i = 0; $i < $bbdBlocks; ++$i) {
              $bigBlockDepotBlocks[$i] = self::getInt4d($this->data, $pos);
              $pos += 4;
          }

          // @codeCoverageIgnoreStart
          for ($j = 0; $j < $this->numExtensionBlocks; ++$j) {
              $pos = ($this->extensionBlock + 1) * self::BIG_BLOCK_SIZE;
              // The last 4 bytes of an MSAT sector hold the SecID of the next MSAT sector
              $blocksToRead = min($this->numBigBlockDepotBlocks - $bbdBlocks, self::BIG_BLOCK_SIZE / 4 - 1);

              for ($i = $bbdBlocks; $i < $bbdBlocks + $blocksToRead; ++$i) {
                  $bigBlockDepotBlocks[$i] = self::getInt4d($this->data, $pos);
                  $pos += 4;
              }

              $bbdBlocks += $blocksToRead;
              if ($bbdBlocks < $this->numBigBlockDepotBlocks) {
                  $this->extensionBlock = self::getInt4d($this->data, $pos);
              }
          }
          // @codeCoverageIgnoreEnd

          // Concatenate the SAT sectors into one lookup string
          $this->bigBlockChain = '';
          for ($i = 0; $i < $this->numBigBlockDepotBlocks; ++$i) {
              if (!isset($bigBlockDepotBlocks[$i])) {
                  throw new Exception('The filename ' . $sFileName . ' is corrupt: incomplete sector allocation table');
              }
              $pos = ($bigBlockDepotBlocks[$i] + 1) * self::BIG_BLOCK_SIZE;
              $this->bigBlockChain .= substr($this->data, $pos, self::BIG_BLOCK_SIZE);
          }

          // The SSAT is stored as an ordinary sector chain starting at sbdStartBlock
          $this->smallBlockChain = $this->readData($this->sbdStartBlock);

          // read the directory stream
          $this->entry = $this->readData($this->rootStartBlock);

          $this->readPropertySets();
      }

      /**
       * Extract binary stream data
       *
       * Streams smaller than SMALL_BLOCK_THRESHOLD are read from the short-stream
       * container (the stream owned by the root entry) using the SSAT; all others
       * are read from regular sectors using the SAT. The result is made of whole
       * (short) sectors, so it may be longer than the size recorded in the directory.
       *
       * @param mixed $stream Directory index as stored in the public $wrk* /
       *                      $summaryInformation / $docSummaryInfos properties, or null
       *
       * @throws Exception if a sector chain is corrupt or the root entry is missing
       *
       * @return null|string Stream bytes, or null when $stream is null
       */
      public function getStream($stream)
      {
          if ($stream === null) {
              return null;
          }

          $size = $this->props[$stream]['size'];
          $startBlock = $this->props[$stream]['startBlock'];

          if ($size >= self::SMALL_BLOCK_THRESHOLD) {
              return $this->readData($startBlock);
          }

          // An empty chain yields no data; skip loading the short-stream container
          if ($startBlock == self::END_OF_CHAIN) {
              return '';
          }

          if ($this->rootentry === null) {
              throw new Exception('Corrupt OLE file: no root entry found for the short-stream container');
          }
          $rootdata = $this->readData($this->props[$this->rootentry]['startBlock']);

          return self::readChain($startBlock, $this->smallBlockChain, $rootdata, self::SMALL_BLOCK_SIZE, 0);
      }

      /**
       * Read a standard stream (by joining sectors using information from SAT)
       *
       * @param int $blSectorId Sector ID where the stream starts
       *
       * @throws Exception if the sector chain is corrupt
       *
       * @return string Data for standard stream
       */
      private function readData($blSectorId)
      {
          // Sector N starts at byte (N + 1) * 512 because the header occupies the first 512 bytes
          return self::readChain($blSectorId, $this->bigBlockChain, $this->data, self::BIG_BLOCK_SIZE, self::BIG_BLOCK_SIZE);
      }

      /**
       * Follow a chain of blocks through an allocation table and join their contents.
       *
       * @param int $block ID of the first block, or END_OF_CHAIN for an empty chain
       * @param string $chain Allocation table: 4 bytes per block holding the ID of the next block
       * @param string $container Bytes the blocks are cut from
       * @param int $blockSize Size of one block in bytes
       * @param int $baseOffset Offset of block 0 inside $container
       *
       * @throws Exception if the chain leaves the table or visits more blocks than the table describes
       *
       * @return string
       */
      private static function readChain($block, $chain, $container, $blockSize, $baseOffset)
      {
          $joined = '';
          $maxBlocks = (int) (strlen($chain) / 4);
          $visited = 0;

          while ($block != self::END_OF_CHAIN) {
              // A valid chain only references blocks described by the table and visits each
              // at most once, so anything else is an out-of-range link or a cycle.
              if ($block < 0 || $block >= $maxBlocks || ++$visited > $maxBlocks) {
                  throw new Exception('Corrupt OLE file: invalid or cyclic sector chain');
              }
              $joined .= substr($container, $baseOffset + $block * $blockSize, $blockSize);
              $block = self::getInt4d($chain, $block * 4);
          }

          return $joined;
      }

      /**
       * Read entries in the directory stream.
       *
       * Every 128-byte entry is appended to $props, and the index of each stream
       * recognised by name is stored in the matching public property.
       */
      private function readPropertySets()
      {
          $entryLen = strlen($this->entry);

          // loop through entries, each entry is 128 bytes
          for ($offset = 0; $offset < $entryLen; $offset += self::PROPERTY_STORAGE_BLOCK_SIZE) {
              // entry data (128 bytes)
              $data = substr($this->entry, $offset, self::PROPERTY_STORAGE_BLOCK_SIZE);
              if (strlen($data) < self::PROPERTY_STORAGE_BLOCK_SIZE) {
                  // Truncated trailing entry: its fields cannot be read safely
                  break;
              }

              // size in bytes of name
              $nameSize = ord($data[self::SIZE_OF_NAME_POS]) | (ord($data[self::SIZE_OF_NAME_POS + 1]) << 8);

              // type of entry
              $type = ord($data[self::TYPE_POS]);

              // sectorID of first sector or short sector, if this entry refers to a stream
              // sectorID of first sector of the short-stream container stream, if this entry is root entry
              $startBlock = self::getInt4d($data, self::START_BLOCK_POS);
              $size = self::getInt4d($data, self::SIZE_POS);

              // Dropping the NUL bytes turns the two-byte-per-character name into plain text
              $name = str_replace("\x00", '', substr($data, 0, $nameSize));

              $this->props[] = array(
                  'name' => $name,
                  'type' => $type,
                  'startBlock' => $startBlock,
                  'size' => $size,
              );
              $index = count($this->props) - 1;

              // tmp helper to simplify checks
              $upName = strtoupper($name);

              // Remember where the streams of a Word document live
              if ($upName === 'WORDDOCUMENT') {
                  $this->wrkdocument = $index;
              } elseif ($upName === '1TABLE') {
                  $this->wrk1Table = $index;
              } elseif ($upName === 'DATA') {
                  $this->wrkData = $index;
              } elseif ($upName === 'OBJECTPOOL') {
                  $this->wrkObjectPool = $index;
              } elseif ($upName === 'ROOT ENTRY' || $upName === 'R') {
                  $this->rootentry = $index;
              }

              // Summary information
              if ($name === chr(5) . 'SummaryInformation') {
                  $this->summaryInformation = $index;
              }

              // Additional Document Summary information
              if ($name === chr(5) . 'DocumentSummaryInformation') {
                  $this->docSummaryInfos = $index;
              }
          }
      }

      /**
       * Read 4 bytes of data at specified position
       *
       * The bytes are interpreted as a little-endian signed 32-bit integer.
       *
       * @param string $data
       * @param int $pos
       *
       * @return int
       */
      private static function getInt4d($data, $pos)
      {
          // FIX: represent numbers correctly on 64-bit system
          // http://sourceforge.net/tracker/index.php?func=detail&aid=1487372&group_id=99160&atid=623334
          // Hacked by Andreas Rehm 2006 to ensure correct result of the <<24 block on 32 and 64bit systems
          $or24 = ord($data[$pos + 3]);
          if ($or24 >= 128) {
              // negative number
              $ord24 = -abs((256 - $or24) << 24);
          } else {
              $ord24 = ($or24 & 127) << 24;
          }

          return ord($data[$pos]) | (ord($data[$pos + 1]) << 8) | (ord($data[$pos + 2]) << 16) | $ord24;
      }
  }