turndown.js 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154
  1. var TurndownService = (function () {
  2. 'use strict';
  /**
   * Copies the own enumerable properties of every source object onto
   * `destination`. Sources are applied left to right, so later sources win.
   *
   * @param {Object} destination The object that receives the properties
   * @param {...Object} sources Any number of objects to copy from
   * @returns {Object} The same `destination` object, mutated in place
   */
  function extend(destination) {
    // Borrow the method from Object.prototype so sources created with
    // Object.create(null), or sources that define their own `hasOwnProperty`
    // key, are still copied correctly.
    var hasOwn = Object.prototype.hasOwnProperty;
    for (var i = 1; i < arguments.length; i++) {
      var source = arguments[i];
      for (var key in source) {
        if (hasOwn.call(source, key)) destination[key] = source[key];
      }
    }
    return destination;
  }
  /**
   * Returns `character` concatenated with itself `count` times.
   *
   * @param {String} character The string to repeat
   * @param {Number} count How many copies to produce
   * @returns {String} The repeated string
   */
  function repeat(character, count) {
    // Joining count + 1 empty slots yields exactly `count` separators.
    var slots = new Array(count + 1);
    return slots.join(character);
  }
  // Upper-case tag names that are treated as block-level elements.
  var blockElements = [
    'ADDRESS', 'ARTICLE', 'ASIDE', 'AUDIO',
    'BLOCKQUOTE', 'BODY',
    'CANVAS', 'CENTER',
    'DD', 'DIR', 'DIV', 'DL', 'DT',
    'FIELDSET', 'FIGCAPTION', 'FIGURE', 'FOOTER', 'FORM', 'FRAMESET',
    'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'HEADER', 'HGROUP', 'HR', 'HTML',
    'ISINDEX',
    'LI',
    'MAIN', 'MENU',
    'NAV', 'NOFRAMES', 'NOSCRIPT',
    'OL', 'OUTPUT',
    'P', 'PRE',
    'SECTION',
    'TABLE', 'TBODY', 'TD', 'TFOOT', 'TH', 'THEAD', 'TR',
    'UL'
  ];

  /**
   * Tells whether the node's `nodeName` is one of the block-level tag names.
   *
   * @param {Node} node The node to inspect
   * @returns {Boolean}
   */
  function isBlock(node) {
    return is(node, blockElements);
  }
  // Upper-case tag names of void elements (elements that take no content).
  var voidElements = [
    'AREA', 'BASE', 'BR',
    'COL', 'COMMAND',
    'EMBED',
    'HR',
    'IMG', 'INPUT',
    'KEYGEN',
    'LINK',
    'META',
    'PARAM',
    'SOURCE',
    'TRACK',
    'WBR'
  ];

  /**
   * Tells whether the node itself is a void element.
   *
   * @param {Node} node The node to inspect
   * @returns {Boolean}
   */
  function isVoid(node) {
    return is(node, voidElements);
  }

  /**
   * Tells whether the node has at least one void element among its descendants.
   *
   * @param {Node} node The node to inspect
   * @returns {Boolean|undefined} Whatever `has` yields for the void tag list
   */
  function hasVoid(node) {
    return has(node, voidElements);
  }
  // Upper-case tag names that must not be discarded as "blank" even when
  // they contain no text.
  var meaningfulWhenBlankElements = [
    'A',
    'TABLE', 'THEAD', 'TBODY', 'TFOOT', 'TH', 'TD',
    'IFRAME', 'SCRIPT',
    'AUDIO', 'VIDEO'
  ];

  /**
   * Tells whether the node itself is one of the meaningful-when-blank elements.
   *
   * @param {Node} node The node to inspect
   * @returns {Boolean}
   */
  function isMeaningfulWhenBlank(node) {
    return is(node, meaningfulWhenBlankElements);
  }

  /**
   * Tells whether the node has a meaningful-when-blank element among its
   * descendants.
   *
   * @param {Node} node The node to inspect
   * @returns {Boolean|undefined} Whatever `has` yields for that tag list
   */
  function hasMeaningfulWhenBlank(node) {
    return has(node, meaningfulWhenBlankElements);
  }
  /**
   * Tells whether the node's `nodeName` appears in `tagNames`.
   *
   * @param {Node} node The node to inspect
   * @param {String[]} tagNames Tag names to compare against (exact match)
   * @returns {Boolean}
   */
  function is(node, tagNames) {
    var position = tagNames.indexOf(node.nodeName);
    return position !== -1;
  }
  /**
   * Tells whether the node contains a descendant element with any of the
   * given tag names.
   *
   * @param {Node} node The node whose descendants are searched
   * @param {String[]} tagNames Tag names to look for
   * @returns {Boolean|undefined} The falsy `getElementsByTagName` value when
   *   the node does not support that lookup, otherwise a boolean
   */
  function has(node, tagNames) {
    if (!node.getElementsByTagName) return node.getElementsByTagName;
    return tagNames.some(function (tagName) {
      // A non-zero match count is truthy, which is all `some` needs.
      return node.getElementsByTagName(tagName).length;
    });
  }
  // Registry of the built-in conversion rules, keyed by rule name.
  var rules = {};

  // Paragraphs: the content surrounded by blank lines.
  rules.paragraph = {
    filter: 'p',

    replacement: function (content) {
      var blankLine = '\n\n';
      return blankLine + content + blankLine;
    }
  };
  // Line breaks: the configured `br` marker followed by a newline.
  rules.lineBreak = {
    filter: 'br',

    replacement: function (content, node, options) {
      var marker = options.br;
      return marker + '\n';
    }
  };
  // Headings: ATX (`#` prefixes) by default; Setext (underlined) for levels
  // 1 and 2 when `headingStyle` is 'setext'.
  rules.heading = {
    filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],

    replacement: function (content, node, options) {
      // The level is the digit in the tag name, e.g. "H3" -> 3.
      var level = Number(node.nodeName.charAt(1));
      var useSetext = options.headingStyle === 'setext' && level < 3;

      if (!useSetext) {
        return '\n\n' + repeat('#', level) + ' ' + content + '\n\n';
      }

      var underlineChar = level === 1 ? '=' : '-';
      var underline = repeat(underlineChar, content.length);
      return '\n\n' + content + '\n' + underline + '\n\n';
    }
  };
  // Block quotes: trim surrounding newlines, then prefix every line with "> ".
  rules.blockquote = {
    filter: 'blockquote',

    replacement: function (content) {
      var trimmed = content.replace(/^\n+|\n+$/g, '');
      var quoted = trimmed.replace(/^/gm, '> ');
      return '\n\n' + quoted + '\n\n';
    }
  };
  // Lists: a list that is the last element child of an <li> is attached with
  // a single newline; any other list is set off by blank lines.
  rules.list = {
    filter: ['ul', 'ol'],

    replacement: function (content, node) {
      var owner = node.parentNode;
      var isTrailingNestedList =
        owner.nodeName === 'LI' && owner.lastElementChild === node;

      return isTrailingNestedList
        ? '\n' + content
        : '\n\n' + content + '\n\n';
    }
  };
  // List items: prefix with the bullet marker, or with the item's number when
  // the parent is an <ol> (honouring the `start` attribute).
  rules.listItem = {
    filter: 'li',

    replacement: function (content, node, options) {
      var prefix = options.bulletListMarker + ' ';
      var parent = node.parentNode;
      if (parent.nodeName === 'OL') {
        var start = parent.getAttribute('start');
        var index = Array.prototype.indexOf.call(parent.children, node);
        prefix = (start ? Number(start) + index : index + 1) + '. ';
      }

      // Continuation lines must be indented at least as far as the item's
      // content column (the marker width); otherwise Markdown parsers do not
      // treat them - nested lists in particular - as part of this item.
      var indent = new Array(prefix.length + 1).join(' ');

      content = content
        .replace(/^\n+/, '') // remove leading newlines
        .replace(/\n+$/, '\n') // replace trailing newlines with just a single one
        .replace(/\n/gm, '\n' + indent); // indent

      return (
        prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '')
      );
    }
  };
  // Indented code blocks: used for <pre><code> when `codeBlockStyle` is
  // 'indented'.
  rules.indentedCodeBlock = {
    filter: function (node, options) {
      return (
        options.codeBlockStyle === 'indented' &&
        node.nodeName === 'PRE' &&
        node.firstChild &&
        node.firstChild.nodeName === 'CODE'
      );
    },

    replacement: function (content, node, options) {
      // Markdown only recognises an indented code block when every line is
      // indented by four or more spaces; a smaller indent renders as a
      // normal paragraph.
      var indent = '    ';
      return (
        '\n\n' + indent +
        node.firstChild.textContent.replace(/\n/g, '\n' + indent) +
        '\n\n'
      );
    }
  };
  // Fenced code blocks: used for <pre><code> when `codeBlockStyle` is 'fenced'.
  rules.fencedCodeBlock = {
    filter: function (node, options) {
      if (options.codeBlockStyle !== 'fenced') return false;
      if (node.nodeName !== 'PRE') return false;
      var first = node.firstChild;
      return first && first.nodeName === 'CODE';
    },

    replacement: function (content, node, options) {
      var codeNode = node.firstChild;

      // The info string comes from a `language-xxx` class on the <code>.
      var classAttr = codeNode.getAttribute('class') || '';
      var languageMatch = classAttr.match(/language-(\S+)/);
      var language = languageMatch ? languageMatch[1] : '';

      var code = codeNode.textContent;
      var fenceChar = options.fence.charAt(0);

      // Make the fence one character longer than the longest fence-like run
      // that starts a line of the code, with a minimum of three.
      var fenceSize = 3;
      var fenceInCodeRegex = new RegExp('^' + fenceChar + '{3,}', 'gm');
      var found = fenceInCodeRegex.exec(code);
      while (found) {
        fenceSize = Math.max(fenceSize, found[0].length + 1);
        found = fenceInCodeRegex.exec(code);
      }
      var fence = repeat(fenceChar, fenceSize);

      var opening = '\n\n' + fence + language + '\n';
      var closing = '\n' + fence + '\n\n';
      return opening + code.replace(/\n$/, '') + closing;
    }
  };
  // Thematic breaks: the configured `hr` string on its own paragraph.
  rules.horizontalRule = {
    filter: 'hr',

    replacement: function (content, node, options) {
      var blankLine = '\n\n';
      return blankLine + options.hr + blankLine;
    }
  };
  // Inline links: `[text](href "title")`, used when `linkStyle` is 'inlined'
  // and the anchor has an href.
  rules.inlineLink = {
    filter: function (node, options) {
      if (options.linkStyle !== 'inlined') return false;
      if (node.nodeName !== 'A') return false;
      return node.getAttribute('href');
    },

    replacement: function (content, node) {
      var target = node.getAttribute('href');
      var title = cleanAttribute(node.getAttribute('title'));
      var titlePart = title ? ' "' + title + '"' : title;
      return '[' + content + '](' + target + titlePart + ')';
    }
  };
  // Reference links: used when `linkStyle` is 'referenced'. The replacement
  // emits the in-text part and queues the definition in `references`;
  // `append` later returns all queued definitions and clears the queue.
  rules.referenceLink = {
    filter: function (node, options) {
      if (options.linkStyle !== 'referenced') return false;
      if (node.nodeName !== 'A') return false;
      return node.getAttribute('href');
    },

    replacement: function (content, node, options) {
      var target = node.getAttribute('href');
      var title = cleanAttribute(node.getAttribute('title'));
      var titlePart = title ? ' "' + title + '"' : title;
      var style = options.linkReferenceStyle;
      var inText;
      var definition;

      if (style === 'collapsed') {
        inText = '[' + content + '][]';
        definition = '[' + content + ']: ' + target + titlePart;
      } else if (style === 'shortcut') {
        inText = '[' + content + ']';
        definition = '[' + content + ']: ' + target + titlePart;
      } else {
        // Full style: numbered references, starting at 1.
        var id = this.references.length + 1;
        inText = '[' + content + '][' + id + ']';
        definition = '[' + id + ']: ' + target + titlePart;
      }

      this.references.push(definition);
      return inText;
    },

    references: [],

    append: function (options) {
      if (!this.references.length) return '';
      var block = '\n\n' + this.references.join('\n') + '\n\n';
      this.references = []; // Reset references
      return block;
    }
  };
  // Emphasis: wrap non-blank content in the configured `emDelimiter`.
  rules.emphasis = {
    filter: ['em', 'i'],

    replacement: function (content, node, options) {
      var delimiter = options.emDelimiter;
      return content.trim() ? delimiter + content + delimiter : '';
    }
  };
  // Strong emphasis: wrap non-blank content in the configured `strongDelimiter`.
  rules.strong = {
    filter: ['strong', 'b'],

    replacement: function (content, node, options) {
      var delimiter = options.strongDelimiter;
      return content.trim() ? delimiter + content + delimiter : '';
    }
  };
  // Inline code: every <code> except one that is the only child of a <pre>
  // (that case is a code block).
  rules.code = {
    filter: function (node) {
      var isOnlyChild = !(node.previousSibling || node.nextSibling);
      var wrapsCodeBlock = node.parentNode.nodeName === 'PRE' && isOnlyChild;
      return node.nodeName === 'CODE' && !wrapsCodeBlock;
    },

    replacement: function (content) {
      if (!content.trim()) return '';

      var delimiter = '`';
      var padStart = '';
      var padEnd = '';
      var backtickRuns = content.match(/`+/gm);

      if (backtickRuns) {
        // Keep a content backtick from touching the delimiter.
        padStart = /^`/.test(content) ? ' ' : '';
        padEnd = /`$/.test(content) ? ' ' : '';
        // Lengthen the delimiter until no run in the content equals it.
        while (backtickRuns.indexOf(delimiter) !== -1) delimiter += '`';
      }

      return delimiter + padStart + content + padEnd + delimiter;
    }
  };
  // Images: `![alt](src "title")`; images without a src produce nothing.
  rules.image = {
    filter: 'img',

    replacement: function (content, node) {
      var alt = cleanAttribute(node.getAttribute('alt'));
      var src = node.getAttribute('src') || '';
      var title = cleanAttribute(node.getAttribute('title'));

      if (!src) return '';

      var titlePart = title ? ' "' + title + '"' : '';
      return '![' + alt + ']' + '(' + src + titlePart + ')';
    }
  };
  /**
   * Normalises an attribute value: every run of newlines (with any
   * whitespace that follows it) becomes a single newline.
   *
   * @param {String|null} attribute The raw attribute value
   * @returns {String} The cleaned value, or '' for a missing/empty attribute
   */
  function cleanAttribute(attribute) {
    if (!attribute) return '';
    return attribute.replace(/(\n+\s*)+/g, '\n');
  }
  /**
   * Holds the conversion rules and chooses the rule that applies to a node.
   *
   * @param {Object} options Converter options; reads `rules`,
   *   `blankReplacement`, `keepReplacement` and `defaultReplacement`
   */
  function Rules(options) {
    this.options = options;
    this._keep = [];
    this._remove = [];

    this.blankRule = {replacement: options.blankReplacement};
    this.keepReplacement = options.keepReplacement;
    this.defaultRule = {replacement: options.defaultReplacement};

    // Flatten the keyed rule map into an ordered list.
    this.array = [];
    for (var name in options.rules) {
      this.array.push(options.rules[name]);
    }
  }

  Rules.prototype = {
    // Puts the rule at the front so it is consulted before existing rules.
    // The key is accepted but not stored.
    add: function (key, rule) {
      this.array.unshift(rule);
    },

    // Registers a filter whose matches are rendered with `keepReplacement`.
    keep: function (filter) {
      var keepRule = {
        filter: filter,
        replacement: this.keepReplacement
      };
      this._keep.unshift(keepRule);
    },

    // Registers a filter whose matches are replaced with the empty string.
    remove: function (filter) {
      var removeRule = {
        filter: filter,
        replacement: function () {
          return '';
        }
      };
      this._remove.unshift(removeRule);
    },

    // Picks the rule for a node: blank rule first, then the regular rules,
    // then keep filters, then remove filters, finally the default rule.
    forNode: function (node) {
      if (node.isBlank) return this.blankRule;
      return (
        findRule(this.array, node, this.options) ||
        findRule(this._keep, node, this.options) ||
        findRule(this._remove, node, this.options) ||
        this.defaultRule
      );
    },

    // Calls `fn(rule, index)` for each regular rule, in order.
    forEach: function (fn) {
      var index = 0;
      while (index < this.array.length) {
        fn(this.array[index], index);
        index++;
      }
    }
  };
  /**
   * Returns the first rule in `rules` whose filter matches the node.
   *
   * @param {Object[]} rules Candidate rules, in priority order
   * @param {Node} node The node to match
   * @param {Object} options Converter options passed on to function filters
   * @returns {Object|undefined} The matching rule, or undefined when none match
   */
  function findRule(rules, node, options) {
    var index = 0;
    while (index < rules.length) {
      var candidate = rules[index];
      if (filterValue(candidate, node, options)) return candidate;
      index++;
    }
    return undefined;
  }
  /**
   * Evaluates a rule's filter against a node. A filter may be a lower-case
   * tag name, an array of lower-case tag names, or a predicate that is
   * called with the rule as `this`.
   *
   * @param {Object} rule The rule whose `filter` is evaluated
   * @param {Node} node The node to test
   * @param {Object} options Converter options, passed to predicate filters
   * @returns {true|undefined} `true` on a match, otherwise undefined
   * @throws {TypeError} When the filter is not a string, array or function
   */
  function filterValue(rule, node, options) {
    var filter = rule.filter;
    var matched;

    if (typeof filter === 'string') {
      matched = filter === node.nodeName.toLowerCase();
    } else if (Array.isArray(filter)) {
      matched = filter.indexOf(node.nodeName.toLowerCase()) > -1;
    } else if (typeof filter === 'function') {
      matched = filter.call(rule, node, options);
    } else {
      throw new TypeError('`filter` needs to be a string, array, or function');
    }

    if (matched) return true;
  }
  344. /**
  345. * The collapseWhitespace function is adapted from collapse-whitespace
  346. * by Luc Thevenard.
  347. *
  348. * The MIT License (MIT)
  349. *
  350. * Copyright (c) 2014 Luc Thevenard <lucthevenard@gmail.com>
  351. *
  352. * Permission is hereby granted, free of charge, to any person obtaining a copy
  353. * of this software and associated documentation files (the "Software"), to deal
  354. * in the Software without restriction, including without limitation the rights
  355. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  356. * copies of the Software, and to permit persons to whom the Software is
  357. * furnished to do so, subject to the following conditions:
  358. *
  359. * The above copyright notice and this permission notice shall be included in
  360. * all copies or substantial portions of the Software.
  361. *
  362. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  363. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  364. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  365. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  366. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  367. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  368. * THE SOFTWARE.
  369. */
  /**
   * collapseWhitespace(options) removes extraneous whitespace from the given
   * element, editing its text nodes in place. Nothing is done when the
   * element has no children or is itself preformatted.
   *
   * @param {Object} options
   * @param {Node} options.element The element to clean up
   * @param {Function} options.isBlock Predicate: is this node block-level?
   * @param {Function} options.isVoid Predicate: is this node a void element?
   * @param {Function} [options.isPre] Predicate: is this node preformatted?
   *   Defaults to a check for the PRE tag name.
   */
  function collapseWhitespace(options) {
    var element = options.element;
    var blockTest = options.isBlock;
    var voidTest = options.isVoid;
    var preTest = options.isPre || function (candidate) {
      return candidate.nodeName === 'PRE';
    };

    if (!element.firstChild || preTest(element)) return;

    var lastText = null; // most recent text node kept in the current inline run
    var afterVoid = false; // set after an inline void element; suppresses leading-space trimming
    var previous = null;
    var current = next(previous, element, preTest);

    while (current !== element) {
      var type = current.nodeType;

      if (type === 3 || type === 4) { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
        var collapsed = current.data.replace(/[ \r\n\t]+/g, ' ');
        var followsSpaceOrStart = !lastText || / $/.test(lastText.data);

        if (followsSpaceOrStart && !afterVoid && collapsed.charAt(0) === ' ') {
          collapsed = collapsed.slice(1);
        }

        // `collapsed` might be empty at this point.
        if (!collapsed) {
          current = remove(current);
          continue;
        }

        current.data = collapsed;
        lastText = current;
      } else if (type === 1) { // Node.ELEMENT_NODE
        if (blockTest(current) || current.nodeName === 'BR') {
          // A block or <br> ends the inline run: drop the trailing space.
          if (lastText) {
            lastText.data = lastText.data.replace(/ $/, '');
          }
          lastText = null;
          afterVoid = false;
        } else if (voidTest(current)) {
          // Avoid trimming space around non-block, non-BR void elements.
          lastText = null;
          afterVoid = true;
        }
      } else {
        // Anything that is neither text nor element is dropped.
        current = remove(current);
        continue;
      }

      var upcoming = next(previous, current, preTest);
      previous = current;
      current = upcoming;
    }

    // Trim the trailing space of the final text node, removing it if emptied.
    if (lastText) {
      lastText.data = lastText.data.replace(/ $/, '');
      if (!lastText.data) {
        remove(lastText);
      }
    }
  }
  /**
   * remove(node) detaches the given node from its parent and returns the
   * node the traversal should continue with: the next sibling when there is
   * one, otherwise the parent.
   *
   * @param {Node} node
   * @return {Node} node
   */
  function remove(node) {
    var parent = node.parentNode;
    var successor = node.nextSibling || parent;

    parent.removeChild(node);

    return successor;
  }
  /**
   * next(prev, current, isPre) returns the node that follows `current` in
   * the traversal. Children are skipped when the walk has just come back up
   * from one of them, or when `current` is preformatted.
   *
   * @param {Node} prev
   * @param {Node} current
   * @param {Function} isPre
   * @return {Node}
   */
  function next(prev, current, isPre) {
    var cameUpFromChild = prev && prev.parentNode === current;
    var sideways = current.nextSibling || current.parentNode;

    if (cameUpFromChild || isPre(current)) {
      return sideways;
    }
    return current.firstChild || sideways;
  }
  /*
   * Set up window for Node.js
   */
  var root;
  if (typeof window !== 'undefined') {
    root = window;
  } else {
    root = {};
  }

  /*
   * Parsing HTML strings
   */

  /**
   * Probes whether the environment's DOMParser can parse 'text/html'.
   *
   * @returns {Boolean} true only when the probe parse yields a truthy result
   */
  function canParseHTMLNatively() {
    var NativeParser = root.DOMParser;

    // Adapted from https://gist.github.com/1129031
    // Firefox/Opera/IE throw errors on unsupported types
    try {
      // WebKit returns null on unsupported types
      if (new NativeParser().parseFromString('', 'text/html')) {
        return true;
      }
    } catch (e) {
      // Deliberately ignored: a throwing probe simply means "not supported".
    }

    return false;
  }
  /**
   * Builds a fallback parser constructor whose instances expose
   * `parseFromString(string)`. The implementation is chosen once, when this
   * function runs: an ActiveX 'htmlfile' document when `shouldUseActiveX()`
   * says so, otherwise a document made by
   * `document.implementation.createHTMLDocument`.
   *
   * @returns {Function} The parser constructor
   */
  function createHTMLParser() {
    var Parser = function () {
    };
    var proto = Parser.prototype;

    if (shouldUseActiveX()) {
      proto.parseFromString = function (string) {
        var activeDoc = new window.ActiveXObject('htmlfile');
        activeDoc.designMode = 'on'; // disable on-page scripts
        activeDoc.open();
        activeDoc.write(string);
        activeDoc.close();
        return activeDoc;
      };
      return Parser;
    }

    proto.parseFromString = function (string) {
      var htmlDoc = document.implementation.createHTMLDocument('');
      htmlDoc.open();
      htmlDoc.write(string);
      htmlDoc.close();
      return htmlDoc;
    };
    return Parser;
  }
  /**
   * Decides whether HTML must be parsed through an ActiveX 'htmlfile'
   * document. That is the case only when opening a document created with
   * `document.implementation.createHTMLDocument` throws and the environment
   * exposes `window.ActiveXObject`.
   *
   * @returns {Boolean} true when the ActiveX fallback should be used
   */
  function shouldUseActiveX() {
    try {
      document.implementation.createHTMLDocument('').open();
    } catch (e) {
      // This branch is also reached when `document` does not exist at all.
      // In that case `window` may be missing too, so guard the lookup to
      // keep the probe itself from throwing a ReferenceError.
      return typeof window !== 'undefined' && Boolean(window.ActiveXObject);
    }
    return false;
  }
  // Parser constructor: the native DOMParser when it can handle text/html,
  // otherwise the fallback built by createHTMLParser().
  var HTMLParser;
  if (canParseHTMLNatively()) {
    HTMLParser = root.DOMParser;
  } else {
    HTMLParser = createHTMLParser();
  }
  /**
   * Produces the root node for a conversion. A string is parsed as HTML
   * inside a wrapper element; a DOM node is deep-cloned. The result then has
   * its whitespace collapsed.
   *
   * @param {String|Node} input HTML source or a DOM node
   * @returns {Node} The prepared root node
   */
  function RootNode(input) {
    var rootNode;

    if (typeof input !== 'string') {
      rootNode = input.cloneNode(true);
    } else {
      // DOM parsers arrange elements in the <head> and <body>.
      // Wrapping in a custom element ensures elements are reliably arranged in
      // a single element.
      var markup = '<x-turndown id="turndown-root">' + input + '</x-turndown>';
      var doc = htmlParser().parseFromString(markup, 'text/html');
      rootNode = doc.getElementById('turndown-root');
    }

    collapseWhitespace({
      element: rootNode,
      isBlock: isBlock,
      isVoid: isVoid
    });

    return rootNode;
  }
  // Parser instance created on first use and reused afterwards.
  var _htmlParser;

  /**
   * Returns the shared HTML parser, creating it on the first call.
   *
   * @returns {Object} An object exposing `parseFromString`
   */
  function htmlParser() {
    if (!_htmlParser) {
      _htmlParser = new HTMLParser();
    }
    return _htmlParser;
  }
  /**
   * Annotates a DOM node in place with the flags the converter relies on:
   * `isBlock`, `isCode` (a <code> element or inside an `isCode` parent),
   * `isBlank` and `flankingWhitespace`.
   *
   * @param {Node} node The DOM node to annotate
   * @returns {Node} The same node
   */
  function Node(node) {
    node.isBlock = isBlock(node);

    var isCodeElement = node.nodeName.toLowerCase() === 'code';
    node.isCode = isCodeElement || node.parentNode.isCode;

    node.isBlank = isBlank(node);
    node.flankingWhitespace = flankingWhitespace(node);

    return node;
  }
  /**
   * Tells whether a node is blank: it is neither void nor
   * meaningful-when-blank, its text is whitespace only, and it contains no
   * void or meaningful-when-blank descendants.
   *
   * @param {Node} node The node to inspect
   * @returns {Boolean}
   */
  function isBlank(node) {
    if (isVoid(node) || isMeaningfulWhenBlank(node)) return false;
    if (!/^\s*$/i.test(node.textContent)) return false;
    return !hasVoid(node) && !hasMeaningfulWhenBlank(node);
  }
  /**
   * Works out the single spaces that must be re-added around an inline
   * node's replacement. Block nodes never get flanking whitespace.
   *
   * @param {Node} node An annotated node (reads `isBlock`, `isBlank`, `textContent`)
   * @returns {{leading: String, trailing: String}} Each entry is '' or ' '
   */
  function flankingWhitespace(node) {
    var flanks = {leading: '', trailing: ''};
    if (node.isBlock) return flanks;

    var startsWithSpace = /^\s/.test(node.textContent);
    var endsWithSpace = /\s$/.test(node.textContent);
    // A blank node is only whitespace; emit at most the leading space for it.
    var blankWithSpaces = node.isBlank && startsWithSpace && endsWithSpace;

    if (startsWithSpace && !isFlankedByWhitespace('left', node)) {
      flanks.leading = ' ';
    }
    if (!blankWithSpaces && endsWithSpace && !isFlankedByWhitespace('right', node)) {
      flanks.trailing = ' ';
    }

    return flanks;
  }
  /**
   * Checks whether the sibling on the given side already provides a space
   * directly next to the node.
   *
   * @param {String} side 'left' for the previous sibling; anything else means the next sibling
   * @param {Node} node The node whose neighbour is inspected
   * @returns {Boolean|undefined} The test result for a text sibling or a
   *   non-block element sibling; undefined when there is no such sibling
   */
  function isFlankedByWhitespace(side, node) {
    var lookLeft = side === 'left';
    var sibling = lookLeft ? node.previousSibling : node.nextSibling;
    // The space must sit on the edge of the sibling that touches the node.
    var edge = lookLeft ? / $/ : /^ /;

    if (!sibling) return undefined;

    if (sibling.nodeType === 3) {
      return edge.test(sibling.nodeValue);
    }
    if (sibling.nodeType === 1 && !isBlock(sibling)) {
      return edge.test(sibling.textContent);
    }
    return undefined;
  }
  // Borrowed so array-likes such as NodeLists can be reduced.
  var reduce = Array.prototype.reduce;

  // Runs of newlines at the very start / very end of a string. Both always
  // match, possibly with the empty string.
  var leadingNewLinesRegExp = /^\n*/;
  var trailingNewLinesRegExp = /\n*$/;

  // [pattern, replacement] pairs that neutralise Markdown syntax in plain
  // text. The list is ordered with the backslash pair first; patterns
  // anchored with `^` only apply at the start of the string being escaped.
  var escapes = [
    [/\\/g, '\\\\'],             // backslash
    [/\*/g, '\\*'],              // asterisk
    [/^-/g, '\\-'],              // leading hyphen
    [/^\+ /g, '\\+ '],           // leading plus followed by a space
    [/^(=+)/g, '\\$1'],          // leading run of equals signs
    [/^(#{1,6}) /g, '\\$1 '],    // leading 1-6 hashes followed by a space
    [/`/g, '\\`'],               // backtick
    [/^~~~/g, '\\~~~'],          // leading triple tilde
    [/\[/g, '\\['],              // opening square bracket
    [/\]/g, '\\]'],              // closing square bracket
    [/^>/g, '\\>'],              // leading greater-than sign
    [/_/g, '\\_'],               // underscore
    [/^(\d+)\. /g, '$1\\. ']     // leading "<digits>. "
  ];
  /**
   * Creates an HTML-to-Markdown converter. May be called with or without
   * `new`. The given options are merged over the defaults below.
   *
   * @param {Object} [options] Overrides for any of the default options
   */
  function TurndownService(options) {
    if (!(this instanceof TurndownService)) return new TurndownService(options)
    var defaults = {
      rules: rules,
      headingStyle: 'atx',
      hr: '---',
      bulletListMarker: '*',
      codeBlockStyle: 'fenced',
      fence: '```',
      emDelimiter: '_',
      strongDelimiter: '**',
      linkStyle: 'inlined',
      linkReferenceStyle: 'full',
      // A Markdown hard line break needs two (or more) spaces before the
      // newline; a single trailing space is rendered as a soft break.
      br: '  ',
      // Blank nodes: block nodes leave a paragraph gap, inline nodes vanish.
      blankReplacement: function (content, node) {
        return node.isBlock ? '\n\n' : ''
      },
      // Kept nodes are emitted as their original HTML.
      keepReplacement: function (content, node) {
        return node.isBlock ? '\n\n' + node.outerHTML + '\n\n' : node.outerHTML
      },
      // Nodes without a rule contribute only their converted content.
      defaultReplacement: function (content, node) {
        return node.isBlock ? '\n\n' + content + '\n\n' : content
      }
    };
    this.options = extend({}, defaults, options);
    this.rules = new Rules(this.options);
  }

  TurndownService.prototype = {
    /**
     * The entry point for converting a string or DOM node to Markdown
     * @public
     * @param {String|HTMLElement} input The string or DOM node to convert
     * @returns A Markdown representation of the input
     * @throws {TypeError} When the input is neither a string nor an
     *   element/document/fragment node
     * @type String
     */
    turndown: function (input) {
      if (!canConvert(input)) {
        throw new TypeError(
          input + ' is not a string, or an element/document/fragment node.'
        )
      }
      if (input === '') return ''
      var output = process.call(this, new RootNode(input));
      return postProcess.call(this, output)
    },

    /**
     * Add one or more plugins
     * @public
     * @param {Function|Array} plugin The plugin or array of plugins to add
     * @returns The Turndown instance for chaining
     * @throws {TypeError} When the plugin is neither a function nor an array
     * @type Object
     */
    use: function (plugin) {
      if (Array.isArray(plugin)) {
        for (var i = 0; i < plugin.length; i++) this.use(plugin[i]);
      } else if (typeof plugin === 'function') {
        plugin(this);
      } else {
        throw new TypeError('plugin must be a Function or an Array of Functions')
      }
      return this
    },

    /**
     * Adds a rule
     * @public
     * @param {String} key The unique key of the rule
     * @param {Object} rule The rule
     * @returns The Turndown instance for chaining
     * @type Object
     */
    addRule: function (key, rule) {
      this.rules.add(key, rule);
      return this
    },

    /**
     * Keep a node (as HTML) that matches the filter
     * @public
     * @param {String|Array|Function} filter The filter selecting nodes to keep
     * @returns The Turndown instance for chaining
     * @type Object
     */
    keep: function (filter) {
      this.rules.keep(filter);
      return this
    },

    /**
     * Remove a node that matches the filter
     * @public
     * @param {String|Array|Function} filter The filter selecting nodes to remove
     * @returns The Turndown instance for chaining
     * @type Object
     */
    remove: function (filter) {
      this.rules.remove(filter);
      return this
    },

    /**
     * Escapes Markdown syntax
     * @public
     * @param {String} string The string to escape
     * @returns A string with Markdown syntax escaped
     * @type String
     */
    escape: function (string) {
      return escapes.reduce(function (accumulator, escape) {
        return accumulator.replace(escape[0], escape[1])
      }, string)
    }
  };
  /**
   * Reduces a DOM node down to its Markdown string equivalent by converting
   * each child in turn and joining the pieces. Must be called with the
   * service as `this`.
   * @private
   * @param {HTMLElement} parentNode The node to convert
   * @returns A Markdown representation of the node
   * @type String
   */
  function process(parentNode) {
    var service = this;

    function appendChild(output, child) {
      var node = new Node(child);
      var replacement = '';

      switch (node.nodeType) {
        case 3:
          // Text inside code is emitted verbatim; other text is escaped.
          replacement = node.isCode ? node.nodeValue : service.escape(node.nodeValue);
          break;
        case 1:
          replacement = replacementForNode.call(service, node);
          break;
      }

      return join(output, replacement);
    }

    return reduce.call(parentNode.childNodes, appendChild, '');
  }
  /**
   * Appends whatever each rule's `append` hook returns, then trims the
   * result. Must be called with the service as `this`.
   * @private
   * @param {String} output The conversion output
   * @returns A trimmed version of the output
   * @type String
   */
  function postProcess(output) {
    var service = this;
    var result = output;

    service.rules.forEach(function (rule) {
      if (typeof rule.append !== 'function') return;
      result = join(result, rule.append(service.options));
    });

    var withoutLeading = result.replace(/^[\t\r\n]+/, '');
    return withoutLeading.replace(/[\t\r\n\s]+$/, '');
  }
  /**
   * Converts an element node to its Markdown equivalent: looks up the rule,
   * converts the children, and wraps the rule's output in the node's
   * flanking whitespace. Must be called with the service as `this`.
   * @private
   * @param {HTMLElement} node The node to convert
   * @returns A Markdown representation of the node
   * @type String
   */
  function replacementForNode(node) {
    var rule = this.rules.forNode(node);
    var content = process.call(this, node);
    var flanks = node.flankingWhitespace;

    // The flanking space is re-added outside the replacement, so the inner
    // content is trimmed first.
    if (flanks.leading || flanks.trailing) {
      content = content.trim();
    }

    var before = flanks.leading;
    var body = rule.replacement(content, node, this.options);
    return before + body + flanks.trailing;
  }
  /**
   * Determines the new lines between the current output and the replacement:
   * the longer of the output's trailing newlines and the replacement's
   * leading newlines, capped at two.
   * @private
   * @param {String} output The current conversion output
   * @param {String} replacement The string to append to the output
   * @returns The whitespace to separate the current output and the replacement
   * @type String
   */
  function separatingNewlines(output, replacement) {
    var trailing = output.match(trailingNewLinesRegExp)[0];
    var leading = replacement.match(leadingNewLinesRegExp)[0];
    var longest = leading.length > trailing.length ? leading : trailing;

    if (longest.length < 2) return longest;
    return '\n\n';
  }
  /**
   * Concatenates two Markdown fragments. The newlines where they meet are
   * replaced by the separator chosen by `separatingNewlines`.
   *
   * @param {String} string1 The left-hand fragment
   * @param {String} string2 The right-hand fragment
   * @returns {String} The joined text
   */
  function join(string1, string2) {
    var separator = separatingNewlines(string1, string2);

    // Remove trailing/leading newlines and replace with separator
    var head = string1.replace(trailingNewLinesRegExp, '');
    var tail = string2.replace(leadingNewLinesRegExp, '');

    return head + separator + tail;
  }
  /**
   * Determines whether an input can be converted: it must be a string, or a
   * node whose `nodeType` is 1 (element), 9 (document) or 11 (fragment).
   * @private
   * @param {String|HTMLElement} input The candidate input
   * @returns A truthy value when the input is convertible; otherwise a falsy
   *   value (`false`, or the input's falsy `nodeType`)
   */
  function canConvert(input) {
    if (input == null) return false;
    if (typeof input === 'string') return true;

    var type = input.nodeType;
    return type && (type === 1 || type === 9 || type === 11);
  }
  810. return TurndownService;
  811. }());
  812. var turndownPluginGfm = (function (exports) {
  813. 'use strict';
  // Captures the language from a `highlight-text-xxx` / `highlight-source-xxx` class.
  var highlightRegExp = /highlight-(?:text|source)-([a-z0-9]+)/;

  /**
   * Plugin: converts a <div> carrying a highlight class whose first child is
   * a <pre> into a fenced code block tagged with the captured language.
   *
   * @param {Object} turndownService The service to register the rule on
   */
  function highlightedCodeBlock(turndownService) {
    var rule = {
      filter: function (node) {
        var first = node.firstChild;
        if (node.nodeName !== 'DIV') return false;
        if (!highlightRegExp.test(node.className)) return false;
        return first && first.nodeName === 'PRE';
      },

      replacement: function (content, node, options) {
        var classAttr = node.className || '';
        var found = classAttr.match(highlightRegExp);
        var language = found ? found[1] : '';
        var opening = '\n\n' + options.fence + language + '\n';
        var closing = '\n' + options.fence + '\n\n';
        return opening + node.firstChild.textContent + closing;
      }
    };

    turndownService.addRule('highlightedCodeBlock', rule);
  }
  /**
   * Plugin: renders <del>, <s> and <strike> content wrapped in single tildes.
   *
   * @param {Object} turndownService The service to register the rule on
   */
  function strikethrough(turndownService) {
    var rule = {
      filter: ['del', 's', 'strike'],

      replacement: function (content) {
        var tilde = '~';
        return tilde + content + tilde;
      }
    };

    turndownService.addRule('strikethrough', rule);
  }
  /**
   * Plugin: renders every <pre> as a fenced code block built from its text
   * content, with literal "<br>" sequences in that text turned into newlines.
   *
   * @param {Object} turndownService The service to register the rule on
   */
  function preformat(turndownService) {
    var rule = {
      filter: ['pre'],

      replacement: function (content, node, options) {
        var fence = options.fence;
        var text = node.textContent.replace(/\<br\>/ig, '\n');
        return '\n\n' + fence + '\n' + text + '\n' + fence + '\n\n';
      }
    };

    turndownService.addRule('preformat', rule);
  }
  /**
   * Plugin: wraps `turndownService.escape` so that tokens of the form
   * `#<digits>[<text>](<single letter>)` keep their square brackets
   * unescaped after the original escaping has run.
   *
   * @param {Object} turndownService Service whose `escape` method is wrapped
   */
  function mention(turndownService) {
    var originalEscape = turndownService.escape;

    // Matches the token after escaping, i.e. with `\[` and `\]`. The text
    // group is lazy so that several tokens on one line are matched one by one
    // instead of as a single span from the first `\[` to the last `\]`.
    var escapedMention = /#(\d+)\\\[(.*?)\\\]\(([a-zA-Z])\)/g;

    turndownService.escape = function (string) {
      // Forward `this` so an original implementation that relies on its
      // receiver keeps working when called through the wrapper.
      var escaped = originalEscape.call(this, string);

      return escaped.replace(escapedMention, function (match, digits, text, letter) {
        return '#' + digits + '[' + text + ']' + '(' + letter + ')';
      });
    };
  }
  // Array helpers borrowed for use with `.call()` on array-like collections
  // such as `childNodes`.
  var indexOf = Array.prototype.indexOf;
  var every = Array.prototype.every;

  // Table rules keyed by rule name; further rules are attached to this object
  // before the `tables` plugin registers all of them.
  var rules = {
    // Header and data cells are both rendered through `cell`.
    tableCell: {
      filter: ['th', 'td'],
      replacement: function (cellContent, cellNode) {
        return cell(cellContent, cellNode);
      }
    }
  };
  // Renders a `tr` as a new line holding the row's converted cells. For a
  // heading row, a second line of delimiter cells is appended, one per child
  // node, honouring each child's `align` attribute.
  rules.tableRow = {
    filter: 'tr',
    replacement: function (content, node) {
      var borderCells = '';
      var alignMap = { left: ':--', right: '--:', center: ':-:' };

      if (isHeadingRow(node)) {
        for (var i = 0; i < node.childNodes.length; i++) {
          var child = node.childNodes[i];
          var align = (child.getAttribute('align') || '').toLowerCase();

          // Own-property lookup only: the attribute value comes from the
          // document, and names such as "constructor" or "toString" would
          // otherwise resolve to inherited Object.prototype members.
          var border = Object.prototype.hasOwnProperty.call(alignMap, align)
            ? alignMap[align]
            : '---';

          borderCells += cell(border, child);
        }
      }

      return '\n' + content + (borderCells ? '\n' + borderCells : '');
    }
  };
  rules.table = {
    // Only convert tables with a heading row.
    // Tables with no heading row are kept using `keep` (see the `tables`
    // plugin function).
    filter: function (node) {
      return node.nodeName === 'TABLE' && isHeadingRow(node.rows[0]);
    },
    replacement: function (content) {
      // Ensure there are no blank lines. A regular expression with the
      // global flag is required: a string pattern would only replace the
      // first occurrence, and `{2,}` also collapses runs of three or more.
      var compact = content.replace(/\n{2,}/g, '\n');
      return '\n\n' + compact + '\n\n';
    }
  };
  // `thead`, `tbody` and `tfoot` contribute no output of their own: the
  // already-converted content inside them is passed through unchanged.
  rules.tableSection = {
    filter: ['thead', 'tbody', 'tfoot'],
    replacement: function (sectionContent) {
      return sectionContent;
    }
  };
  // A tr is a heading row if:
  // - the parent is a THEAD
  // - or if it's the first child of the TABLE or the first TBODY (possibly
  //   following a blank THEAD)
  //   - and every cell is a TH
  //
  // A missing row (for example `rows[0]` of a table without rows) or a row
  // without a parent is never a heading row; without this guard such input
  // would throw on the property access below.
  function isHeadingRow(tr) {
    if (!tr || !tr.parentNode) return false;

    var parentNode = tr.parentNode;
    return (
      parentNode.nodeName === 'THEAD' ||
      (
        parentNode.firstChild === tr &&
        (parentNode.nodeName === 'TABLE' || isFirstTbody(parentNode)) &&
        every.call(tr.childNodes, function (n) {
          return n.nodeName === 'TH';
        })
      )
    );
  }
  // Tells whether `element` is a TBODY that either has no previous sibling or
  // directly follows a THEAD whose text content is empty or whitespace only.
  function isFirstTbody(element) {
    if (element.nodeName !== 'TBODY') return false;

    var before = element.previousSibling;
    if (!before) return true;

    return before.nodeName === 'THEAD' && /^\s*$/i.test(before.textContent);
  }
  // Formats one table cell. The first child of its parent opens the row with
  // a leading pipe; every cell ends with a trailing pipe.
  function cell(content, node) {
    var position = indexOf.call(node.parentNode.childNodes, node);
    var leading = position === 0 ? '| ' : ' ';
    return leading + content + ' |';
  }
  /**
   * Plugin: registers every rule stored in `rules` and tells the service to
   * keep TABLE elements whose first row is not a heading row.
   *
   * @param {Object} turndownService Service that receives the keep filter and rules
   */
  function tables(turndownService) {
    function isTableWithoutHeadingRow(node) {
      if (node.nodeName !== 'TABLE') return false;
      return !isHeadingRow(node.rows[0]);
    }

    turndownService.keep(isTableWithoutHeadingRow);

    for (var ruleName in rules) {
      turndownService.addRule(ruleName, rules[ruleName]);
    }
  }
  /**
   * Plugin: renders a checkbox whose parent is an LI as a task-list marker,
   * `[x] ` when checked and `[ ] ` otherwise.
   *
   * @param {Object} turndownService Service on which the rule is registered
   */
  function taskListItems(turndownService) {
    var rule = {
      filter: function (node) {
        if (node.type !== 'checkbox') return false;
        return node.parentNode.nodeName === 'LI';
      },
      replacement: function (content, node) {
        var marker = node.checked ? '[x]' : '[ ]';
        return marker + ' ';
      }
    };

    turndownService.addRule('taskListItems', rule);
  }
  /**
   * Plugin bundle: hands all six plugins of this module to
   * `turndownService.use` as a single array, in the order listed.
   *
   * @param {Object} turndownService Service on which the plugins are installed
   */
  function gfm(turndownService) {
    var plugins = [
      highlightedCodeBlock,
      strikethrough,
      tables,
      taskListItems,
      preformat,
      mention
    ];

    turndownService.use(plugins);
  }
  973. exports.gfm = gfm;
  974. exports.highlightedCodeBlock = highlightedCodeBlock;
  975. exports.strikethrough = strikethrough;
  976. exports.tables = tables;
  977. exports.taskListItems = taskListItems;
  978. exports.preformat = preformat;
  979. exports.mention = mention;
  980. return exports;
  981. }({}));