Skip to content

Commit

Permalink
fix(rosetta): properly escape C# comments as XML (#1184)
Browse files Browse the repository at this point in the history
The C# compiler will completely drop comment blocks that aren't
well-formed XML; the <img> tag we were outputting wasn't, and
so namespace headings weren't being generated.

At the same time, pay more attention to escaping of arbitrary
HTML and the text inside attributes to minimize the chances
of things going wrong in the future.
  • Loading branch information
rix0rrr authored Jan 9, 2020
1 parent 9538c48 commit 2bdc589
Show file tree
Hide file tree
Showing 7 changed files with 182 additions and 24 deletions.
73 changes: 73 additions & 0 deletions packages/jsii-rosetta/lib/markdown/escapes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/**
* A set of escaping functions for placing arbitrary strings into XML/HTML output.
*
* All methods accept `null`. The escapers built in this file (makeXmlEscaper,
* makeJavaEscaper) turn null or empty input into the empty string.
*/
export interface Escaper {
/**
* Escape for use in XML/HTML text
*
* @param x the raw, unescaped string (may be null)
* @returns the string with the characters that are special in text content replaced by entities
*/
text(x: string | null): string;

/**
* Escape for use in XML/HTML attributes
*
* @param x the raw, unescaped string (may be null)
* @returns the string escaped for text, with the additional escapes needed inside an attribute value
*/
attribute(x: string | null): string;

/**
* Re-escape a string that has been escaped for text to be escaped for attributes
*
* Conceptually this unescapes text back to raw and re-escapes for attributes,
* but for speed in practice we just do the additional escapes.
*
* @param x a string that has ALREADY been passed through `text()` (may be null);
*          the escapers in this file apply only the attribute-specific escapes here,
*          so raw input would keep its text-level special characters unescaped
* @returns the string, now escaped for use inside an attribute value
*/
text2attr(x: string | null): string;
}

/**
 * Build an Escaper for generic XML output.
 *
 * - `text` applies the TEXT escapes.
 * - `attribute` applies the TEXT escapes followed by the ATTR_ADDL escapes.
 * - `text2attr` applies only the ATTR_ADDL escapes, since its input is
 *   expected to have been through `text` already.
 */
export function makeXmlEscaper(): Escaper {
  // Attribute values need everything text needs, plus the quote escapes.
  const attributeEscapes: Escapes = TEXT.concat(ATTR_ADDL);

  function text(x: string | null): string {
    return escapeText(TEXT, x);
  }

  function attribute(x: string | null): string {
    return escapeText(attributeEscapes, x);
  }

  function text2attr(x: string | null): string {
    return escapeText(ATTR_ADDL, x);
  }

  return { text, attribute, text2attr };
}

/**
 * Build an Escaper for JavaDoc output.
 *
 * On top of the generic TEXT escapes this also replaces '@' with a numeric
 * character reference, because '@' triggers parsing of comment directives
 * in Java.
 */
export function makeJavaEscaper(): Escaper {
  const atSign: [RegExp, string] = [new RegExp('@', 'g'), '&#64;'];

  // Text escapes: the shared ones first, then the Java-specific '@' escape.
  const textEscapes: Escapes = TEXT.concat([atSign]);
  // Attribute escapes: everything text needs, plus the quote escapes.
  const attributeEscapes: Escapes = textEscapes.concat(ATTR_ADDL);

  function text(x: string | null): string {
    return escapeText(textEscapes, x);
  }

  function attribute(x: string | null): string {
    return escapeText(attributeEscapes, x);
  }

  function text2attr(x: string | null): string {
    // Input was already text-escaped, so only the quote escapes remain to be done.
    return escapeText(ATTR_ADDL, x);
  }

  return { text, attribute, text2attr };
}

/** An ordered list of (pattern, replacement) pairs. */
type Escapes = Array<[RegExp, string]>;

// Escapes needed in XML/HTML text content. '&' is listed first so that the
// ampersands introduced by the later replacements are not escaped again.
const TEXT: Escapes = [
  [/&/g, '&amp;'],
  [/</g, '&lt;'],
  [/>/g, '&gt;'],
];

// Escapes needed inside attribute values, on top of the TEXT escapes.
const ATTR_ADDL: Escapes = [
  [/"/g, '&quot;'],
  [/'/g, '&apos;'],
];

/**
 * Apply every (pattern, replacement) pair of `set` to `what`, in list order.
 *
 * @param set the escapes to apply; each replacement runs on the output of the previous one
 * @param what the string to escape
 * @returns the escaped string, or '' when `what` is null, undefined or empty
 */
function escapeText(set: Escapes, what: string | null): string {
  if (what == null || what.length === 0) {
    return '';
  }

  return set.reduce(
    (escaped: string, [pattern, replacement]: [RegExp, string]) => escaped.replace(pattern, replacement),
    what,
  );
}
20 changes: 8 additions & 12 deletions packages/jsii-rosetta/lib/markdown/javadoc-renderer.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import * as cm from 'commonmark';
import { RendererContext } from './markdown';
import { MarkdownRenderer, collapsePara, para, stripTrailingWhitespace, stripPara } from './markdown-renderer';
import { makeJavaEscaper } from './escapes';

const ESCAPE = makeJavaEscaper();

/* eslint-disable @typescript-eslint/camelcase */

Expand All @@ -16,7 +19,7 @@ export class JavaDocRenderer extends MarkdownRenderer {
}

public code(node: cm.Node, _context: RendererContext) {
return `<code>${escapeCharacters(node.literal)}</code>`;
return `<code>${ESCAPE.text(node.literal)}</code>`;
}

/**
Expand All @@ -30,15 +33,15 @@ export class JavaDocRenderer extends MarkdownRenderer {
*/
/* eslint-disable-next-line @typescript-eslint/camelcase */
public code_block(node: cm.Node, _context: RendererContext) {
return para(`<blockquote><pre>\n${escapeCharacters(node.literal)}</pre></blockquote>`);
return para(`<blockquote><pre>\n${ESCAPE.text(node.literal)}</pre></blockquote>`);
}

public text(node: cm.Node, _context: RendererContext) {
return escapeCharacters(node.literal) || '';
return ESCAPE.text(node.literal) ?? '';
}

public link(node: cm.Node, context: RendererContext) {
return `<a href="${node.destination || ''}">${context.content()}</a>`;
return `<a href="${ESCAPE.attribute(node.destination) ?? ''}">${context.content()}</a>`;
}

public document(_node: cm.Node, context: RendererContext) {
Expand All @@ -60,7 +63,7 @@ export class JavaDocRenderer extends MarkdownRenderer {
}

public image(node: cm.Node, context: RendererContext) {
return `<img alt="${context.content()}" src="${node.destination || ''}">`;
return `<img alt="${ESCAPE.text2attr(context.content())}" src="${ESCAPE.attribute(node.destination) ?? ''}">`;
}

public emph(_node: cm.Node, context: RendererContext) {
Expand All @@ -77,13 +80,6 @@ export class JavaDocRenderer extends MarkdownRenderer {
}
}

/**
 * Escape the characters that need escaping in JavaDoc HTML.
 *
 * Replaces '&', '<', '>' and '@' (in that order) with their entity forms.
 *
 * @param x the raw string
 * @returns the escaped string, or '' when `x` is null or empty
 */
function escapeCharacters(x: string | null): string {
  if (!x) {
    return '';
  }

  const replacements: Array<[RegExp, string]> = [
    [/&/g, '&amp;'],
    [/</g, '&lt;'],
    [/>/g, '&gt;'],
    [/@/g, '&#64;'],
  ];

  let escaped = x;
  for (const [pattern, entity] of replacements) {
    escaped = escaped.replace(pattern, entity);
  }
  return escaped;
}

/**
 * Call `collapsePara` on `x` with the JavaDoc paragraph separator: a `<p>` tag
 * on a line of its own.
 */
function collapseParaJava(x: string) {
  const javaParagraphSeparator = '\n<p>\n';
  return collapsePara(x, javaParagraphSeparator);
}
52 changes: 40 additions & 12 deletions packages/jsii-rosetta/lib/markdown/xml-comment-renderer.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
import * as cm from 'commonmark';
import { prefixLines, RendererContext } from './markdown';
import { MarkdownRenderer, para, stripPara } from './markdown-renderer';
import { makeXmlEscaper } from './escapes';

const ESCAPE = makeXmlEscaper();

// The types for 'xmldom' are not complete.
/* eslint-disable-next-line @typescript-eslint/no-var-requires,@typescript-eslint/no-require-imports */
const { DOMParser, XMLSerializer } = require('xmldom');

/* eslint-disable @typescript-eslint/camelcase */

Expand All @@ -16,19 +23,23 @@ export class CSharpXmlCommentRenderer extends MarkdownRenderer {
}

public code(node: cm.Node, _context: RendererContext) {
return `<c>${escapeCharacters(node.literal)}</c>`;
return `<c>${ESCAPE.text(node.literal)}</c>`;
}

/**
 * Render a code block as a `<code>` element whose content is a CDATA section.
 *
 * The code is emitted verbatim (no entity escaping) because it sits inside
 * CDATA. The one sequence that cannot appear inside a CDATA section is `]]>`,
 * which would end the section early and leave the rest of the code as
 * malformed XML. Each occurrence is therefore split across two adjacent
 * CDATA sections, which an XML parser reads back as the original text.
 *
 * @param node the code block node; a missing literal is rendered as empty content
 * @returns the rendered block, wrapped with `para`
 */
public code_block(node: cm.Node, _context: RendererContext) {
  // "]]>" becomes "]]" + end-of-section + start-of-section + ">".
  const literal = (node.literal ?? '').replace(/\]\]>/g, ']]]]><![CDATA[>');
  return para(`<code><![CDATA[\n${literal}]]></code>`);
}

public text(node: cm.Node, _context: RendererContext) {
return escapeCharacters(node.literal) || '';
return ESCAPE.text(node.literal) ?? '';
}

public link(node: cm.Node, context: RendererContext) {
return `${context.content()} (${node.destination || ''})`;
return `<a href="${ESCAPE.attribute(node.destination) ?? ''}">${context.content()}</a>`;
}

/**
 * Render an image as a self-closed `<img />` element.
 *
 * The alt text comes from the already-rendered child content and is passed
 * through `text2attr`; the image destination is attribute-escaped.
 */
public image(node: cm.Node, context: RendererContext) {
  const alt = ESCAPE.text2attr(context.content());
  const src = ESCAPE.attribute(node.destination) ?? '';
  return `<img alt="${alt}" src="${src}" />`;
}

public emph(_node: cm.Node, context: RendererContext) {
Expand All @@ -53,14 +64,31 @@ export class CSharpXmlCommentRenderer extends MarkdownRenderer {
return `<description>${stripPara(context.content())}</description>\n`;
}

public image(node: cm.Node, context: RendererContext) {
return `<img alt="${context.content()}" src="${node.destination || ''}">`;
public thematic_break(_node: cm.Node, _context: RendererContext) {
return para('<hr />');
}
}

/**
 * Escape the characters that need escaping in XML.
 *
 * Replaces '&', '<' and '>' (in that order) with their entity forms.
 *
 * @param x the raw string
 * @returns the escaped string, or '' when `x` is null or empty
 */
function escapeCharacters(x: string | null): string {
  if (!x) {
    return '';
  }

  const replacements: Array<[RegExp, string]> = [
    [/&/g, '&amp;'],
    [/</g, '&lt;'],
    [/>/g, '&gt;'],
  ];

  return replacements.reduce(
    (escaped: string, [pattern, entity]: [RegExp, string]) => escaped.replace(pattern, entity),
    x,
  );
}
/**
 * Convert inline HTML to XML.
 *
 * The HTML is parsed in 'text/html' mode and serialized back out as XML.
 * Without this step, the XML parser rejects the whole comment block as soon
 * as it sees an unclosed <img> tag.
 */
public html_inline(node: cm.Node, _context: RendererContext) {
  const parser = new DOMParser();
  const parsed = parser.parseFromString(node.literal ?? '', 'text/html');
  const serializer = new XMLSerializer();
  return serializer.serializeToString(parsed);
}

/**
 * Convert an HTML block to XML.
 *
 * The HTML is parsed in 'text/html' mode and serialized back out as XML.
 * Without this step, the XML parser rejects the whole comment block as soon
 * as it sees an unclosed <img> tag.
 */
public html_block(node: cm.Node, _context: RendererContext) {
  const parser = new DOMParser();
  const parsed = parser.parseFromString(node.literal ?? '', 'text/html');
  const serializer = new XMLSerializer();
  return serializer.serializeToString(parsed);
}
}
1 change: 1 addition & 0 deletions packages/jsii-rosetta/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"commonmark": "^0.29.0",
"fs-extra": "^8.1.0",
"typescript": "~3.7.4",
"xmldom": "^0.2.1",
"yargs": "^15.1.0"
},
"jest": {
Expand Down
13 changes: 13 additions & 0 deletions packages/jsii-rosetta/test/markdown/javadoc.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,19 @@ if (x &lt; 3) {
`);
});

// JavaDoc output: a link and an image whose text and URL both contain double
// quotes and an ampersand. In the expected output the URL inside href/src
// carries %22 for each quote and &amp; for the ampersand, the image alt text
// uses &quot; and &amp;, and the link text keeps its quotes as-is.
test('quotes are escaped inside attributes', () => {
expectOutput(`
['tis but a "scratch"](http://bla.ck/"kni"gh&t)
![nay merely a "flesh wound" &cet](http://bla.ck/"kni"gh&t.jpg)
`, `
<a href="http://bla.ck/%22kni%22gh&amp;t">'tis but a "scratch"</a>
<p>
<img alt="nay merely a &quot;flesh wound&quot; &amp;cet" src="http://bla.ck/%22kni%22gh&amp;t.jpg">
`);
});


function expectOutput(source: string, expected: string) {
if (DEBUG) {
// tslint:disable-next-line:no-console
Expand Down
37 changes: 37 additions & 0 deletions packages/jsii-rosetta/test/markdown/xmldoccomments.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,43 @@ if (x < 3) {
`);
});

// XML doc comment output: same input as the JavaDoc variant of this test.
// In the expected output the URL inside href/src carries %22 for each quote
// and &amp; for the ampersand, the image alt text uses &quot; and &amp;, and
// the <img> element is self-closed so the result is well-formed XML.
test('quotes are escaped inside attributes', () => {
expectOutput(`
['tis but a "scratch"](http://bla.ck/"kni"gh&t)
![nay merely a "flesh wound" &cet](http://bla.ck/"kni"gh&t.jpg)
`, `
<a href="http://bla.ck/%22kni%22gh&amp;t">'tis but a "scratch"</a>
<img alt="nay merely a &quot;flesh wound&quot; &amp;cet" src="http://bla.ck/%22kni%22gh&amp;t.jpg" />
`);
});


// A stability banner as found at the top of a README: HTML comments around
// two thematic breaks and a badge image. The expected output keeps the
// comments and renders the breaks and the image as self-closed <hr /> and
// <img /> elements.
test('convert header properly', () => {
expectOutput(`
<!--BEGIN STABILITY BANNER-->
---
![Stability: Stable](https://img.shields.io/badge/stability-Stable-success.svg?style=for-the-badge)
---
<!--END STABILITY BANNER-->
`, `
<!--BEGIN STABILITY BANNER-->
<hr />
<img alt="Stability: Stable" src="https://img.shields.io/badge/stability-Stable-success.svg?style=for-the-badge" />
<hr />
<!--END STABILITY BANNER-->
`);
});


function expectOutput(source: string, expected: string) {
if (DEBUG) {
// tslint:disable-next-line:no-console
Expand Down
10 changes: 10 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1328,6 +1328,11 @@
dependencies:
"@types/node" "*"

"@types/xmldom@^0.1.29":
version "0.1.29"
resolved "https://registry.yarnpkg.com/@types/xmldom/-/xmldom-0.1.29.tgz#c4428b0ca86d3b881475726fd94980b38a27c381"
integrity sha1-xEKLDKhtO4gUdXJv2UmAs4onw4E=

"@types/yargs-parser@*":
version "13.1.0"
resolved "https://registry.yarnpkg.com/@types/yargs-parser/-/yargs-parser-13.1.0.tgz#c563aa192f39350a1d18da36c5a8da382bbd8228"
Expand Down Expand Up @@ -8543,6 +8548,11 @@ xmlbuilder@^13.0.2:
resolved "https://registry.yarnpkg.com/xmlbuilder/-/xmlbuilder-13.0.2.tgz#02ae33614b6a047d1c32b5389c1fdacb2bce47a7"
integrity sha512-Eux0i2QdDYKbdbA6AM6xE4m6ZTZr4G4xF9kahI2ukSEMCzwce2eX9WlTI5J3s+NU7hpasFsr8hWIONae7LluAQ==

xmldom@^0.2.1:
version "0.2.1"
resolved "https://registry.yarnpkg.com/xmldom/-/xmldom-0.2.1.tgz#cac9465066f161e1c3302793ea4dbe59c518274f"
integrity sha512-kXXiYvmblIgEemGeB75y97FyaZavx6SQhGppLw5TKWAD2Wd0KAly0g23eVLh17YcpxZpnFym1Qk/eaRjy1APPg==

xtend@^4.0.0, xtend@~4.0.1:
version "4.0.2"
resolved "https://registry.yarnpkg.com/xtend/-/xtend-4.0.2.tgz#bb72779f5fa465186b1f438f674fa347fdb5db54"
Expand Down

0 comments on commit 2bdc589

Please sign in to comment.