Use html.Parse rather than html.ParseFragment (#16223) (#16225)

* Use html.Parse rather than html.ParseFragment

  There have been a few issues with html.ParseFragment - just use html.Parse instead.

* Skip document node

Signed-off-by: Andrew Thornton <art27@cantab.net>

Co-authored-by: zeripath <art27@cantab.net>
2021-06-22 03:46:39 +02:00 · 2021-06-22 03:46:39 +02:00 · 8ac48584ec
commit 8ac48584ec
parent e898590c81
1 changed file with 14 additions and 17 deletions
--- a/modules/markup/html.go
+++ b/modules/markup/html.go
@@ -334,40 +334,37 @@ func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
 	_, _ = res.WriteString("</body></html>")

 	// parse the HTML
-	nodes, err := html.ParseFragment(res, nil)
+	node, err := html.Parse(res)
 	if err != nil {
 		return nil, &postProcessError{"invalid HTML", err}
 	}

-	for _, node := range nodes {
-		ctx.visitNode(node, true)
+	if node.Type == html.DocumentNode {
+		node = node.FirstChild
 	}

-	newNodes := make([]*html.Node, 0, len(nodes))
+	ctx.visitNode(node, true)

-	for _, node := range nodes {
-		if node.Data == "html" {
-			node = node.FirstChild
-			for node != nil && node.Data != "body" {
-				node = node.NextSibling
-			}
-		}
-		if node == nil {
-			continue
+	nodes := make([]*html.Node, 0, 5)
+
+	if node.Data == "html" {
+		node = node.FirstChild
+		for node != nil && node.Data != "body" {
+			node = node.NextSibling
 		}
+	}
+	if node != nil {
 		if node.Data == "body" {
 			child := node.FirstChild
 			for child != nil {
-				newNodes = append(newNodes, child)
+				nodes = append(nodes, child)
 				child = child.NextSibling
 			}
 		} else {
-			newNodes = append(newNodes, node)
+			nodes = append(nodes, node)
 		}
 	}

-	nodes = newNodes
-
 	// Create buffer in which the data will be placed again. We know that the
 	// length will be at least that of res; to spare a few alloc+copy, we
 	// reuse res, resetting its length to 0.