<?php

declare(strict_types=1);

namespace Pelago\Emogrifier\Tests\Unit\HtmlProcessor;

use Pelago\Emogrifier\HtmlProcessor\AbstractHtmlProcessor;
use Pelago\Emogrifier\Tests\Unit\HtmlProcessor\Fixtures\TestingHtmlProcessor;
use PHPUnit\Framework\TestCase;

/**
 * Test case.
 *
 * @covers \Pelago\Emogrifier\HtmlProcessor\AbstractHtmlProcessor
 *
 * @author Oliver Klee <github@oliverklee.de>
 */
class AbstractHtmlProcessorTest extends TestCase
{
    /**
     * @test
     */
    public function fromHtmlReturnsAbstractHtmlProcessor()
    {
        $subject = TestingHtmlProcessor::fromHtml('<html></html>');

        self::assertInstanceOf(AbstractHtmlProcessor::class, $subject);
    }

    /**
     * @test
     */
    public function fromHtmlReturnsInstanceOfCalledClass()
    {
        $subject = TestingHtmlProcessor::fromHtml('<html></html>');

        self::assertInstanceOf(TestingHtmlProcessor::class, $subject);
    }

    /**
     * @test
     */
    public function fromDomDocumentReturnsAbstractHtmlProcessor()
    {
        $document = new \DOMDocument();
        $document->loadHTML('<html></html>');
        $subject = TestingHtmlProcessor::fromDomDocument($document);

        self::assertInstanceOf(AbstractHtmlProcessor::class, $subject);
    }

    /**
     * @test
     */
    public function fromDomDocumentReturnsInstanceOfCalledClass()
    {
        $document = new \DOMDocument();
        $document->loadHTML('<html></html>');
        $subject = TestingHtmlProcessor::fromDomDocument($document);

        self::assertInstanceOf(TestingHtmlProcessor::class, $subject);
    }

    /**
     * @test
     */
    public function renderRendersDocumentProvidedToFromDomDocument()
    {
        $innerHtml = '<p>Hello world!</p>';
        $document = new \DOMDocument();
        $document->loadHTML('<html>' . $innerHtml . '</html>');
        $subject = TestingHtmlProcessor::fromDomDocument($document);

        $html = $subject->render();

        self::assertContains($innerHtml, $html);
    }

    /**
     * @test
     */
    public function renderPreservesBodyContentProvidedToFromHtml()
    {
        $innerHtml = '<p>Hello world!</p>';
        $subject = TestingHtmlProcessor::fromHtml('<html>' . $innerHtml . '</html>');

        $html = $subject->render();

        self::assertContains($innerHtml, $html);
    }

    /**
     * @test
     */
    public function renderPreservesOuterHtmlProvidedToFromHtml()
    {
        $rawHtml = '<!DOCTYPE HTML>' .
            '<html>' .
            '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head>' .
            '<body></body>' .
            '</html>';
        $formattedHtml = "<!DOCTYPE html>\n" .
            "<html>\n" .
            '<head><meta http-equiv="Content-Type" content="text/html; charset=utf-8"></head>' . "\n" .
            "<body></body>\n" .
            "</html>\n";

        $subject = TestingHtmlProcessor::fromHtml($rawHtml);
        $html = $subject->render();

        self::assertEqualsHtml($formattedHtml, $html);
    }

    /**
     * @test
     */
    public function fromHtmlWithEmptyStringThrowsException()
    {
        $this->expectException(\InvalidArgumentException::class);

        TestingHtmlProcessor::fromHtml('');
    }

    /**
     * @return string[][]
     */
    public function invalidHtmlDataProvider(): array
    {
        return [
            'broken nesting gets nested' => ['<b><i></b></i>', '<b><i></i></b>'],
            'partial opening tag gets closed' => ['<b', '<b></b>'],
            'only opening tag gets closed' => ['<b>', '<b></b>'],
            'only closing tag gets removed' => ['foo</b> bar', 'foo bar'],
        ];
    }

    /**
     * @test
     *
     * @param string $input
     * @param string $expectedHtml
     *
     * @dataProvider invalidHtmlDataProvider
     */
    public function renderRepairsBrokenHtml(string $input, string $expectedHtml)
    {
        $subject = TestingHtmlProcessor::fromHtml($input);
        $result = $subject->render();

        self::assertContains($expectedHtml, $result);
    }

    /**
     * @return string[][]
     */
    public function provideHtmlWithOptionalTagsOmitted(): array
    {
        return [
            'LI end tag ommission with LI element following' => [
                '<ul><li> One <li> Two </li></ul>',
                '<ul><li> One </li><li> Two </li></ul>',
            ],
            'LI end tag ommission at end of list' => [
                '<ul><li> One </li><li> Two </ul>',
                '<ul><li> One </li><li> Two </li></ul>',
            ],
            // broken: DT end tag ommission with DT element following
            'DT end tag ommission with DD element following' => [
                '<dl><dt> One </dt><dt> Two <dd> Buckle My Shoe </dd></dl>',
                '<dl><dt> One </dt><dt> Two </dt><dd> Buckle My Shoe </dd></dl>',
            ],
            // broken: DD end tag ommission with DD element following
            'DD end tag ommission with DT element following' => [
                '<dl><dt> One </dt><dd> A </dd><dd> B <dt> Two </dt><dd> C </dd></dl>',
                '<dl><dt> One </dt><dd> A </dd><dd> B </dd><dt> Two </dt><dd> C </dd></dl>',
            ],
            'DD end tag ommission at end of list' => [
                '<dl><dt> One </dt><dd> A </dd><dd> B </dd><dt> Two </dt><dd> C </dl>',
                '<dl><dt> One </dt><dd> A </dd><dd> B </dd><dt> Two </dt><dd> C </dd></dl>',
            ],
            // broken: RT end tag ommission with RT element following
            // broken: RT end tag ommission with RP element following
            'RT end tag ommission at end of annotation' => [
                '<ruby> 攻殻 <rt> こうかく </rt> 機動隊 <rt> きどうたい </ruby>',
                '<ruby> 攻殻 <rt> こうかく </rt> 機動隊 <rt> きどうたい </rt></ruby>',
            ],
            // broken: RP end tag ommission with RT element following
            // broken: RP end tag ommission with RP element following
            'RP end tag ommission at end of annotation' => [
                '<ruby> 明日 <rp> ( </rp><rt> Ashita </rt><rp> ) </ruby>',
                '<ruby> 明日 <rp> ( </rp><rt> Ashita </rt><rp> ) </rp></ruby>',
            ],
            // broken: OPTGROUP end tag ommission with OPTGROUP element following
            'OPTGROUP end tag ommission at end of list' => [
                '<select><optgroup><option> 1 </option><option> 2 </option></optgroup>'
                    . '<optgroup><option> A </option><option> B </option></select>',
                '<select><optgroup><option> 1 </option><option> 2 </option></optgroup>'
                    . '<optgroup><option> A </option><option> B </option></optgroup></select>',
            ],
            'OPTION end tag ommission with OPTION element following' => [
                '<select><option> 1 <option> 2 </option></select>',
                '<select><option> 1 </option><option> 2 </option></select>',
            ],
            // broken: OPTION end tag ommission with OPTGROUP element following
            'OPTION end tag ommission at end of list' => [
                '<select><option> 1 </option><option> 2 </select>',
                '<select><option> 1 </option><option> 2 </option></select>',
            ],
            // broken: COLGROUP start tag omission
            'COLGROUP end tag omission' => [
                '<table><colgroup><col><tr><td></td></tr></table>',
                '<table><colgroup><col></colgroup><tr><td></td></tr></table>',
            ],
            'CAPTION end tag omission' => [
                '<table><caption> Caption <tr><td></td></tr></table>',
                '<table><caption> Caption </caption><tr><td></td></tr></table>',
            ],
            'THEAD end tag omission with TBODY element following' => [
                '<table><thead><tr><td></td></tr><tbody><tr><td></td></tr></tbody></table>',
                '<table><thead><tr><td></td></tr></thead><tbody><tr><td></td></tr></tbody></table>',
            ],
            'THEAD end tag omission with TFOOT element following' => [
                '<table><thead><tr><td></td></tr><tfoot><tr><td></td></tr></tfoot></table>',
                '<table><thead><tr><td></td></tr></thead><tfoot><tr><td></td></tr></tfoot></table>',
            ],
            // broken: TBODY start tag omission
            'TBODY end tag omission with TBODY element following' => [
                '<table><tbody><tr><td></td></tr><tbody><tr><td></td></tr></tbody></table>',
                '<table><tbody><tr><td></td></tr></tbody><tbody><tr><td></td></tr></tbody></table>',
            ],
            'TBODY end tag omission with TFOOT element following' => [
                '<table><tbody><tr><td></td></tr><tfoot><tr><td></td></tr></tfoot></table>',
                '<table><tbody><tr><td></td></tr></tbody><tfoot><tr><td></td></tr></tfoot></table>',
            ],
            'TR end tag omission with TR element following' => [
                '<table><tr><td></td><tr><td></td></tr></table>',
                '<table><tr><td></td></tr><tr><td></td></tr></table>',
            ],
            'TD end tag omission with TD element following' => [
                '<table><tr><td><td></td></tr></table>',
                '<table><tr><td></td><td></td></tr></table>',
            ],
            'TD end tag omission with TH element following' => [
                '<table><tr><td><th></th></tr></table>',
                '<table><tr><td></td><th></th></tr></table>',
            ],
            'TH end tag omission with TD element following' => [
                '<table><tr><th><td></td></tr></table>',
                '<table><tr><th></th><td></td></tr></table>',
            ],
            'TH end tag omission with TH element following' => [
                '<table><tr><th><th></th></tr></table>',
                '<table><tr><th></th><th></th></tr></table>',
            ],
            'P end tag omission with HR element following' => [
                '<p> Hello <hr>',
                '<p> Hello </p><hr>',
            ],
        ];
    }

    /**
     * @test
     *
     * @see https://html.spec.whatwg.org/multipage/syntax.html#optional-tags
     *
     * @param string $htmlWithOptionalTagsOmitted
     * @param string $equivalentHtml
     *
     * @dataProvider provideHtmlWithOptionalTagsOmitted
     */
    public function insertsOptionallyOmittedTags(string $htmlWithOptionalTagsOmitted, string $equivalentHtml)
    {
        $subject = TestingHtmlProcessor::fromHtml('<body>' . $htmlWithOptionalTagsOmitted . '</body>');

        $result = $subject->render();

        self::assertContainsHtml('<body>' . $equivalentHtml . '</body>', $result);
    }

    /**
     * @return string[][]
     */
    public function providePSiblingTagName(): array
    {
        return [
            ['address'],
            // broken: article
            // broken: aside
            ['blockquote'],
            // broken: details
            ['div'],
            ['dl'],
            ['fieldset'],
            // broken: figcaption
            // broken: figure
            // broken: footer
            ['form'],
            ['h1'],
            ['h2'],
            ['h3'],
            ['h4'],
            ['h5'],
            ['h6'],
            // broken: header
            // broken: hgroup
            // broken: main
            ['menu'],
            // broken: nav
            ['ol'],
            ['p'],
            ['pre'],
            // broken: section
            ['table'],
            ['ul'],
        ];
    }

    /**
     * @test
     *
     * @see https://html.spec.whatwg.org/multipage/syntax.html#optional-tags
     *
     * @param string $siblingTagName
     *
     * @dataProvider providePSiblingTagName
     */
    public function insertsOptionallyOmittedClosingPTagBeforeSibling(string $siblingTagName)
    {
        $subject = TestingHtmlProcessor::fromHtml(
            '<body><p> Hello <' . $siblingTagName . '></' . $siblingTagName . '></body>'
        );

        $result = $subject->render();

        self::assertContainsHtml(
            '<body><p> Hello </p><' . $siblingTagName . '></' . $siblingTagName . '></body>',
            $result
        );
    }

    /**
     * @return string[][]
     */
    public function providePParentTagName(): array
    {
        return [
            ['address'],
            ['article'],
            ['aside'],
            ['blockquote'],
            ['div'],
            ['fieldset'],
            ['figure'],
            ['footer'],
            ['form'],
            ['header'],
            ['main'],
            ['nav'],
            ['section'],
            ['template'],
        ];
    }

    /**
     * @test
     *
     * @see https://html.spec.whatwg.org/multipage/syntax.html#optional-tags
     *
     * @param string $parentTagName
     *
     * @dataProvider providePParentTagName
     */
    public function insertsOptionallyOmittedClosingPTagAtEndOfParent(string $parentTagName)
    {
        $subject = TestingHtmlProcessor::fromHtml(
            '<body><' . $parentTagName . '><p> Hello </' . $parentTagName . '><p> World </p></body>'
        );

        $result = $subject->render();

        self::assertContainsHtml(
            '<body><' . $parentTagName . '><p> Hello </p></' . $parentTagName . '><p> World </p></body>',
            $result
        );
    }

    /**
     * @return string[][]
     */
    public function contentWithoutHtmlTagDataProvider(): array
    {
        return [
            'doctype only' => ['<!DOCTYPE html>'],
            'body content only' => ['<p>Hello</p>'],
            'HEAD element' => ['<head></head>'],
            'BODY element' => ['<body></body>'],
            'HEAD AND BODY element' => ['<head></head><body></body>'],
        ];
    }

    /**
     * @test
     *
     * @param string $html
     *
     * @dataProvider contentWithoutHtmlTagDataProvider
     */
    public function addsMissingHtmlTag(string $html)
    {
        $subject = TestingHtmlProcessor::fromHtml($html);

        $result = $subject->render();

        self::assertContains('<html>', $result);
    }

    /**
     * @return string[][]
     */
    public function contentWithoutHeadTagDataProvider(): array
    {
        return [
            'doctype only' => ['<!DOCTYPE html>'],
            'body content only' => ['<p>Hello</p>'],
            'BODY element' => ['<body></body>'],
            'HEADER element' => ['<header></header>'],
            'META element (implicit HEAD)' => ['<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'],
        ];
    }

    /**
     * @test
     *
     * @param string $html
     *
     * @dataProvider contentWithoutHeadTagDataProvider
     */
    public function addsMissingHeadTagOnlyOnce(string $html)
    {
        $subject = TestingHtmlProcessor::fromHtml($html);

        $result = $subject->render();

        $headTagCount = \substr_count($result, '<head>');
        self::assertSame(1, $headTagCount);
    }

    /**
     * @return string[][]
     */
    public function contentWithHeadTagDataProvider(): array
    {
        return [
            'HEAD element' => ['<head></head>'],
            'HEAD element, capitalized' => ['<HEAD></HEAD>'],
            '(invalid) void HEAD element' => ['<head/>'],
            'HEAD element with attribute' => ['<head lang="en"></head>'],
            'HEAD element and HEADER element' => ['<head></head><header></header>'],
        ];
    }

    /**
     * @test
     *
     * @param string $html
     *
     * @dataProvider contentWithHeadTagDataProvider
     */
    public function notAddsSecondHeadTag(string $html)
    {
        $subject = TestingHtmlProcessor::fromHtml($html);

        $result = $subject->render();

        $headTagCount = \preg_match_all('%<head[\\s/>]%', $result);
        self::assertSame(1, $headTagCount);
    }

    /**
     * @test
     */
    public function preservesHeadAttributes()
    {
        $subject = TestingHtmlProcessor::fromHtml('<head lang="en"></head>');

        $result = $subject->render();

        self::assertContains('<head lang="en">', $result);
    }

    /**
     * @return string[][]
     */
    public function contentWithoutBodyTagDataProvider(): array
    {
        return [
            'doctype only' => ['<!DOCTYPE html>'],
            'HEAD element' => ['<head></head>'],
            'body content only' => ['<p>Hello</p>'],
        ];
    }

    /**
     * @test
     *
     * @param string $html
     *
     * @dataProvider contentWithoutBodyTagDataProvider
     */
    public function addsMissingBodyTag(string $html)
    {
        $subject = TestingHtmlProcessor::fromHtml($html);

        $result = $subject->render();

        self::assertContains('<body>', $result);
    }

    /**
     * @test
     */
    public function putsMissingBodyElementAroundBodyContent()
    {
        $subject = TestingHtmlProcessor::fromHtml('<p>Hello</p>');

        $result = $subject->render();

        self::assertContains('<body><p>Hello</p></body>', $result);
    }

    /**
     * @return string[][]
     */
    public function specialCharactersDataProvider(): array
    {
        return [
            'template markers with dollar signs & square brackets' => ['$[USER:NAME]$'],
            'UTF-8 umlauts' => ['Küss die Hand, schöne Frau. イリノイ州シカゴにて、アイルランド系の家庭に、'],
            'HTML entities' => ['a &amp; b &gt; c'],
            'curly braces' => ['{Happy new year!}'],
        ];
    }

    /**
     * @test
     *
     * @param string $codeNotToBeChanged
     *
     * @dataProvider specialCharactersDataProvider
     */
    public function keepsSpecialCharactersInTextNodes(string $codeNotToBeChanged)
    {
        $html = '<html><p>' . $codeNotToBeChanged . '</p></html>';
        $subject = TestingHtmlProcessor::fromHtml($html);

        $result = $subject->render();

        self::assertContains($codeNotToBeChanged, $result);
    }

    /**
     * @test
     */
    public function addsMissingHtml5DocumentType()
    {
        $subject = TestingHtmlProcessor::fromHtml('<html></html>');

        $result = $subject->render();

        self::assertContains('<!DOCTYPE html>', $result);
    }

    /**
     * @return string[][]
     *
     * @psalm-return array<string, array<int, string>>
     */
    public function documentTypeDataProvider(): array
    {
        return [
            'HTML5' => ['<!DOCTYPE html>'],
            'HTML 4.01 strict' => [
                '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" ' .
                '"http://www.w3.org/TR/html4/strict.dtd">',
            ],
            'HTML 4.01 transitional' => [
                '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" ' .
                '"http://www.w3.org/TR/html4/loose.dtd">',
            ],
            'HTML 4 transitional' => [
                '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" ' .
                '"http://www.w3.org/TR/REC-html40/loose.dtd">',
            ],
            'HTML 3.2' => ['<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">'],
        ];
    }

    /**
     * @test
     *
     * @param string $documentType
     *
     * @dataProvider documentTypeDataProvider
     */
    public function keepsExistingDocumentType(string $documentType)
    {
        $html = $documentType . '<html></html>';
        $subject = TestingHtmlProcessor::fromHtml($html);

        $result = $subject->render();

        self::assertContains($documentType, $result);
    }

    /**
     * @return string[][]
     */
    public function normalizedDocumentTypeDataProvider(): array
    {
        return [
            'HTML5, uppercase' => ['<!DOCTYPE HTML>', '<!DOCTYPE html>'],
            'HTML5, lowercase' => ['<!doctype html>', '<!DOCTYPE html>'],
            'HTML5, mixed case' => ['<!DocType Html>', '<!DOCTYPE html>'],
            'HTML5, extra whitespace' => ['<!DOCTYPE  html  >', '<!DOCTYPE html>'],
            'HTML 4 transitional, uppercase' => [
                '<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" '
                    . '"http://www.w3.org/TR/REC-html40/loose.dtd">',
                '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" '
                    . '"http://www.w3.org/TR/REC-html40/loose.dtd">',
            ],
            'HTML 4 transitional, lowercase' => [
                '<!doctype html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" '
                    . '"http://www.w3.org/TR/REC-html40/loose.dtd">',
                '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" '
                    . '"http://www.w3.org/TR/REC-html40/loose.dtd">',
            ],
        ];
    }

    /**
     * @test
     *
     * @param string $documentType
     * @param string $normalizedDocumentType
     *
     * @dataProvider normalizedDocumentTypeDataProvider
     */
    public function normalizesDocumentType(string $documentType, string $normalizedDocumentType)
    {
        $html = $documentType . '<html></html>';
        $subject = TestingHtmlProcessor::fromHtml($html);

        $result = $subject->render();

        self::assertContains($normalizedDocumentType, $result);
    }

    /**
     * @test
     *
     * @param string $html
     *
     * @dataProvider contentWithoutHeadTagDataProvider
     * @dataProvider contentWithHeadTagDataProvider
     */
    public function addsMissingContentTypeMetaTagOnlyOnce(string $html)
    {
        $subject = TestingHtmlProcessor::fromHtml($html);

        $result = $subject->render();

        $numberOfContentTypeMetaTags = \substr_count(
            $result,
            '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'
        );
        self::assertSame(1, $numberOfContentTypeMetaTags);
    }

    /**
     * @return string[][]
     */
    public function htmlAroundContentTypeDataProvider(): array
    {
        return [
            'HTML and HEAD element' => ['<html><head>', '</head></html>'],
            'HTML and HEAD element, HTML end tag omitted' => ['<html><head>', '</head>'],
            'HEAD element only' => ['<head>', '</head>'],
            'HEAD element with attribute' => ['<head lang="en">', '</head>'],
            'HTML, HEAD, and BODY with HEADER elements'
                => ['<html><head>', '</head><body><header></header></body></html>'],
        ];
    }

    /**
     * @test
     *
     * @param string $htmlBefore
     * @param string $htmlAfter
     *
     * @dataProvider htmlAroundContentTypeDataProvider
     */
    public function notAddsSecondContentTypeMetaTag(string $htmlBefore, string $htmlAfter)
    {
        $html = $htmlBefore . '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' . $htmlAfter;
        $subject = TestingHtmlProcessor::fromHtml($html);

        $result = $subject->render();

        $numberOfContentTypeMetaTags = \substr_count($result, 'Content-Type');
        self::assertSame(1, $numberOfContentTypeMetaTags);
    }

    /**
     * @return string[][]
     *
     * @psalm-return array<string, array{0:string, 1:string}>
     */
    public function xmlSelfClosingTagDataProvider(): array
    {
        return [
            '<br>' => ['<br/>', 'br'],
            '<wbr>' => ['foo<wbr/>bar', 'wbr'],
            '<embed>' => [
                '<embed type="video/mp4" src="https://example.com/flower.mp4" width="250" height="200"/>',
                'embed',
            ],
            '<picture> with <source> and <img>' => [
                '<picture><source srcset="https://example.com/flower-800x600.jpeg" media="(min-width: 600px)"/>'
                . '<img src="https://example.com/flower-400x300.jpeg"/></picture>',
                'source',
            ],
            '<video> with <track>' => [
                '<video controls width="250" src="https://example.com/flower.mp4">'
                . '<track default kind="captions" srclang="en" src="https://example.com/flower.vtt"/></video>',
                'track',
            ],
        ];
    }

    /**
     * @return string[][]
     *
     * @psalm-return array<string, array{0:string, 1:string}>
     */
    public function nonXmlSelfClosingTagDataProvider(): array
    {
        return \array_map(
            /**
             * @psalm-param array{0:string, 1:string} $dataset
             *
             * @psalm-return array{0:string, 1:string}
             */
            static function (array $dataset) {
                $dataset[0] = \str_replace('/>', '>', $dataset[0]);
                return $dataset;
            },
            $this->xmlSelfClosingTagDataProvider()
        );
    }

    /**
     * @return string[][] Each dataset has three elements in the following order:
     *         - HTML with non-XML self-closing tags (e.g. "...<br>...");
     *         - The equivalent HTML with XML self-closing tags (e.g. "...<br/>...");
     *         - The name of a self-closing tag contained in the HTML (e.g. "br").
     *
     * @psalm-return array<string, array{0:string, 1:string, 2:string}>
     */
    public function selfClosingTagDataProvider(): array
    {
        return \array_map(
            /**
             * @psalm-param array{0:string, 1:string} $dataset
             *
             * @psalm-return array{0:string, 1:string, 2:string}
             */
            static function (array $dataset) {
                \array_unshift($dataset, \str_replace('/>', '>', $dataset[0]));

                /** @psalm-var array{0:string, 1:string, 2:string} */
                return $dataset;
            },
            $this->xmlSelfClosingTagDataProvider()
        );
    }

    /**
     * Concatenates pairs of datasets (in a similar way to SQL `JOIN`) such that each new dataset consists of a 'row'
     * from a left-hand-side dataset joined with a 'row' from a right-hand-side dataset.
     *
     * @param string[][] $leftDatasets
     * @param string[][] $rightDatasets
     *
     * @psalm-param array<string, array<int, string>> $leftDatasets
     * @psalm-param array<string, array<int, string>> $rightDatasets
     *
     * @return string[][] The new datasets comprise the first dataset from the left-hand side with each of the datasets
     * from the right-hand side, and the each of the remaining datasets from the left-hand side with the first dataset
     * from the right-hand side.
     */
    public static function joinDatasets(array $leftDatasets, array $rightDatasets): array
    {
        $datasets = [];
        $doneFirstLeft = false;
        foreach ($leftDatasets as $leftDatasetName => $leftDataset) {
            foreach ($rightDatasets as $rightDatasetName => $rightDataset) {
                $datasets[$leftDatasetName . ' & ' . $rightDatasetName]
                    = \array_merge($leftDataset, $rightDataset);
                if ($doneFirstLeft) {
                    // Not all combinations are required,
                    // just all of 'right' with one of 'left' and all of 'left' with one of 'right'.
                    break;
                }
            }
            $doneFirstLeft = true;
        }
        return $datasets;
    }

    /**
     * @return string[][]
     */
    public function documentTypeAndSelfClosingTagDataProvider(): array
    {
        return self::joinDatasets($this->documentTypeDataProvider(), $this->selfClosingTagDataProvider());
    }

    /**
     * @test
     *
     * @param string $documentType
     * @param string $htmlWithNonXmlSelfClosingTags
     * @param string $htmlWithXmlSelfClosingTags
     *
     * @dataProvider documentTypeAndSelfClosingTagDataProvider
     */
    public function convertsXmlSelfClosingTagsToNonXmlSelfClosingTag(
        string $documentType,
        string $htmlWithNonXmlSelfClosingTags,
        string $htmlWithXmlSelfClosingTags
    ) {
        $subject = TestingHtmlProcessor::fromHtml(
            $documentType . '<html><body>' . $htmlWithXmlSelfClosingTags . '</body></html>'
        );

        $result = $subject->render();

        self::assertContains('<body>' . $htmlWithNonXmlSelfClosingTags . '</body>', $result);
    }

    /**
     * @test
     *
     * @param string $documentType
     * @param string $htmlWithNonXmlSelfClosingTags
     *
     * @dataProvider documentTypeAndSelfClosingTagDataProvider
     */
    public function keepsNonXmlSelfClosingTags(string $documentType, string $htmlWithNonXmlSelfClosingTags)
    {
        $subject = TestingHtmlProcessor::fromHtml(
            $documentType . '<html><body>' . $htmlWithNonXmlSelfClosingTags . '</body></html>'
        );

        $result = $subject->render();

        self::assertContains('<body>' . $htmlWithNonXmlSelfClosingTags . '</body>', $result);
    }

    /**
     * @test
     *
     * @param string $htmlWithNonXmlSelfClosingTags
     * @param string $tagName
     *
     * @dataProvider nonXmlSelfClosingTagDataProvider
     */
    public function notAddsClosingTagForSelfClosingTags(string $htmlWithNonXmlSelfClosingTags, string $tagName)
    {
        $subject = TestingHtmlProcessor::fromHtml(
            '<html><body>' . $htmlWithNonXmlSelfClosingTags . '</body></html>'
        );

        $result = $subject->render();

        self::assertNotContains('</' . $tagName, $result);
    }

    /**
     * @test
     */
    public function renderBodyContentForEmptyBodyReturnsEmptyString()
    {
        $subject = TestingHtmlProcessor::fromHtml('<html><body></body></html>');

        $result = $subject->renderBodyContent();

        self::assertSame('', $result);
    }

    /**
     * @test
     */
    public function renderBodyContentReturnsBodyContent()
    {
        $bodyContent = '<p>Hello world</p>';
        $subject = TestingHtmlProcessor::fromHtml('<html><body>' . $bodyContent . '</body></html>');

        $result = $subject->renderBodyContent();

        self::assertSame($bodyContent, $result);
    }

    /**
     * Issue #677
     *
     * @test
     */
    public function renderBodyContentForBodyWithAttributeReturnsBodyContent()
    {
        $bodyContent = '<div>simple</div>';
        $subject = TestingHtmlProcessor::fromHtml('<html><body class="foo">' . $bodyContent . '</body></html>');

        $result = $subject->renderBodyContent();

        self::assertSame($bodyContent, $result);
    }

    /**
     * @test
     *
     * @param string $codeNotToBeChanged
     *
     * @dataProvider specialCharactersDataProvider
     */
    public function renderBodyContentKeepsSpecialCharactersInTextNodes(string $codeNotToBeChanged)
    {
        $html = '<html><p>' . $codeNotToBeChanged . '</p></html>';
        $subject = TestingHtmlProcessor::fromHtml($html);

        $result = $subject->renderBodyContent();

        self::assertContains($codeNotToBeChanged, $result);
    }

    /**
     * @test
     *
     * @param string $htmlWithNonXmlSelfClosingTags
     * @param string $tagName
     *
     * @dataProvider nonXmlSelfClosingTagDataProvider
     */
    public function renderBodyContentNotAddsClosingTagForSelfClosingTags(
        string $htmlWithNonXmlSelfClosingTags,
        string $tagName
    ) {
        $subject = TestingHtmlProcessor::fromHtml(
            '<html><body>' . $htmlWithNonXmlSelfClosingTags . '</body></html>'
        );

        $result = $subject->renderBodyContent();

        self::assertNotContains('</' . $tagName, $result);
    }

    /**
     * @test
     */
    public function getDomDocumentReturnsDomDocument()
    {
        $subject = TestingHtmlProcessor::fromHtml('<html></html>');

        self::assertInstanceOf(\DOMDocument::class, $subject->getDomDocument());
    }

    /**
     * @test
     */
    public function getDomDocumentReturnsDomDocumentProvidedToFromDomDocument()
    {
        $document = new \DOMDocument();
        $document->loadHTML('<html></html>');
        $subject = TestingHtmlProcessor::fromDomDocument($document);

        self::assertSame($document, $subject->getDomDocument());
    }

    /**
     * @test
     */
    public function getDomDocumentWithNormalizedHtmlRepresentsTheGivenHtml()
    {
        $html = "<!DOCTYPE html>\n<html>\n<head>" .
            '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">' .
            "</head>\n<body>\n<br>\n</body>\n</html>\n";
        $subject = TestingHtmlProcessor::fromHtml($html);

        $domDocument = $subject->getDomDocument();

        self::assertEqualsHtml($html, $domDocument->saveHTML());
    }

    /**
     * @test
     *
     * @param string $htmlWithNonXmlSelfClosingTags
     * @param string $tagName
     *
     * @dataProvider nonXmlSelfClosingTagDataProvider
     */
    public function getDomDocumentVoidElementNotHasChildNodes(string $htmlWithNonXmlSelfClosingTags, string $tagName)
    {
        // Append a 'trap' element that might become a child node if the HTML is parsed incorrectly
        $subject = TestingHtmlProcessor::fromHtml(
            '<html><body>' . $htmlWithNonXmlSelfClosingTags . '<span>foo</span></body></html>'
        );

        $domDocument = $subject->getDomDocument();

        $voidElements = $domDocument->getElementsByTagName($tagName);
        /** @var \DOMElement $element */
        foreach ($voidElements as $element) {
            self::assertFalse($element->hasChildNodes());
        }
    }

    /**
     * Asserts that an HTML haystack contains an HTML needle, allowing for additional newlines in the haystack that may
     * have been inserted by the `formatOutput` option of `DOMDocument`.
     *
     * @param string $needle
     * @param string $haystack
     * @param string $message
     */
    private static function assertContainsHtml(string $needle, string $haystack, string $message = '')
    {
        $needleMatcher = \preg_quote($needle, '%');
        $needleMatcherWithNewlines = \preg_replace(
            '%\\\\<(?:body|ul|dl|optgroup|table|tr|hr'
                . '|/(?:li|dd|dt|option|optgroup|caption|colgroup|thead|tbody|tfoot|tr|td|th'
                . '|p|dl|h[1-6]|menu|ol|pre|table|ul|address|blockquote|div|fieldset|form))\\\\>%',
            '$0\\n?+',
            $needleMatcher
        );

        self::assertRegExp('%' . $needleMatcherWithNewlines . '%', $haystack, $message);
    }

    /**
     * Asserts that two HTML strings are equal, allowing for whitespace differences in the HTML element itself (but not
     * its descendants) and after its closing tag.
     *
     * @param string $expected
     * @param string $actual
     * @param string $message
     */
    private static function assertEqualsHtml(string $expected, string $actual, string $message = '')
    {
        $normalizedExpected = self::normalizeHtmlElement($expected);
        $normalizedActual = self::normalizeHtmlElement($actual);

        self::assertSame($normalizedExpected, $normalizedActual, $message);
    }

    /**
     * Normalizes whitespace in the HTML element itself (but not its descendants) and after its closing tag, with a
     * single newline inserted or replacing whitespace at positions where whitespace may occur but is superfluous.
     *
     * @param string $html
     *
     * @return string
     */
    private static function normalizeHtmlElement(string $html): string
    {
        return \preg_replace(
            [
                '%(<html(?=[\\s>])[^>]*+>)\\s*+(<head[\\s>])%',
                '%(</head>)\\s*+(<body[\\s>])%',
                '%(</body>)\\s*+(</html>)%',
                '%(</html>)\\s*+($)%',
            ],
            "$1\n$2",
            $html
        );
    }
}