|
41 | 41 | from pip._internal.models.search_scope import SearchScope
|
42 | 42 | from pip._internal.network.session import PipSession
|
43 | 43 | from pip._internal.network.utils import raise_for_status
|
44 |
| -from pip._internal.utils.deprecation import deprecated |
45 | 44 | from pip._internal.utils.filetypes import is_archive_file
|
46 | 45 | from pip._internal.utils.misc import pairwise, redact_auth_from_url
|
47 | 46 | from pip._internal.vcs import vcs
|
@@ -346,34 +345,13 @@ def parse_links(page: "HTMLPage", use_deprecated_html5lib: bool) -> Iterable[Lin
|
346 | 345 | """
|
347 | 346 | Parse an HTML document, and yield its anchor elements as Link objects.
|
348 | 347 | """
|
349 |
| - encoding = page.encoding or "utf-8" |
350 |
| - |
351 |
| - # Check if the page starts with a valid doctype, to decide whether to use |
352 |
| - # http.parser or (deprecated) html5lib for parsing -- unless explicitly |
353 |
| - # requested to use html5lib. |
354 |
| - if not use_deprecated_html5lib: |
355 |
| - expected_doctype = "<!doctype html>".encode(encoding) |
356 |
| - actual_start = page.content[: len(expected_doctype)] |
357 |
| - if actual_start.decode(encoding).lower() != "<!doctype html>": |
358 |
| - deprecated( |
359 |
| - reason=( |
360 |
| - f"The HTML index page being used ({page.url}) is not a proper " |
361 |
| - "HTML 5 document. This is in violation of PEP 503 which requires " |
362 |
| - "these pages to be well-formed HTML 5 documents. Please reach out " |
363 |
| - "to the owners of this index page, and ask them to update this " |
364 |
| - "index page to a valid HTML 5 document." |
365 |
| - ), |
366 |
| - replacement=None, |
367 |
| - gone_in="22.2", |
368 |
| - issue=10825, |
369 |
| - ) |
370 |
| - use_deprecated_html5lib = True |
371 | 348 |
|
372 | 349 | if use_deprecated_html5lib:
|
373 | 350 | yield from _parse_links_html5lib(page)
|
374 | 351 | return
|
375 | 352 |
|
376 |
| - parser = HTMLLinkParser() |
| 353 | + parser = HTMLLinkParser(page.url) |
| 354 | + encoding = page.encoding or "utf-8" |
377 | 355 | parser.feed(page.content.decode(encoding))
|
378 | 356 |
|
379 | 357 | url = page.url
|
@@ -437,14 +415,14 @@ def handle_decl(self, decl: str) -> None:
|
437 | 415 | re.IGNORECASE,
|
438 | 416 | )
|
439 | 417 | if match is None:
|
440 |
| - logger.warn( |
| 418 | + logger.warning( |
441 | 419 | "[present-diagnostic] %s",
|
442 | 420 | BadHTMLDoctypeDeclaration(url=self.url),
|
443 | 421 | )
|
444 | 422 |
|
445 | 423 | def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
|
446 | 424 | if not self._dealt_with_doctype_issues:
|
447 |
| - logger.warn( |
| 425 | + logger.warning( |
448 | 426 | "[present-diagnostic] %s",
|
449 | 427 | MissingHTMLDoctypeDeclaration(url=self.url),
|
450 | 428 | )
|
|
0 commit comments