@@ -410,6 +410,45 @@ def test_clean_link(url, clean_url):
410
410
assert _clean_link (url ) == clean_url
411
411
412
412
413
def _test_parse_links_data_attribute(anchor_html, attr, expected):
    """Parse a page containing a single anchor and check one Link attribute.

    :param anchor_html: the ``<a>`` element to embed in the page body.
    :param attr: name of the Link attribute to inspect
        (e.g. ``"requires_python"`` or ``"yanked_reason"``).
    :param expected: the value the attribute should have after parsing.
    """
    # NOTE: no stray spaces may appear inside the markup — the bytes below
    # are exactly what parse_links() sees.
    html = (
        '<html><head><meta charset="utf-8"><head>'
        f"<body>{anchor_html}</body></html>"
    )
    html_bytes = html.encode("utf-8")
    page = HTMLPage(
        html_bytes,
        encoding=None,
        # parse_links() is cached by url, so we inject a random uuid to ensure
        # the page content isn't cached.
        url=f"https://example.com/simple-{uuid.uuid4()}/",
    )
    links = list(parse_links(page))
    # Exactly one link must come back; tuple-unpacking enforces that.
    (link,) = links
    actual = getattr(link, attr)
    assert actual == expected
428
+
429
@pytest.mark.parametrize(
    "anchor_html, expected",
    [
        # Test not present.
        ('<a href="/pkg-1.0.tar.gz"></a>', None),
        # Test present with no value.
        ('<a href="/pkg-1.0.tar.gz" data-requires-python></a>', None),
        # Test a value that needs no unescaping.
        (
            '<a href="/pkg-1.0.tar.gz" data-requires-python=">=3.6"></a>',
            ">=3.6",
        ),
        # Test requires python is unescaped once.
        (
            '<a href="/pkg-1.0.tar.gz" data-requires-python="&gt;=3.6"></a>',
            ">=3.6",
        ),
    ],
)
def test_parse_links__requires_python(anchor_html, expected):
    """Link.requires_python must reflect the data-requires-python attribute."""
    _test_parse_links_data_attribute(anchor_html, "requires_python", expected)
451
+
413
452
@pytest .mark .parametrize (
414
453
"anchor_html, expected" ,
415
454
[
@@ -428,27 +467,15 @@ def test_clean_link(url, clean_url):
428
467
'<a href="/pkg-1.0.tar.gz" data-yanked="curlyquote \u2018 "></a>' ,
429
468
"curlyquote \u2018 " ,
430
469
),
470
+ # Test yanked reason is unescaped once.
471
+ (
472
+ '<a href="/pkg-1.0.tar.gz" data-yanked="version &lt; 1"></a>' ,
473
+ "version < 1" ,
474
+ ),
431
475
],
432
476
)
433
477
def test_parse_links__yanked_reason(anchor_html, expected):
    """Link.yanked_reason must reflect the data-yanked attribute.

    The page construction and single-link parsing are delegated to
    ``_test_parse_links_data_attribute``; this test only names the
    attribute under inspection.
    """
    _test_parse_links_data_attribute(anchor_html, "yanked_reason", expected)
452
479
453
480
454
481
def test_parse_links_caches_same_page_by_url ():
0 commit comments