Skip to content

Commit ca1e7b5

Browse files
committed
Simplify emphasis pattern.
1 parent 8e5b69a commit ca1e7b5

File tree

3 files changed

+115
-116
lines changed

3 files changed

+115
-116
lines changed

mistune/inlines.py

+22-24
Original file line numberDiff line numberDiff line change
@@ -58,26 +58,18 @@ class InlineParser(ScannerParser):
5858
#: [an example]: https://example.com "optional title"
5959
REF_LINK2 = r'!?\[((?:[^\\\[\]]|' + ESCAPE + '){0,1000})\]'
6060

61-
#: emphasis with * or _::
61+
#: emphasis and strong with * or _::
6262
#:
63-
#: *text*
64-
#: _text_
65-
EMPHASIS = (
66-
r'\b_[^\s_](?:(?<=\\)_)?_|' # _s_ and _\_-
67-
r'\*[^\s*](?:(?<=\\)\*)?\*|' # *s* and *\**
68-
r'\b_[^\s_][\s\S]*?[^\s_]_(?!_|[^\s' + PUNCTUATION + r'])\b|'
69-
r'\*[^\s*"<\[][\s\S]*?[^\s*]\*'
63+
#: *emphasis* **strong**
64+
#: _emphasis_ __strong__
65+
ASTERISK_EMPHASIS = (
66+
r'(\*{1,2})((?:(?:' + ESCAPE + r'|[^\s*"<\[])[\s\S]*?)?'
67+
r'(?:' + ESCAPE + r'|[^\s*]))\1'
7068
)
71-
72-
#: strong with ** or __::
73-
#:
74-
#: **text**
75-
#: __text__
76-
STRONG = (
77-
r'\b__[^\s\_]__(?!_)\b|'
78-
r'\*\*[^\s\*]\*\*(?!\*)|'
79-
r'\b__[^\s][\s\S]*?[^\s]__(?!_)\b|'
80-
r'\*\*[^\s][\s\S]*?[^\s]\*\*(?!\*)'
69+
UNDERSCORE_EMPHASIS = (
70+
r'\b(_{1,2})((?:(?:' + ESCAPE + r'|[^\s_])[\s\S]*?)?'
71+
r'(?:' + ESCAPE + r'|[^\s_]))\1'
72+
r'(?!_|[^\s' + PUNCTUATION + r'])\b'
8173
)
8274

8375
#: codespan with `::
@@ -109,7 +101,8 @@ class InlineParser(ScannerParser):
109101

110102
RULE_NAMES = (
111103
'escape', 'inline_html', 'auto_link', 'footnote',
112-
'std_link', 'ref_link', 'ref_link2', 'strong', 'emphasis',
104+
'std_link', 'ref_link', 'ref_link2',
105+
'asterisk_emphasis', 'underscore_emphasis',
113106
'codespan', 'strikethrough', 'linebreak',
114107
)
115108

@@ -186,12 +179,17 @@ def parse_footnote(self, m, state):
186179
state['footnotes'].append(key)
187180
return 'footnote_ref', key, index
188181

189-
def parse_emphasis(self, m, state):
190-
text = m.group(0)[1:-1]
191-
return 'emphasis', self.render(text, state)
182+
def parse_asterisk_emphasis(self, m, state):
183+
return self.tokenize_emphasis(m, state)
184+
185+
def parse_underscore_emphasis(self, m, state):
186+
return self.tokenize_emphasis(m, state)
192187

193-
def parse_strong(self, m, state):
194-
text = m.group(0)[2:-2]
188+
def tokenize_emphasis(self, m, state):
189+
marker = m.group(1)
190+
text = m.group(2)
191+
if len(marker) == 1:
192+
return 'emphasis', self.render(text, state)
195193
return 'strong', self.render(text, state)
196194

197195
def parse_codespan(self, m, state):

tests/fixtures/__init__.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,12 @@
1313

1414
def load_cases(TestClass, assert_method, filename, ignore=None):
1515
def attach_case(n, text, html):
16-
def test_case(self):
16+
def method(self):
1717
assert_method(self, n, text, html)
1818

1919
name = 'test_{}'.format(n)
20-
setattr(TestClass, name, test_case)
20+
method.__name__ = name
21+
setattr(TestClass, name, method)
2122

2223
for n, text, html in load_examples(filename):
2324
if ignore and ignore(n):
@@ -45,7 +46,7 @@ def parse_examples(text):
4546

4647
if md and html:
4748
count += 1
48-
n = '%s_%02d' % (section, count)
49+
n = '%s_%03d' % (section, count)
4950
md = md.replace(u'\u2192', '\t')
5051
html = html.replace(u'\u2192', '\t')
5152
yield n, md, html

tests/test_commonmark.py

+89-89
Original file line numberDiff line numberDiff line change
@@ -5,99 +5,95 @@
55

66

77
IGNORE_CASES = {
8-
'setext_headings_02', # we only allow one line title
9-
'setext_headings_15',
10-
11-
'setext_headings_03', # must start with 2 = or -
12-
'setext_headings_07', # ignore
13-
'setext_headings_13', # ignore
14-
15-
'html_blocks_39', # ignore
16-
'link_reference_definitions_19', # weird rule
17-
18-
'block_quotes_08', # we treat it different
19-
20-
'list_items_05', # I don't agree
21-
'list_items_24',
22-
'list_items_28',
23-
'list_items_39', # no lazy
24-
'list_items_40',
25-
'list_items_41',
26-
27-
'lists_07', # we use simple way to detect tight list
28-
'lists_16',
29-
'lists_17',
30-
'lists_18',
31-
'lists_19',
32-
33-
'block_quotes_05', # we don't allow lazy continuation
34-
'block_quotes_06',
35-
'block_quotes_11',
36-
'block_quotes_20',
37-
'block_quotes_23',
38-
'block_quotes_24', # this test case shows why lazy is not good
39-
40-
'code_spans_09', # code has no priority
41-
'code_spans_10',
42-
43-
'entity_and_numeric_character_references_04', # &entity is allowed
44-
'entity_and_numeric_character_references_05',
45-
46-
'links_31', # different behavior
47-
'links_37',
48-
'links_38', # code has no priority
49-
'links_39',
50-
'links_45', # different behavior
51-
'links_46',
52-
'links_49',
53-
'links_50', # code has no priority
54-
'links_51', # different behavior
55-
'links_64', # allow empty key
56-
'links_65',
57-
58-
'images_02', # we just keep everything as raw
59-
'images_03',
60-
'images_04',
61-
'images_05',
62-
'images_06',
63-
'images_14',
64-
'images_18',
65-
66-
'autolinks_02', # don't understand
8+
'setext_headings_002', # we only allow one-line titles
9+
'setext_headings_015',
10+
11+
'setext_headings_003', # must start with 2 = or -
12+
'setext_headings_007', # ignore
13+
'setext_headings_013', # ignore
14+
15+
'html_blocks_039', # ignore
16+
'link_reference_definitions_019', # weird rule
17+
18+
'block_quotes_008', # we treat it differently
19+
20+
'list_items_005', # I don't agree
21+
'list_items_024',
22+
'list_items_028',
23+
'list_items_039', # no lazy
24+
'list_items_040',
25+
'list_items_041',
26+
27+
'lists_007', # we use a simple way to detect tight lists
28+
'lists_016',
29+
'lists_017',
30+
'lists_018',
31+
'lists_019',
32+
33+
'block_quotes_005', # we don't allow lazy continuation
34+
'block_quotes_006',
35+
'block_quotes_011',
36+
'block_quotes_020',
37+
'block_quotes_023',
38+
'block_quotes_024', # this test case shows why lazy is not good
39+
40+
'code_spans_009', # code has no priority
41+
'code_spans_010',
42+
43+
'entity_and_numeric_character_references_004', # &entity is allowed
44+
'entity_and_numeric_character_references_005',
45+
46+
'links_031', # different behavior
47+
'links_037',
48+
'links_038', # code has no priority
49+
'links_039',
50+
'links_045', # different behavior
51+
'links_046',
52+
'links_049',
53+
'links_050', # code has no priority
54+
'links_051', # different behavior
55+
'links_064', # allow empty key
56+
'links_065',
57+
58+
'images_002', # we just keep everything as raw
59+
'images_003',
60+
'images_004',
61+
'images_005',
62+
'images_006',
63+
'images_014',
64+
'images_018',
65+
66+
'autolinks_002', # don't understand
6767
}
6868
INSANE_CASES = {
69-
'fenced_code_blocks_13',
70-
'fenced_code_blocks_15',
71-
'list_items_33',
72-
'list_items_38',
73-
74-
'link_reference_definitions_02', # only allow one line definition
75-
'link_reference_definitions_03',
76-
'link_reference_definitions_04',
77-
'link_reference_definitions_05',
78-
'link_reference_definitions_07',
79-
'link_reference_definitions_21',
80-
81-
'links_25',
82-
'links_32',
83-
'links_33',
84-
'links_41',
85-
'links_60',
86-
'links_82',
87-
'links_84',
69+
'fenced_code_blocks_013',
70+
'fenced_code_blocks_015',
71+
'list_items_033',
72+
'list_items_038',
73+
74+
'link_reference_definitions_002', # only allow one line definition
75+
'link_reference_definitions_003',
76+
'link_reference_definitions_004',
77+
'link_reference_definitions_005',
78+
'link_reference_definitions_007',
79+
'link_reference_definitions_021',
80+
81+
'links_025',
82+
'links_032',
83+
'links_033',
84+
'links_041',
85+
'links_060',
86+
'links_082',
87+
'links_084',
8888
}
8989

9090
DIFFERENCES = {
91-
'tabs_05': lambda s: s.replace('<code> ', '<code>'),
92-
'tabs_06': lambda s: s.replace('<code> ', '<code>'),
93-
'tabs_07': lambda s: s.replace('<code> ', '<code>'),
91+
'tabs_005': lambda s: s.replace('<code> ', '<code>'),
92+
'tabs_006': lambda s: s.replace('<code> ', '<code>'),
93+
'tabs_007': lambda s: s.replace('<code> ', '<code>'),
9494
}
9595

9696

97-
class TestCommonMark(TestCase):
98-
pass
99-
100-
10197
def assert_spec(self, n, text, html):
10298
print(text)
10399
result = mistune.html(text)
@@ -120,16 +116,20 @@ def assert_spec(self, n, text, html):
120116
'paragraphs', 'blank_lines',
121117
'block_quotes', 'list_items', 'lists',
122118
'backslash', 'entity', 'code_spans',
123-
# emphasis, links
124-
'images', 'autolinks', 'raw_html',
119+
# emphasis
120+
'links', 'images', 'autolinks', 'raw_html',
125121
'hard_line', 'soft_line', 'textual',
126122
}
127123

128124

129125
def ignore(n):
130-
if not n.startswith('links'):
126+
if n.startswith('emphasis'):
131127
return True
132128
return (n in IGNORE_CASES) or (n in INSANE_CASES)
133129

134130

135-
fixtures.load_cases(TestCase, assert_spec, 'commonmark.txt', ignore)
131+
class TestCommonMark(TestCase):
132+
pass
133+
134+
135+
fixtures.load_cases(TestCommonMark, assert_spec, 'commonmark.txt', ignore)

0 commit comments

Comments
 (0)