23
23
# and whitespace ( \t\n\r)
24
24
RE_WORD = "[^][(){}=$\u2500 -\u27BF \uE000 -\uF8FF \\ t\\ n\\ r]+"
25
25
26
+ MIN_LENGTH_DEFAULT = 5
27
+
26
28
27
29
class ExtraktoException (Exception ):
28
30
pass
29
31
30
32
31
33
class Extrakto :
32
- def __init__ (self , * , min_length = 5 , alt = False , prefix_name = False ):
34
+ def __init__ (self , * , min_length = None , alt = False , prefix_name = False ):
33
35
conf = ConfigParser (interpolation = None )
34
36
default_conf = os .path .join (SCRIPT_DIR , "extrakto.conf" )
35
37
user_conf = os .path .join (
@@ -71,6 +73,12 @@ def __init__(self, *, min_length=5, alt=False, prefix_name=False):
71
73
lstrip = sect .get ("lstrip" , "" ),
72
74
rstrip = sect .get ("rstrip" , "" ),
73
75
alt = alt ,
76
+ # prefer the global min_length; fall back to the filter-specific value
77
+ min_length = (
78
+ self .min_length
79
+ if self .min_length is not None
80
+ else sect .getint ("min_length" , MIN_LENGTH_DEFAULT )
81
+ ),
74
82
)
75
83
76
84
def __getitem__ (self , key ):
@@ -86,14 +94,26 @@ def keys(self):
86
94
87
95
88
96
class FilterDef :
89
- def __init__ (self , extrakto , name , * , regex , exclude , lstrip , rstrip , alt ):
97
+ def __init__ (
98
+ self ,
99
+ extrakto ,
100
+ name ,
101
+ * ,
102
+ regex ,
103
+ exclude ,
104
+ lstrip ,
105
+ rstrip ,
106
+ alt ,
107
+ min_length = MIN_LENGTH_DEFAULT ,
108
+ ):
90
109
self .extrakto = extrakto
91
110
self .name = name
92
111
self .regex = regex
93
112
self .exclude = exclude
94
113
self .lstrip = lstrip
95
114
self .rstrip = rstrip
96
115
self .alt = alt
116
+ self .min_length = min_length
97
117
98
118
def filter (self , text ):
99
119
res = list ()
@@ -111,7 +131,7 @@ def filter(self, text):
111
131
if self .rstrip :
112
132
item = item .rstrip (self .rstrip )
113
133
114
- if len (item ) >= self .extrakto . min_length :
134
+ if len (item ) >= self .min_length :
115
135
if not self .exclude or not re .search (self .exclude , item , re .I ):
116
136
if self .extrakto .alt :
117
137
for i , altre in enumerate (self .alt ):
@@ -122,7 +142,7 @@ def filter(self, text):
122
142
return res
123
143
124
144
125
- def get_lines (text , * , min_length = 5 , prefix_name = False ):
145
+ def get_lines (text , * , min_length = MIN_LENGTH_DEFAULT , prefix_name = False ):
126
146
lines = []
127
147
128
148
for raw_line in text .splitlines ():
@@ -209,9 +229,7 @@ def main(parser):
209
229
210
230
parser .add_argument ("-r" , "--reverse" , action = "store_true" , help = "reverse output" )
211
231
212
- parser .add_argument (
213
- "-m" , "--min-length" , default = 5 , help = "minimum token length" , type = int
214
- )
232
+ parser .add_argument ("-m" , "--min-length" , help = "minimum token length" , type = int )
215
233
216
234
parser .add_argument (
217
235
"--warn-empty" , action = "store_true" , help = "warn if result is empty"
0 commit comments