1
1
package nokogiri ;
2
2
3
- import static org .jruby .runtime .Helpers .invoke ;
4
-
5
- import java .io .IOException ;
6
- import java .io .InputStream ;
7
-
3
+ import nokogiri .internals .*;
8
4
import org .apache .xerces .parsers .AbstractSAXParser ;
9
5
import org .jruby .Ruby ;
10
6
import org .jruby .RubyClass ;
11
7
import org .jruby .RubyFixnum ;
12
- import org .jruby .RubyModule ;
13
- import org .jruby .RubyObjectAdapter ;
14
8
import org .jruby .anno .JRubyClass ;
15
9
import org .jruby .anno .JRubyMethod ;
16
10
import org .jruby .exceptions .RaiseException ;
17
- import org .jruby .javasupport . JavaEmbedUtils ;
11
+ import org .jruby .runtime . Helpers ;
18
12
import org .jruby .runtime .ThreadContext ;
19
13
import org .jruby .runtime .builtin .IRubyObject ;
20
- import org .xml .sax .ContentHandler ;
21
- import org .xml .sax .ErrorHandler ;
22
14
import org .xml .sax .SAXException ;
23
- import org .xml .sax .SAXNotRecognizedException ;
24
- import org .xml .sax .SAXNotSupportedException ;
25
15
import org .xml .sax .SAXParseException ;
26
16
27
- import nokogiri . internals . NokogiriHandler ;
28
- import nokogiri . internals . NokogiriHelpers ;
29
- import nokogiri . internals . ParserContext ;
30
- import nokogiri . internals . XmlSaxParser ;
17
+ import java . io . IOException ;
18
+ import java . io . InputStream ;
19
+
20
+ import static org . jruby . runtime . Helpers . invoke ;
31
21
32
22
/**
33
23
* Base class for the SAX parsers.
@@ -51,6 +41,7 @@ public class XmlSaxParserContext extends ParserContext
51
41
protected AbstractSAXParser parser ;
52
42
53
43
protected NokogiriHandler handler ;
44
+ protected NokogiriErrorHandler errorHandler ;
54
45
private boolean replaceEntities = true ;
55
46
private boolean recovery = false ;
56
47
@@ -168,31 +159,12 @@ public class XmlSaxParserContext extends ParserContext
168
159
return (XmlSaxParserContext ) NokogiriService .XML_SAXPARSER_CONTEXT_ALLOCATOR .allocate (runtime , klazz );
169
160
}
170
161
171
- /**
172
- * Set a property of the underlying parser.
173
- */
174
- protected void
175
- setProperty (String key , Object val )
176
- throws SAXNotRecognizedException , SAXNotSupportedException
177
- {
178
- parser .setProperty (key , val );
179
- }
180
-
181
- protected void
182
- setContentHandler (ContentHandler handler )
183
- {
184
- parser .setContentHandler (handler );
185
- }
186
-
187
- protected void
188
- setErrorHandler (ErrorHandler handler )
189
- {
190
- parser .setErrorHandler (handler );
191
- }
192
-
193
162
public final NokogiriHandler
194
163
getNokogiriHandler () { return handler ; }
195
164
165
+ public final NokogiriErrorHandler
166
+ getNokogiriErrorHandler () { return errorHandler ; }
167
+
196
168
/**
197
169
* Perform any initialization prior to parsing with the handler
198
170
* <code>handlerRuby</code>. Convenience hook for subclasses.
@@ -223,6 +195,17 @@ public class XmlSaxParserContext extends ParserContext
223
195
parser .parse (getInputSource ());
224
196
}
225
197
198
+ protected static Options
199
+ defaultParseOptions (ThreadContext context )
200
+ {
201
+ return new ParserContext .Options (
202
+ RubyFixnum .fix2long (Helpers .invoke (context ,
203
+ ((RubyClass )context .getRuntime ().getClassFromPath ("Nokogiri::XML::ParseOptions" ))
204
+ .getConstant ("DEFAULT_XML" ),
205
+ "to_i" ))
206
+ );
207
+ }
208
+
226
209
@ JRubyMethod
227
210
public IRubyObject
228
211
parse_with (ThreadContext context , IRubyObject handlerRuby )
@@ -233,14 +216,19 @@ public class XmlSaxParserContext extends ParserContext
233
216
throw runtime .newArgumentError ("argument must respond_to document" );
234
217
}
235
218
236
- NokogiriHandler handler = this .handler = new NokogiriHandler (runtime , handlerRuby );
237
- preParse (runtime , handlerRuby , handler );
219
+ /* TODO: how should we pass in parse options? */
220
+ ParserContext .Options options = defaultParseOptions (context );
221
+
222
+ errorHandler = new NokogiriStrictErrorHandler (runtime , options .noError , options .noWarning );
223
+ handler = new NokogiriHandler (runtime , handlerRuby , errorHandler );
238
224
239
- setContentHandler (handler );
240
- setErrorHandler (handler );
225
+ preParse (runtime , handlerRuby , handler );
226
+ parser .setContentHandler (handler );
227
+ parser .setErrorHandler (handler );
228
+ parser .setEntityResolver (new NokogiriEntityResolver (runtime , errorHandler , options ));
241
229
242
230
try {
243
- setProperty ("http://xml.org/sax/properties/lexical-handler" , handler );
231
+ parser . setProperty ("http://xml.org/sax/properties/lexical-handler" , handler );
244
232
} catch (Exception ex ) {
245
233
throw runtime .newRuntimeError ("Problem while creating XML SAX Parser: " + ex .toString ());
246
234
}
@@ -270,8 +258,6 @@ public class XmlSaxParserContext extends ParserContext
270
258
271
259
postParse (runtime , handlerRuby , handler );
272
260
273
- //maybeTrimLeadingAndTrailingWhitespace(context, handlerRuby);
274
-
275
261
return runtime .getNil ();
276
262
}
277
263
@@ -319,53 +305,6 @@ public class XmlSaxParserContext extends ParserContext
319
305
return context .runtime .newBoolean (recovery );
320
306
}
321
307
322
- /**
323
- * If the handler's document is a FragmentHandler, attempt to trim
324
- * leading and trailing whitespace.
325
- *
326
- * This is a bit hackish and depends heavily on the internals of
327
- * FragmentHandler.
328
- */
329
- protected void
330
- maybeTrimLeadingAndTrailingWhitespace (ThreadContext context , IRubyObject parser )
331
- {
332
- RubyObjectAdapter adapter = JavaEmbedUtils .newObjectAdapter ();
333
- RubyModule mod = context .getRuntime ().getClassFromPath ("Nokogiri::XML::FragmentHandler" );
334
-
335
- IRubyObject handler = adapter .getInstanceVariable (parser , "@document" );
336
- if (handler == null || handler .isNil () || !adapter .isKindOf (handler , mod )) {
337
- return ;
338
- }
339
- IRubyObject stack = adapter .getInstanceVariable (handler , "@stack" );
340
- if (stack == null || stack .isNil ()) {
341
- return ;
342
- }
343
- // doc is finally a DocumentFragment whose nodes we can check
344
- IRubyObject doc = adapter .callMethod (stack , "first" );
345
- if (doc == null || doc .isNil ()) {
346
- return ;
347
- }
348
-
349
- IRubyObject children ;
350
-
351
- for (;;) {
352
- children = adapter .callMethod (doc , "children" );
353
- IRubyObject first = adapter .callMethod (children , "first" );
354
- if (NokogiriHelpers .isBlank (first )) { adapter .callMethod (first , "unlink" ); }
355
- else { break ; }
356
- }
357
-
358
- for (;;) {
359
- children = adapter .callMethod (doc , "children" );
360
- IRubyObject last = adapter .callMethod (children , "last" );
361
- if (NokogiriHelpers .isBlank (last )) { adapter .callMethod (last , "unlink" ); }
362
- else { break ; }
363
- }
364
-
365
- // While we have a document, normalize it.
366
- ((XmlNode ) doc ).normalize ();
367
- }
368
-
369
308
@ JRubyMethod (name = "column" )
370
309
public IRubyObject
371
310
column (ThreadContext context )
@@ -383,5 +322,4 @@ public class XmlSaxParserContext extends ParserContext
383
322
if (number == null ) { return context .getRuntime ().getNil (); }
384
323
return RubyFixnum .newFixnum (context .getRuntime (), number .longValue ());
385
324
}
386
-
387
325
}
0 commit comments